From d10f2150eab62f633aeae36cf4612d1f648a817e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@goma.davemloft.net>
Date: Thu, 24 Jan 2008 16:57:39 -0800
Subject: [MAC80211]: Revert unaligned warning removal.

For release Linux removed this warning, but we want it
back for development.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 00f908d9275..a7263fc476b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1443,6 +1443,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	struct ieee80211_sub_if_data *prev = NULL;
 	struct sk_buff *skb_new;
 	u8 *bssid;
+	int hdrlen;
 
 	/*
 	 * key references and virtual interfaces are protected using RCU
@@ -1472,6 +1473,18 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	rx.fc = le16_to_cpu(hdr->frame_control);
 	type = rx.fc & IEEE80211_FCTL_FTYPE;
 
+	/*
+	 * Drivers are required to align the payload data to a four-byte
+	 * boundary, so the last two bits of the address where it starts
+	 * may not be set. The header is required to be directly before
+	 * the payload data, padding like atheros hardware adds which is
+	 * inbetween the 802.11 header and the payload is not supported,
+	 * the driver is required to move the 802.11 header further back
+	 * in that case.
+	 */
+	hdrlen = ieee80211_get_hdrlen(rx.fc);
+	WARN_ON_ONCE(((unsigned long)(skb->data + hdrlen)) & 3);
+
 	if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT)
 		local->dot11ReceivedFragmentCount++;
 
-- 
cgit v1.2.3


From 9c55e01c0cc835818475a6ce8c4d684df9949ac8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 6 Nov 2007 23:30:13 -0800
Subject: [TCP]: Splice receive support.

Support for network splice receive.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c  | 246 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c |   1 +
 net/ipv4/tcp.c     | 129 ++++++++++++++++++++++++++++
 net/socket.c       |  13 +++
 4 files changed, 389 insertions(+)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b6283779e93..98420f9c4b6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
 #endif
 #include <linux/string.h>
 #include <linux/skbuff.h>
+#include <linux/splice.h>
 #include <linux/cache.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
@@ -71,6 +72,40 @@
 static struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
+static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
+				  struct pipe_buffer *buf)
+{
+	struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+	kfree_skb(skb);
+}
+
+static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
+				struct pipe_buffer *buf)
+{
+	struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+	skb_get(skb);
+}
+
+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
+			       struct pipe_buffer *buf)
+{
+	return 1;
+}
+
+
+/* Pipe buffer operations for a socket. */
+static struct pipe_buf_operations sock_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = sock_pipe_buf_release,
+	.steal = sock_pipe_buf_steal,
+	.get = sock_pipe_buf_get,
+};
+
 /*
  *	Keep out-of-line to prevent kernel bloat.
  *	__builtin_return_address is not used because it is not always
@@ -1122,6 +1157,217 @@ fault:
 	return -EFAULT;
 }
 
+/*
+ * Callback from splice_to_pipe(), if we need to release some pages
+ * at the end of the spd in case we error'ed out in filling the pipe.
+ */
+static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+	struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
+
+	kfree_skb(skb);
+}
+
+/*
+ * Fill page/offset/length into spd, if it can hold more pages.
+ */
+static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+				unsigned int len, unsigned int offset,
+				struct sk_buff *skb)
+{
+	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+		return 1;
+
+	spd->pages[spd->nr_pages] = page;
+	spd->partial[spd->nr_pages].len = len;
+	spd->partial[spd->nr_pages].offset = offset;
+	spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
+	spd->nr_pages++;
+	return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. Returns number of
+ * pages mapped.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+			     unsigned int *total_len,
+			     struct splice_pipe_desc *spd)
+{
+	unsigned int nr_pages = spd->nr_pages;
+	unsigned int poff, plen, len, toff, tlen;
+	int headlen, seg;
+
+	toff = *offset;
+	tlen = *total_len;
+	if (!tlen)
+		goto err;
+
+	/*
+	 * if the offset is greater than the linear part, go directly to
+	 * the fragments.
+	 */
+	headlen = skb_headlen(skb);
+	if (toff >= headlen) {
+		toff -= headlen;
+		goto map_frag;
+	}
+
+	/*
+	 * first map the linear region into the pages/partial map, skipping
+	 * any potential initial offset.
+	 */
+	len = 0;
+	while (len < headlen) {
+		void *p = skb->data + len;
+
+		poff = (unsigned long) p & (PAGE_SIZE - 1);
+		plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
+		len += plen;
+
+		if (toff) {
+			if (plen <= toff) {
+				toff -= plen;
+				continue;
+			}
+			plen -= toff;
+			poff += toff;
+			toff = 0;
+		}
+
+		plen = min(plen, tlen);
+		if (!plen)
+			break;
+
+		/*
+		 * just jump directly to update and return, no point
+		 * in going over fragments when the output is full.
+		 */
+		if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
+			goto done;
+
+		tlen -= plen;
+	}
+
+	/*
+	 * then map the fragments
+	 */
+map_frag:
+	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
+		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+
+		plen = f->size;
+		poff = f->page_offset;
+
+		if (toff) {
+			if (plen <= toff) {
+				toff -= plen;
+				continue;
+			}
+			plen -= toff;
+			poff += toff;
+			toff = 0;
+		}
+
+		plen = min(plen, tlen);
+		if (!plen)
+			break;
+
+		if (spd_fill_page(spd, f->page, plen, poff, skb))
+			break;
+
+		tlen -= plen;
+	}
+
+done:
+	if (spd->nr_pages - nr_pages) {
+		*offset = 0;
+		*total_len = tlen;
+		return 0;
+	}
+err:
+	return 1;
+}
+
+/*
+ * Map data from the skb to a pipe. Should handle both the linear part,
+ * the fragments, and the frag list. It does NOT handle frag lists within
+ * the frag list, if such a thing exists. We'd probably need to recurse to
+ * handle that cleanly.
+ */
+int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+		    struct pipe_inode_info *pipe, unsigned int tlen,
+		    unsigned int flags)
+{
+	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_BUFFERS];
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &sock_pipe_buf_ops,
+		.spd_release = sock_spd_release,
+	};
+	struct sk_buff *skb;
+
+	/*
+	 * I'd love to avoid the clone here, but tcp_read_sock()
+	 * ignores reference counts and unconditonally kills the sk_buff
+	 * on return from the actor.
+	 */
+	skb = skb_clone(__skb, GFP_KERNEL);
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	/*
+	 * __skb_splice_bits() only fails if the output has no room left,
+	 * so no point in going over the frag_list for the error case.
+	 */
+	if (__skb_splice_bits(skb, &offset, &tlen, &spd))
+		goto done;
+	else if (!tlen)
+		goto done;
+
+	/*
+	 * now see if we have a frag_list to map
+	 */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list && tlen; list = list->next) {
+			if (__skb_splice_bits(list, &offset, &tlen, &spd))
+				break;
+		}
+	}
+
+done:
+	/*
+	 * drop our reference to the clone, the pipe consumption will
+	 * drop the rest.
+	 */
+	kfree_skb(skb);
+
+	if (spd.nr_pages) {
+		int ret;
+
+		/*
+		 * Drop the socket lock, otherwise we have reverse
+		 * locking dependencies between sk_lock and i_mutex
+		 * here as compared to sendfile(). We enter here
+		 * with the socket lock held, and splice_to_pipe() will
+		 * grab the pipe inode lock. For sendfile() emulation,
+		 * we call into ->sendpage() with the i_mutex lock held
+		 * and networking will grab the socket lock.
+		 */
+		release_sock(__skb->sk);
+		ret = splice_to_pipe(pipe, &spd);
+		lock_sock(__skb->sk);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  *	skb_store_bits - store bits from kernel buffer to skb
  *	@skb: destination buffer
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d2f22e74b26..c75f20b4993 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -838,6 +838,7 @@ const struct proto_ops inet_stream_ops = {
 	.recvmsg	   = sock_common_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = tcp_sendpage,
+	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e65182f7af..56ed40703f9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -254,6 +254,10 @@
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/skbuff.h>
+#include <linux/splice.h>
+#include <linux/net.h>
+#include <linux/socket.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
 #include <linux/cache.h>
@@ -265,6 +269,7 @@
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/netdma.h>
+#include <net/sock.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -291,6 +296,15 @@ atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 EXPORT_SYMBOL(tcp_sockets_allocated);
 
+/*
+ * TCP splice context
+ */
+struct tcp_splice_state {
+	struct pipe_inode_info *pipe;
+	size_t len;
+	unsigned int flags;
+};
+
 /*
  * Pressure flag: try to collapse.
  * Technical note: it is used by multiple contexts non atomically.
@@ -501,6 +515,120 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now,
 	}
 }
 
+int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+			 unsigned int offset, size_t len)
+{
+	struct tcp_splice_state *tss = rd_desc->arg.data;
+
+	return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags);
+}
+
+static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
+{
+	/* Store TCP splice context information in read_descriptor_t. */
+	read_descriptor_t rd_desc = {
+		.arg.data = tss,
+	};
+
+	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
+}
+
+/**
+ *  tcp_splice_read - splice data from TCP socket to a pipe
+ * @sock:	socket to splice from
+ * @ppos:	position (not valid)
+ * @pipe:	pipe to splice to
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Description:
+ *    Will read pages from given socket and fill them into a pipe.
+ *
+ **/
+ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
+			struct pipe_inode_info *pipe, size_t len,
+			unsigned int flags)
+{
+	struct sock *sk = sock->sk;
+	struct tcp_splice_state tss = {
+		.pipe = pipe,
+		.len = len,
+		.flags = flags,
+	};
+	long timeo;
+	ssize_t spliced;
+	int ret;
+
+	/*
+	 * We can't seek on a socket input
+	 */
+	if (unlikely(*ppos))
+		return -ESPIPE;
+
+	ret = spliced = 0;
+
+	lock_sock(sk);
+
+	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+	while (tss.len) {
+		ret = __tcp_splice_read(sk, &tss);
+		if (ret < 0)
+			break;
+		else if (!ret) {
+			if (spliced)
+				break;
+			if (flags & SPLICE_F_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
+			if (sock_flag(sk, SOCK_DONE))
+				break;
+			if (sk->sk_err) {
+				ret = sock_error(sk);
+				break;
+			}
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+			if (sk->sk_state == TCP_CLOSE) {
+				/*
+				 * This occurs when user tries to read
+				 * from never connected socket.
+				 */
+				if (!sock_flag(sk, SOCK_DONE))
+					ret = -ENOTCONN;
+				break;
+			}
+			if (!timeo) {
+				ret = -EAGAIN;
+				break;
+			}
+			sk_wait_data(sk, &timeo);
+			if (signal_pending(current)) {
+				ret = sock_intr_errno(timeo);
+				break;
+			}
+			continue;
+		}
+		tss.len -= ret;
+		spliced += ret;
+
+		release_sock(sk);
+		lock_sock(sk);
+
+		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||
+		    signal_pending(current))
+			break;
+	}
+
+	release_sock(sk);
+
+	if (spliced)
+		return spliced;
+
+	return ret;
+}
+
 static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
 			 size_t psize, int flags)
 {
@@ -2532,6 +2660,7 @@ EXPORT_SYMBOL(tcp_poll);
 EXPORT_SYMBOL(tcp_read_sock);
 EXPORT_SYMBOL(tcp_recvmsg);
 EXPORT_SYMBOL(tcp_sendmsg);
+EXPORT_SYMBOL(tcp_splice_read);
 EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/socket.c b/net/socket.c
index 74784dfe8e5..92fab9e1c60 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -112,6 +112,9 @@ static long compat_sock_ioctl(struct file *file,
 static int sock_fasync(int fd, struct file *filp, int on);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
+static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
+			        struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags);
 
 /*
  *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
@@ -134,6 +137,7 @@ static const struct file_operations socket_file_ops = {
 	.fasync =	sock_fasync,
 	.sendpage =	sock_sendpage,
 	.splice_write = generic_splice_sendpage,
+	.splice_read =	sock_splice_read,
 };
 
 /*
@@ -691,6 +695,15 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
 	return sock->ops->sendpage(sock, page, offset, size, flags);
 }
 
+static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
+			        struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags)
+{
+	struct socket *sock = file->private_data;
+
+	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
+}
+
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
 					 struct sock_iocb *siocb)
 {
-- 
cgit v1.2.3


From a0974dd3da87667e26ef5d3b32989a43319866f2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 6 Nov 2007 23:31:58 -0800
Subject: [TCP] splice: add tcp_splice_read() to IPV6

Thanks to YOSHIFUJI Hideaki for the hint!

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ecbd38894fd..85178f71b21 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -491,6 +491,7 @@ const struct proto_ops inet6_stream_ops = {
 	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = tcp_sendpage,
+	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
-- 
cgit v1.2.3


From 6ff7751d06f63131830102ffa0c9a38b313f100e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 6 Nov 2007 23:32:26 -0800
Subject: [TCP]: Make tcp_splice_data_recv() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 56ed40703f9..e055f25876d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -515,8 +515,8 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now,
 	}
 }
 
-int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
-			 unsigned int offset, size_t len)
+static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+				unsigned int offset, size_t len)
 {
 	struct tcp_splice_state *tss = rd_desc->arg.data;
 
-- 
cgit v1.2.3


From 33c732c36169d7022ad7d6eb474b0c9be43a2dc1 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Tue, 13 Nov 2007 20:30:01 -0800
Subject: [IPV4]: Add raw drops counter.

Add raw drops counter for IPv4 in /proc/net/raw .

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c |  1 +
 net/ipv4/raw.c  | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index c519b439b8b..2029d095b4c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1611,6 +1611,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_stamp = ktime_set(-1L, -1L);
 
 	atomic_set(&sk->sk_refcnt, 1);
+	atomic_set(&sk->sk_drops, 0);
 }
 
 void fastcall lock_sock_nested(struct sock *sk, int subclass)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e7050f8eabe..761056ef493 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -241,7 +241,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	/* Charge it to the socket. */
 
 	if (sock_queue_rcv_skb(sk, skb) < 0) {
-		/* FIXME: increment a raw drops counter here */
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -252,6 +252,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 int raw_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -871,28 +872,30 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
 	      srcp  = inet->num;
 
 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d",
 		i, src, srcp, dest, destp, sp->sk_state,
 		atomic_read(&sp->sk_wmem_alloc),
 		atomic_read(&sp->sk_rmem_alloc),
 		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp);
+		atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
 	return tmpbuf;
 }
 
+#define TMPSZ 128
+
 static int raw_seq_show(struct seq_file *seq, void *v)
 {
-	char tmpbuf[129];
+	char tmpbuf[TMPSZ+1];
 
 	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "%-127s\n",
+		seq_printf(seq, "%-*s\n", TMPSZ-1,
 			       "  sl  local_address rem_address   st tx_queue "
 			       "rx_queue tr tm->when retrnsmt   uid  timeout "
-			       "inode");
+			       "inode  drops");
 	else {
 		struct raw_iter_state *state = raw_seq_private(seq);
 
-		seq_printf(seq, "%-127s\n",
+		seq_printf(seq, "%-*s\n", TMPSZ-1,
 			   get_raw_sock(v, tmpbuf, state->bucket));
 	}
 	return 0;
-- 
cgit v1.2.3


From a92aa318b4b369091fd80433c80e62838db8bc1c Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Tue, 13 Nov 2007 20:31:14 -0800
Subject: [IPV6]: Add raw6 drops counter.

Add raw drops counter for IPv6 in /proc/net/raw6 .

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 807260d0358..ae314f3fea4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -354,14 +354,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
 	    skb_checksum_complete(skb)) {
-		/* FIXME: increment a raw6 drops counter here */
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return 0;
 	}
 
 	/* Charge it to the socket. */
 	if (sock_queue_rcv_skb(sk,skb)<0) {
-		/* FIXME: increment a raw6 drops counter here */
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return 0;
 	}
@@ -382,6 +382,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 	struct raw6_sock *rp = raw6_sk(sk);
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -405,7 +406,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (inet->hdrincl) {
 		if (skb_checksum_complete(skb)) {
-			/* FIXME: increment a raw6 drops counter here */
+			atomic_inc(&sk->sk_drops);
 			kfree_skb(skb);
 			return 0;
 		}
@@ -496,7 +497,7 @@ csum_copy_err:
 	   as some normal condition.
 	 */
 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
-	/* FIXME: increment a raw6 drops counter here */
+	atomic_inc(&sk->sk_drops);
 	goto out;
 }
 
@@ -1254,7 +1255,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	srcp  = inet_sk(sp)->num;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1266,7 +1267,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 		   0, 0L, 0,
 		   sock_i_uid(sp), 0,
 		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp);
+		   atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
 }
 
 static int raw6_seq_show(struct seq_file *seq, void *v)
@@ -1277,7 +1278,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
 			   "local_address                         "
 			   "remote_address                        "
 			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   "   uid  timeout inode\n");
+			   "   uid  timeout inode  drops\n");
 	else
 		raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
 	return 0;
-- 
cgit v1.2.3


From b24b8a247ff65c01b252025926fe564209fae4fc Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 23 Jan 2008 21:20:07 -0800
Subject: [NET]: Convert init_timer into setup_timer

Many-many code in the kernel initialized the timer->function
and  timer->data together with calling init_timer(timer). There
is already a helper for this. Use it for networking code.

The patch is HUGE, but makes the code 130 lines shorter
(98 insertions(+), 228 deletions(-)).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/tr.c                     |  4 +---
 net/appletalk/aarp.c             |  4 +---
 net/appletalk/ddp.c              |  5 ++---
 net/atm/lec.c                    |  5 ++---
 net/ax25/af_ax25.c               |  5 ++---
 net/bluetooth/hci_conn.c         |  9 ++-------
 net/bluetooth/hidp/core.c        |  5 +----
 net/bluetooth/l2cap.c            | 13 ++-----------
 net/bluetooth/rfcomm/core.c      |  4 +---
 net/bluetooth/sco.c              |  9 +--------
 net/core/flow.c                  |  3 +--
 net/core/neighbour.c             | 12 +++---------
 net/dccp/ccids/ccid2.c           |  6 ++----
 net/dccp/ccids/ccid3.c           |  7 ++-----
 net/dccp/timer.c                 |  5 ++---
 net/decnet/dn_route.c            |  3 +--
 net/econet/af_econet.c           |  3 +--
 net/ieee80211/ieee80211_module.c |  5 ++---
 net/ipv4/igmp.c                  | 14 +++++---------
 net/ipv4/inet_connection_sock.c  | 17 +++++------------
 net/ipv4/inet_fragment.c         |  5 ++---
 net/ipv4/ipmr.c                  |  3 +--
 net/ipv4/ipvs/ip_vs_conn.c       |  4 +---
 net/ipv4/ipvs/ip_vs_est.c        |  3 +--
 net/ipv4/ipvs/ip_vs_lblc.c       |  5 ++---
 net/ipv4/ipvs/ip_vs_lblcr.c      |  5 ++---
 net/ipv4/route.c                 |  6 ++----
 net/ipv6/addrconf.c              |  4 +---
 net/ipv6/mcast.c                 | 14 +++++---------
 net/irda/af_irda.c               |  5 ++---
 net/iucv/af_iucv.c               |  9 +--------
 net/llc/llc_conn.c               | 20 ++++++++------------
 net/llc/llc_station.c            |  5 ++---
 net/mac80211/sta_info.c          |  5 ++---
 net/netrom/nr_timer.c            | 19 ++++---------------
 net/rose/af_rose.c               |  5 ++---
 net/sched/sch_generic.c          |  9 +--------
 net/sched/sch_sfq.c              |  4 +---
 net/sctp/associola.c             |  8 +++-----
 net/sctp/transport.c             | 13 ++++---------
 net/sunrpc/sched.c               |  5 ++---
 net/sunrpc/xprt.c                |  5 ++---
 net/tipc/core.h                  |  4 +---
 net/x25/x25_link.c               |  5 +----
 net/x25/x25_timer.c              |  4 +---
 net/xfrm/xfrm_policy.c           |  5 ++---
 net/xfrm/xfrm_state.c            |  9 +++------
 47 files changed, 98 insertions(+), 228 deletions(-)

(limited to 'net')

diff --git a/net/802/tr.c b/net/802/tr.c
index 1e115e5beab..151855dd459 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -641,10 +641,8 @@ struct net_device *alloc_trdev(int sizeof_priv)
 
 static int __init rif_init(void)
 {
-	init_timer(&rif_timer);
 	rif_timer.expires  = jiffies + sysctl_tr_rif_timeout;
-	rif_timer.data     = 0L;
-	rif_timer.function = rif_check_expire;
+	setup_timer(&rif_timer, rif_check_expire, 0);
 	add_timer(&rif_timer);
 
 	proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 6c5c6dc098e..b950fb6bd2b 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -874,9 +874,7 @@ void __init aarp_proto_init(void)
 	aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
 	if (!aarp_dl)
 		printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
-	init_timer(&aarp_timer);
-	aarp_timer.function = aarp_expire_timeout;
-	aarp_timer.data	    = 0;
+	setup_timer(&aarp_timer, aarp_expire_timeout, 0);
 	aarp_timer.expires  = jiffies + sysctl_aarp_expiry_time;
 	add_timer(&aarp_timer);
 	register_netdevice_notifier(&aarp_notifier);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index e0d37d6dc1f..3be55c8ca4e 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -177,10 +177,9 @@ static inline void atalk_destroy_socket(struct sock *sk)
 
 	if (atomic_read(&sk->sk_wmem_alloc) ||
 	    atomic_read(&sk->sk_rmem_alloc)) {
-		init_timer(&sk->sk_timer);
+		setup_timer(&sk->sk_timer, atalk_destroy_timer,
+				(unsigned long)sk);
 		sk->sk_timer.expires	= jiffies + SOCK_DESTROY_TIME;
-		sk->sk_timer.function	= atalk_destroy_timer;
-		sk->sk_timer.data	= (unsigned long)sk;
 		add_timer(&sk->sk_timer);
 	} else
 		sock_put(sk);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 7eb1b21a0e9..0a9c4261968 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1789,9 +1789,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
 	}
 	memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
 	INIT_HLIST_NODE(&to_return->next);
-	init_timer(&to_return->timer);
-	to_return->timer.function = lec_arp_expire_arp;
-	to_return->timer.data = (unsigned long)to_return;
+	setup_timer(&to_return->timer, lec_arp_expire_arp,
+			(unsigned long)to_return);
 	to_return->last_used = jiffies;
 	to_return->priv = priv;
 	skb_queue_head_init(&to_return->tx_wait);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index b4725ff317c..a028d37ba2e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -330,10 +330,9 @@ void ax25_destroy_socket(ax25_cb *ax25)
 		if (atomic_read(&ax25->sk->sk_wmem_alloc) ||
 		    atomic_read(&ax25->sk->sk_rmem_alloc)) {
 			/* Defer: outstanding buffers */
-			init_timer(&ax25->dtimer);
+			setup_timer(&ax25->dtimer, ax25_destroy_timer,
+					(unsigned long)ax25);
 			ax25->dtimer.expires  = jiffies + 2 * HZ;
-			ax25->dtimer.function = ax25_destroy_timer;
-			ax25->dtimer.data     = (unsigned long)ax25;
 			add_timer(&ax25->dtimer);
 		} else {
 			struct sock *sk=ax25->sk;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 34d1a3c822b..5fc7be206f6 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -208,13 +208,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 
 	skb_queue_head_init(&conn->data_q);
 
-	init_timer(&conn->disc_timer);
-	conn->disc_timer.function = hci_conn_timeout;
-	conn->disc_timer.data = (unsigned long) conn;
-
-	init_timer(&conn->idle_timer);
-	conn->idle_timer.function = hci_conn_idle;
-	conn->idle_timer.data = (unsigned long) conn;
+	setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn);
+	setup_timer(&conn->idle_timer, hci_conn_idle, (unsigned long)conn);
 
 	atomic_set(&conn->refcnt, 0);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 4bbacddeb49..782a22602b8 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -811,10 +811,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 	session->intr_sock = intr_sock;
 	session->state     = BT_CONNECTED;
 
-	init_timer(&session->timer);
-
-	session->timer.function = hidp_idle_timeout;
-	session->timer.data     = (unsigned long) session;
+	setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session);
 
 	skb_queue_head_init(&session->ctrl_transmit);
 	skb_queue_head_init(&session->intr_transmit);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 477e052b17b..a8811c0a0ce 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -99,13 +99,6 @@ static void l2cap_sock_clear_timer(struct sock *sk)
 	sk_stop_timer(sk, &sk->sk_timer);
 }
 
-static void l2cap_sock_init_timer(struct sock *sk)
-{
-	init_timer(&sk->sk_timer);
-	sk->sk_timer.function = l2cap_sock_timeout;
-	sk->sk_timer.data = (unsigned long)sk;
-}
-
 /* ---- L2CAP channels ---- */
 static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
 {
@@ -395,9 +388,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 
 	conn->feat_mask = 0;
 
-	init_timer(&conn->info_timer);
-	conn->info_timer.function = l2cap_info_timeout;
-	conn->info_timer.data = (unsigned long) conn;
+	setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long)conn);
 
 	spin_lock_init(&conn->lock);
 	rwlock_init(&conn->chan_list.lock);
@@ -622,7 +613,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
 	sk->sk_protocol = proto;
 	sk->sk_state    = BT_OPEN;
 
-	l2cap_sock_init_timer(sk);
+	setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long)sk);
 
 	bt_sock_link(&l2cap_sk_list, sk);
 	return sk;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e7ac6ba7eca..d3e4e1877e6 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -279,9 +279,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
 	if (!d)
 		return NULL;
 
-	init_timer(&d->timer);
-	d->timer.function = rfcomm_dlc_timeout;
-	d->timer.data = (unsigned long) d;
+	setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
 
 	skb_queue_head_init(&d->tx_queue);
 	spin_lock_init(&d->lock);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 93ad1aae3f3..b91d3c81a73 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -97,13 +97,6 @@ static void sco_sock_clear_timer(struct sock *sk)
 	sk_stop_timer(sk, &sk->sk_timer);
 }
 
-static void sco_sock_init_timer(struct sock *sk)
-{
-	init_timer(&sk->sk_timer);
-	sk->sk_timer.function = sco_sock_timeout;
-	sk->sk_timer.data = (unsigned long)sk;
-}
-
 /* ---- SCO connections ---- */
 static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status)
 {
@@ -436,7 +429,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
 	sk->sk_protocol = proto;
 	sk->sk_state    = BT_OPEN;
 
-	sco_sock_init_timer(sk);
+	setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
 
 	bt_sock_link(&sco_sk_list, sk);
 	return sk;
diff --git a/net/core/flow.c b/net/core/flow.c
index 6489f4e24ec..46b38e06e0d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -352,8 +352,7 @@ static int __init flow_cache_init(void)
 	flow_lwm = 2 * flow_hash_size;
 	flow_hwm = 4 * flow_hash_size;
 
-	init_timer(&flow_hash_rnd_timer);
-	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
 	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
 	add_timer(&flow_hash_rnd_timer);
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 29b8ee4e35d..175bbc0a974 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -270,9 +270,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
 	n->parms	  = neigh_parms_clone(&tbl->parms);
-	init_timer(&n->timer);
-	n->timer.function = neigh_timer_handler;
-	n->timer.data	  = (unsigned long)n;
+	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
 
 	NEIGH_CACHE_STAT_INC(tbl, allocs);
 	n->tbl		  = tbl;
@@ -1372,15 +1370,11 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
 
 	rwlock_init(&tbl->lock);
-	init_timer(&tbl->gc_timer);
-	tbl->gc_timer.data     = (unsigned long)tbl;
-	tbl->gc_timer.function = neigh_periodic_timer;
+	setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
 	tbl->gc_timer.expires  = now + 1;
 	add_timer(&tbl->gc_timer);
 
-	init_timer(&tbl->proxy_timer);
-	tbl->proxy_timer.data	  = (unsigned long)tbl;
-	tbl->proxy_timer.function = neigh_proxy_process;
+	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
 	skb_queue_head_init_class(&tbl->proxy_queue,
 			&neigh_table_proxy_queue_class);
 
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d694656b880..c9c465e8628 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -760,10 +760,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hctx->ccid2hctx_rttvar	 = -1;
 	hctx->ccid2hctx_rpdupack = -1;
 	hctx->ccid2hctx_last_cong = jiffies;
-
-	hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
-	hctx->ccid2hctx_rtotimer.data	  = (unsigned long)sk;
-	init_timer(&hctx->ccid2hctx_rtotimer);
+	setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
+			(unsigned long)sk);
 
 	ccid2_hc_tx_check_sanity(hctx);
 	return 0;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d133416d397..f56aaecb56b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -606,11 +606,8 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
 	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
-
-	hctx->ccid3hctx_no_feedback_timer.function =
-				ccid3_hc_tx_no_feedback_timer;
-	hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
-	init_timer(&hctx->ccid3hctx_no_feedback_timer);
+	setup_timer(&hctx->ccid3hctx_no_feedback_timer,
+			ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
 
 	return 0;
 }
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3af067354bd..8703a792b56 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -280,9 +280,8 @@ static void dccp_init_write_xmit_timer(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	init_timer(&dp->dccps_xmit_timer);
-	dp->dccps_xmit_timer.data = (unsigned long)sk;
-	dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
+	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+			(unsigned long)sk);
 }
 
 void dccp_init_xmit_timers(struct sock *sk)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 0e10ff21e29..23aa3556e56 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1752,8 +1752,7 @@ void __init dn_route_init(void)
 	dn_dst_ops.kmem_cachep =
 		kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-	init_timer(&dn_route_timer);
-	dn_route_timer.function = dn_dst_check_expire;
+	setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
 	add_timer(&dn_route_timer);
 
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f70df073c58..bc0f6252613 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1014,9 +1014,8 @@ static int __init aun_udp_initialise(void)
 
 	skb_queue_head_init(&aun_queue);
 	spin_lock_init(&aun_queue_lock);
-	init_timer(&ab_cleanup_timer);
+	setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
 	ab_cleanup_timer.expires = jiffies + (HZ*2);
-	ab_cleanup_timer.function = ab_cleanup;
 	add_timer(&ab_cleanup_timer);
 
 	memset(&sin, 0, sizeof(sin));
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 69cb6aad25b..3bca97f55d4 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -181,9 +181,8 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 	ieee->ieee802_1x = 1;	/* Default to supporting 802.1x */
 
 	INIT_LIST_HEAD(&ieee->crypt_deinit_list);
-	init_timer(&ieee->crypt_deinit_timer);
-	ieee->crypt_deinit_timer.data = (unsigned long)ieee;
-	ieee->crypt_deinit_timer.function = ieee80211_crypt_deinit_handler;
+	setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler,
+			(unsigned long)ieee);
 	ieee->crypt_quiesced = 0;
 
 	spin_lock_init(&ieee->lock);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7dbc282d4f9..701558564e9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1234,9 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
 	im->tm_running=0;
-	init_timer(&im->timer);
-	im->timer.data=(unsigned long)im;
-	im->timer.function=&igmp_timer_expire;
+	setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
 	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
 	im->reporter = 0;
 	im->gsquery = 0;
@@ -1338,13 +1336,11 @@ void ip_mc_init_dev(struct in_device *in_dev)
 	in_dev->mc_tomb = NULL;
 #ifdef CONFIG_IP_MULTICAST
 	in_dev->mr_gq_running = 0;
-	init_timer(&in_dev->mr_gq_timer);
-	in_dev->mr_gq_timer.data=(unsigned long) in_dev;
-	in_dev->mr_gq_timer.function=&igmp_gq_timer_expire;
+	setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
+			(unsigned long)in_dev);
 	in_dev->mr_ifc_count = 0;
-	init_timer(&in_dev->mr_ifc_timer);
-	in_dev->mr_ifc_timer.data=(unsigned long) in_dev;
-	in_dev->mr_ifc_timer.function=&igmp_ifc_timer_expire;
+	setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
+			(unsigned long)in_dev);
 	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
 #endif
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8fb6ca23700..1c2a32f6bfc 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -277,18 +277,11 @@ void inet_csk_init_xmit_timers(struct sock *sk,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	init_timer(&icsk->icsk_retransmit_timer);
-	init_timer(&icsk->icsk_delack_timer);
-	init_timer(&sk->sk_timer);
-
-	icsk->icsk_retransmit_timer.function = retransmit_handler;
-	icsk->icsk_delack_timer.function     = delack_handler;
-	sk->sk_timer.function		     = keepalive_handler;
-
-	icsk->icsk_retransmit_timer.data =
-		icsk->icsk_delack_timer.data =
-			sk->sk_timer.data  = (unsigned long)sk;
-
+	setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
+			(unsigned long)sk);
+	setup_timer(&icsk->icsk_delack_timer, delack_handler,
+			(unsigned long)sk);
+	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
 
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e15e04fc666..737910767ff 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -66,9 +66,8 @@ void inet_frags_init(struct inet_frags *f)
 	f->nqueues = 0;
 	atomic_set(&f->mem, 0);
 
-	init_timer(&f->secret_timer);
-	f->secret_timer.function = inet_frag_secret_rebuild;
-	f->secret_timer.data = (unsigned long)f;
+	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
+			(unsigned long)f);
 	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
 	add_timer(&f->secret_timer);
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 37bb497d92a..ba6c23cdf47 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1889,8 +1889,7 @@ void __init ip_mr_init(void)
 				       sizeof(struct mfc_cache),
 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 				       NULL);
-	init_timer(&ipmr_expire_timer);
-	ipmr_expire_timer.function=ipmr_expire_process;
+	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
 	register_netdevice_notifier(&ip_mr_notifier);
 #ifdef CONFIG_PROC_FS
 	proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 0a9f3c37e18..a22cee43ed7 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -629,9 +629,7 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
 	}
 
 	INIT_LIST_HEAD(&cp->c_list);
-	init_timer(&cp->timer);
-	cp->timer.data     = (unsigned long)cp;
-	cp->timer.function = ip_vs_conn_expire;
+	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
 	cp->protocol	   = proto;
 	cp->caddr	   = caddr;
 	cp->cport	   = cport;
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 7d68b80c4c1..efdd74e4fa2 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -146,9 +146,8 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
 	write_lock_bh(&est_lock);
 	est->next = est_list;
 	if (est->next == NULL) {
-		init_timer(&est_timer);
+		setup_timer(&est_timer, estimation_timer, 0);
 		est_timer.expires = jiffies + 2*HZ;
-		est_timer.function = estimation_timer;
 		add_timer(&est_timer);
 	}
 	est_list = est;
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index ad89644ef5d..bf8c04a5754 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -391,9 +391,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Hook periodic timer for garbage collection
 	 */
-	init_timer(&tbl->periodic_timer);
-	tbl->periodic_timer.data = (unsigned long)tbl;
-	tbl->periodic_timer.function = ip_vs_lblc_check_expire;
+	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
+			(unsigned long)tbl);
 	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
 	add_timer(&tbl->periodic_timer);
 
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 2a5ed85a335..f50da641137 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -575,9 +575,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Hook periodic timer for garbage collection
 	 */
-	init_timer(&tbl->periodic_timer);
-	tbl->periodic_timer.data = (unsigned long)tbl;
-	tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
+	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
+			(unsigned long)tbl);
 	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
 	add_timer(&tbl->periodic_timer);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 28484f396b0..49e008568dd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2964,10 +2964,8 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	init_timer(&rt_flush_timer);
-	rt_flush_timer.function = rt_run_flush;
-	init_timer(&rt_secret_timer);
-	rt_secret_timer.function = rt_secret_rebuild;
+	setup_timer(&rt_flush_timer, rt_run_flush, 0);
+	setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
 
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e8c347579da..c0720e4659b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -366,9 +366,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	in6_dev_hold(ndev);
 
 #ifdef CONFIG_IPV6_PRIVACY
-	init_timer(&ndev->regen_timer);
-	ndev->regen_timer.function = ipv6_regen_rndid;
-	ndev->regen_timer.data = (unsigned long) ndev;
+	setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
 	if ((dev->flags&IFF_LOOPBACK) ||
 	    dev->type == ARPHRD_TUNNEL ||
 #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 331d728c203..17d7318ff7b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -903,9 +903,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
 		return -ENOMEM;
 	}
 
-	init_timer(&mc->mca_timer);
-	mc->mca_timer.function = igmp6_timer_handler;
-	mc->mca_timer.data = (unsigned long) mc;
+	setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
 
 	ipv6_addr_copy(&mc->mca_addr, addr);
 	mc->idev = idev;
@@ -2259,14 +2257,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	rwlock_init(&idev->mc_lock);
 	idev->mc_gq_running = 0;
-	init_timer(&idev->mc_gq_timer);
-	idev->mc_gq_timer.data = (unsigned long) idev;
-	idev->mc_gq_timer.function = &mld_gq_timer_expire;
+	setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire,
+			(unsigned long)idev);
 	idev->mc_tomb = NULL;
 	idev->mc_ifc_count = 0;
-	init_timer(&idev->mc_ifc_timer);
-	idev->mc_ifc_timer.data = (unsigned long) idev;
-	idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
+	setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
+			(unsigned long)idev);
 	idev->mc_qrv = MLD_QRV_DEFAULT;
 	idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
 	idev->mc_v1_seen = 0;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 07dfa7fdd2a..240b0cbfb53 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2410,9 +2410,8 @@ bed:
 
 			/* Set watchdog timer to expire in <val> ms. */
 			self->errno = 0;
-			init_timer(&self->watchdog);
-			self->watchdog.function = irda_discovery_timeout;
-			self->watchdog.data = (unsigned long) self;
+			setup_timer(&self->watchdog, irda_discovery_timeout,
+					(unsigned long)self);
 			self->watchdog.expires = jiffies + (val * HZ/1000);
 			add_timer(&(self->watchdog));
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index aef66458035..2255e3c082e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -94,13 +94,6 @@ static void iucv_sock_clear_timer(struct sock *sk)
 	sk_stop_timer(sk, &sk->sk_timer);
 }
 
-static void iucv_sock_init_timer(struct sock *sk)
-{
-	init_timer(&sk->sk_timer);
-	sk->sk_timer.function = iucv_sock_timeout;
-	sk->sk_timer.data = (unsigned long)sk;
-}
-
 static struct sock *__iucv_get_sock_by_name(char *nm)
 {
 	struct sock *sk;
@@ -238,7 +231,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 	sk->sk_protocol = proto;
 	sk->sk_state	= IUCV_OPEN;
 
-	iucv_sock_init_timer(sk);
+	setup_timer(&sk->sk_timer, iucv_sock_timeout, (unsigned long)sk);
 
 	iucv_sock_link(&iucv_sk_list, sk);
 	return sk;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5c0b484237c..441bc18f996 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -831,25 +831,21 @@ static void llc_sk_init(struct sock* sk)
 	llc->inc_cntr = llc->dec_cntr = 2;
 	llc->dec_step = llc->connect_step = 1;
 
-	init_timer(&llc->ack_timer.timer);
+	setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb,
+			(unsigned long)sk);
 	llc->ack_timer.expire	      = sysctl_llc2_ack_timeout;
-	llc->ack_timer.timer.data     = (unsigned long)sk;
-	llc->ack_timer.timer.function = llc_conn_ack_tmr_cb;
 
-	init_timer(&llc->pf_cycle_timer.timer);
+	setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb,
+			(unsigned long)sk);
 	llc->pf_cycle_timer.expire	   = sysctl_llc2_p_timeout;
-	llc->pf_cycle_timer.timer.data     = (unsigned long)sk;
-	llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb;
 
-	init_timer(&llc->rej_sent_timer.timer);
+	setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb,
+			(unsigned long)sk);
 	llc->rej_sent_timer.expire	   = sysctl_llc2_rej_timeout;
-	llc->rej_sent_timer.timer.data     = (unsigned long)sk;
-	llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb;
 
-	init_timer(&llc->busy_state_timer.timer);
+	setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb,
+			(unsigned long)sk);
 	llc->busy_state_timer.expire	     = sysctl_llc2_busy_timeout;
-	llc->busy_state_timer.timer.data     = (unsigned long)sk;
-	llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb;
 
 	llc->n2 = 2;   /* max retransmit */
 	llc->k  = 2;   /* tx win size, will adjust dynam */
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 576355a192a..6f2ea209032 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -688,9 +688,8 @@ int __init llc_station_init(void)
 	skb_queue_head_init(&llc_main_station.mac_pdu_q);
 	skb_queue_head_init(&llc_main_station.ev_q.list);
 	spin_lock_init(&llc_main_station.ev_q.lock);
-	init_timer(&llc_main_station.ack_timer);
-	llc_main_station.ack_timer.data     = (unsigned long)&llc_main_station;
-	llc_main_station.ack_timer.function = llc_station_ack_tmr_cb;
+	setup_timer(&llc_main_station.ack_timer, llc_station_ack_tmr_cb,
+			(unsigned long)&llc_main_station);
 	llc_main_station.ack_timer.expires  = jiffies +
 						sysctl_llc_station_ack_timeout;
 	skb = alloc_skb(0, GFP_ATOMIC);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index cfd8ee9adad..ffe8a49d892 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -346,11 +346,10 @@ void sta_info_init(struct ieee80211_local *local)
 	rwlock_init(&local->sta_lock);
 	INIT_LIST_HEAD(&local->sta_list);
 
-	init_timer(&local->sta_cleanup);
+	setup_timer(&local->sta_cleanup, sta_info_cleanup,
+		    (unsigned long)local);
 	local->sta_cleanup.expires =
 		round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
-	local->sta_cleanup.data = (unsigned long) local;
-	local->sta_cleanup.function = sta_info_cleanup;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_task);
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 6cfaad952c6..1cb98e88f5e 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -40,21 +40,10 @@ void nr_init_timers(struct sock *sk)
 {
 	struct nr_sock *nr = nr_sk(sk);
 
-	init_timer(&nr->t1timer);
-	nr->t1timer.data     = (unsigned long)sk;
-	nr->t1timer.function = &nr_t1timer_expiry;
-
-	init_timer(&nr->t2timer);
-	nr->t2timer.data     = (unsigned long)sk;
-	nr->t2timer.function = &nr_t2timer_expiry;
-
-	init_timer(&nr->t4timer);
-	nr->t4timer.data     = (unsigned long)sk;
-	nr->t4timer.function = &nr_t4timer_expiry;
-
-	init_timer(&nr->idletimer);
-	nr->idletimer.data     = (unsigned long)sk;
-	nr->idletimer.function = &nr_idletimer_expiry;
+	setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
+	setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
+	setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
+	setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
 
 	/* initialized by sock_init_data */
 	sk->sk_timer.data     = (unsigned long)sk;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ed2d65cd801..323d42a7b36 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -345,10 +345,9 @@ void rose_destroy_socket(struct sock *sk)
 	if (atomic_read(&sk->sk_wmem_alloc) ||
 	    atomic_read(&sk->sk_rmem_alloc)) {
 		/* Defer: outstanding buffers */
-		init_timer(&sk->sk_timer);
+		setup_timer(&sk->sk_timer, rose_destroy_timer,
+				(unsigned long)sk);
 		sk->sk_timer.expires  = jiffies + 10 * HZ;
-		sk->sk_timer.function = rose_destroy_timer;
-		sk->sk_timer.data     = (unsigned long)sk;
 		add_timer(&sk->sk_timer);
 	} else
 		sock_put(sk);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e595e6570ce..84c048a5479 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -211,13 +211,6 @@ static void dev_watchdog(unsigned long arg)
 	dev_put(dev);
 }
 
-static void dev_watchdog_init(struct net_device *dev)
-{
-	init_timer(&dev->watchdog_timer);
-	dev->watchdog_timer.data = (unsigned long)dev;
-	dev->watchdog_timer.function = dev_watchdog;
-}
-
 void __netdev_watchdog_up(struct net_device *dev)
 {
 	if (dev->tx_timeout) {
@@ -608,7 +601,7 @@ void dev_init_scheduler(struct net_device *dev)
 	INIT_LIST_HEAD(&dev->qdisc_list);
 	qdisc_unlock_tree(dev);
 
-	dev_watchdog_init(dev);
+	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
 
 void dev_shutdown(struct net_device *dev)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b542c875e15..65293876cd6 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -426,9 +426,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	int i;
 
-	init_timer(&q->perturb_timer);
-	q->perturb_timer.data = (unsigned long)sch;
-	q->perturb_timer.function = sfq_perturbation;
+	setup_timer(&q->perturb_timer, sfq_perturbation, (unsigned long)sch);
 
 	for (i=0; i<SFQ_HASH_DIVISOR; i++)
 		q->ht[i] = SFQ_DEPTH;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 013e3d3ab0f..33ae9b01131 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -167,11 +167,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 		sp->autoclose * HZ;
 
 	/* Initilizes the timers */
-	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) {
-		init_timer(&asoc->timers[i]);
-		asoc->timers[i].function = sctp_timer_events[i];
-		asoc->timers[i].data = (unsigned long) asoc;
-	}
+	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
+		setup_timer(&asoc->timers[i], sctp_timer_events[i],
+				(unsigned long)asoc);
 
 	/* Pull default initialization values from the sock options.
 	 * Note: This assumes that the values have already been
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d55ce83a020..dfa109341ae 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -99,15 +99,10 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	INIT_LIST_HEAD(&peer->send_ready);
 	INIT_LIST_HEAD(&peer->transports);
 
-	/* Set up the retransmission timer.  */
-	init_timer(&peer->T3_rtx_timer);
-	peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event;
-	peer->T3_rtx_timer.data = (unsigned long)peer;
-
-	/* Set up the heartbeat timer. */
-	init_timer(&peer->hb_timer);
-	peer->hb_timer.function = sctp_generate_heartbeat_event;
-	peer->hb_timer.data = (unsigned long)peer;
+	setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
+			(unsigned long)peer);
+	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
+			(unsigned long)peer);
 
 	/* Initialize the 64-bit random nonce sent with heartbeat. */
 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c98873f39ae..eed5dd9819c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -811,9 +811,8 @@ EXPORT_SYMBOL_GPL(rpc_free);
 void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	memset(task, 0, sizeof(*task));
-	init_timer(&task->tk_timer);
-	task->tk_timer.data     = (unsigned long) task;
-	task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
+	setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
+			(unsigned long)task);
 	atomic_set(&task->tk_count, 1);
 	task->tk_client = clnt;
 	task->tk_flags  = flags;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index cd641c8634f..fb92f51405c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1011,9 +1011,8 @@ found:
 	INIT_LIST_HEAD(&xprt->free);
 	INIT_LIST_HEAD(&xprt->recv);
 	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
-	init_timer(&xprt->timer);
-	xprt->timer.function = xprt_init_autodisconnect;
-	xprt->timer.data = (unsigned long) xprt;
+	setup_timer(&xprt->timer, xprt_init_autodisconnect,
+			(unsigned long)xprt);
 	xprt->last_used = jiffies;
 	xprt->cwnd = RPC_INITCWND;
 	xprt->bind_index = 0;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index e40ada964d6..feabca58082 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -212,9 +212,7 @@ static inline void k_init_timer(struct timer_list *timer, Handler routine,
 				unsigned long argument)
 {
 	dbg("initializing timer %p\n", timer);
-	init_timer(timer);
-	timer->function = routine;
-	timer->data = argument;
+	setup_timer(timer, routine, argument);
 }
 
 /**
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 741ce95d4ad..753f2b64abe 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -247,10 +247,7 @@ void x25_link_device_up(struct net_device *dev)
 		return;
 
 	skb_queue_head_init(&nb->queue);
-
-	init_timer(&nb->t20timer);
-	nb->t20timer.data     = (unsigned long)nb;
-	nb->t20timer.function = &x25_t20timer_expiry;
+	setup_timer(&nb->t20timer, x25_t20timer_expiry, (unsigned long)nb);
 
 	dev_hold(dev);
 	nb->dev      = dev;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 2af190dc5b0..d3e3e54db93 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -33,9 +33,7 @@ void x25_init_timers(struct sock *sk)
 {
 	struct x25_sock *x25 = x25_sk(sk);
 
-	init_timer(&x25->timer);
-	x25->timer.data     = (unsigned long)sk;
-	x25->timer.function = &x25_timer_expiry;
+	setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
 
 	/* initialized by sock_init_data */
 	sk->sk_timer.data     = (unsigned long)sk;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 26b846e11bf..df5bfa837eb 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -196,9 +196,8 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
 		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
 		atomic_set(&policy->refcnt, 1);
-		init_timer(&policy->timer);
-		policy->timer.data = (unsigned long)policy;
-		policy->timer.function = xfrm_policy_timer;
+		setup_timer(&policy->timer, xfrm_policy_timer,
+				(unsigned long)policy);
 	}
 	return policy;
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f26aaaca1fa..51866b7fab3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -504,12 +504,9 @@ struct xfrm_state *xfrm_state_alloc(void)
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
-		init_timer(&x->timer);
-		x->timer.function = xfrm_timer_handler;
-		x->timer.data	  = (unsigned long)x;
-		init_timer(&x->rtimer);
-		x->rtimer.function = xfrm_replay_timer_handler;
-		x->rtimer.data     = (unsigned long)x;
+		setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
+		setup_timer(&x->rtimer, xfrm_replay_timer_handler,
+				(unsigned long)x);
 		x->curlft.add_time = get_seconds();
 		x->lft.soft_byte_limit = XFRM_INF;
 		x->lft.soft_packet_limit = XFRM_INF;
-- 
cgit v1.2.3


From 0148894223740da4818d7f4e6f92cbb5481a25b8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:32:26 -0800
Subject: [IPV6]: Only set nfheader_len for top xfrm dst

We only need to set nfheader_len in the top xfrm dst.  This is because
we only ever read the nfheader_len from the top xfrm dst.

It is also easier to count nfheader_len as part of header_len which
then lets us remove the ugly wrapper functions for incrementing and
decrementing header lengths in xfrm6_policy.c.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c   |  3 ++-
 net/ipv6/xfrm6_policy.c | 26 ++++----------------------
 2 files changed, 6 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3bef30e4a23..150615758fd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1098,7 +1098,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
+		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
+			    rt->u.dst.nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
 	} else {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index b8e9eb445d7..3cad3e8dc4d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -102,24 +102,6 @@ __xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
 		(struct in6_addr*)&x->props.saddr;
 }
 
-static inline void
-__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
-{
-	if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
-		*nflen += x->props.header_len;
-	else
-		*len += x->props.header_len;
-}
-
-static inline void
-__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
-{
-	if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
-		*nflen -= x->props.header_len;
-	else
-		*len -= x->props.header_len;
-}
-
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -142,7 +124,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	int i;
 	int err = 0;
 	int header_len = 0;
-	int nfheader_len = 0;
 	int trailer_len = 0;
 
 	dst = dst_prev = NULL;
@@ -175,7 +156,9 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst1->next = dst_prev;
 		dst_prev = dst1;
 
-		__xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
+		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
+			dst->nfheader_len += xfrm[i]->props.header_len;
+		header_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
@@ -223,7 +206,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
-		dst_prev->nfheader_len	= nfheader_len;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
@@ -242,7 +224,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		x->u.rt6.rt6i_src      = rt0->rt6i_src;
 		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
 		in6_dev_hold(rt0->rt6i_idev);
-		__xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
+		header_len -= x->u.dst.xfrm->props.header_len;
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
 
-- 
cgit v1.2.3


From 550ade8432a2a6fbfb48ba7018750b0e8c81e8d0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:33:01 -0800
Subject: [IPSEC]: Use dst->header_len when resizing on output

Currently we use x->props.header_len when resizing on output.
However, if we're resizing at all we might as well go the whole hog
and do it for the whole dst.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index f4bfd6c4565..58d5a746b1c 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -19,7 +19,8 @@
 
 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
+	struct dst_entry *dst = skb->dst;
+	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
 		- skb_headroom(skb);
 
 	if (nhead > 0)
-- 
cgit v1.2.3


From b4ce92775c2e7ff9cf79cca4e0a19c8c5fd6287b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:33:32 -0800
Subject: [IPV6]: Move nfheader_len into rt6_info

The dst member nfheader_len is only used by IPv6.  It's also currently
creating a rather ugly alignment hole in struct dst.  Therefore this patch
moves it from there into struct rt6_info.

It also reorders the fields in rt6_info to minimize holes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 1 -
 net/ipv6/ip6_output.c   | 5 +++--
 net/ipv6/xfrm6_policy.c | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cc86fb110dd..5ee3a2f9fb2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -161,7 +161,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
-		dst_prev->nfheader_len	= 0;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 150615758fd..387030d2483 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1099,7 +1099,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
 		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
-			    rt->u.dst.nfheader_len;
+			    rt->nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
 	} else {
@@ -1114,7 +1114,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
-	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
+	fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len +
+			(opt ? opt->opt_nflen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 3cad3e8dc4d..5b02f0efd38 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -157,7 +157,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev = dst1;
 
 		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
-			dst->nfheader_len += xfrm[i]->props.header_len;
+			((struct rt6_info *)dst)->nfheader_len +=
+				xfrm[i]->props.header_len;
 		header_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 
-- 
cgit v1.2.3


From 352e512c32b634768303a43768245a0363cebbe7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:34:06 -0800
Subject: [NET]: Eliminate duplicate copies of dst_discard

We have a number of copies of dst_discard scattered around the place
which all do the same thing, namely free a packet on the input or
output paths.

This patch deletes all of them except dst_discard and points all the
users to it.

The only non-trivial bit is decnet where it returns an error.
However, conceptually this is identical to the blackhole functions
used in IPv4 and IPv6 which do not return errors.  So they should
either all return errors or all return zero.  For now I've stuck with
the majority and picked zero as the return value.

It doesn't really matter in practice since few if any driver would
react differently depending on a zero return value or NET_RX_DROP.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c        |  3 ++-
 net/decnet/dn_route.c | 13 +------------
 net/ipv4/route.c      | 11 +++--------
 net/ipv6/exthdrs.c    | 13 ++-----------
 net/ipv6/route.c      | 21 ++++-----------------
 5 files changed, 12 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 03daead3592..f538061f4b9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -153,11 +153,12 @@ loop:
 #endif
 }
 
-static int dst_discard(struct sk_buff *skb)
+int dst_discard(struct sk_buff *skb)
 {
 	kfree_skb(skb);
 	return 0;
 }
+EXPORT_SYMBOL(dst_discard);
 
 void * dst_alloc(struct dst_ops * ops)
 {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 23aa3556e56..2a5bb0714c7 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -764,17 +764,6 @@ drop:
 	return NET_RX_DROP;
 }
 
-/*
- * Drop packet. This is used for endnodes and for
- * when we should not be forwarding packets from
- * this dest.
- */
-static int dn_blackhole(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return NET_RX_DROP;
-}
-
 /*
  * Used to catch bugs. This should never normally get
  * called.
@@ -1396,7 +1385,7 @@ make_route:
 		default:
 		case RTN_UNREACHABLE:
 		case RTN_BLACKHOLE:
-			rt->u.dst.input = dn_blackhole;
+			rt->u.dst.input = dst_discard;
 	}
 	rt->rt_flags = flags;
 	if (rt->u.dst.dev)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49e008568dd..137b8eb666b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -92,6 +92,7 @@
 #include <linux/jhash.h>
 #include <linux/rcupdate.h>
 #include <linux/times.h>
+#include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
@@ -2357,12 +2358,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 };
 
 
-static int ipv4_blackhole_output(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
 static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
 {
 	struct rtable *ort = *rp;
@@ -2374,8 +2369,8 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
-		new->input = ipv4_blackhole_output;
-		new->output = ipv4_blackhole_output;
+		new->input = dst_discard;
+		new->output = dst_discard;
 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 
 		new->dev = ort->u.dst.dev;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1e89efd38a0..cee06b1655c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -32,6 +32,7 @@
 #include <linux/in6.h>
 #include <linux/icmpv6.h>
 
+#include <net/dst.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
@@ -318,18 +319,8 @@ void __init ipv6_destopt_init(void)
 		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
 }
 
-/********************************
-  NONE header. No data in packet.
- ********************************/
-
-static int ipv6_nodata_rcv(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
 static struct inet6_protocol nodata_protocol = {
-	.handler	=	ipv6_nodata_rcv,
+	.handler	=	dst_discard,
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 20083e0d399..ac70e2d3b10 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -152,7 +152,6 @@ struct rt6_info ip6_null_entry = {
 
 static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-static int ip6_pkt_blk_hole(struct sk_buff *skb);
 
 struct rt6_info ip6_prohibit_entry = {
 	.u = {
@@ -181,8 +180,8 @@ struct rt6_info ip6_blk_hole_entry = {
 			.obsolete	= -1,
 			.error		= -EINVAL,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_blk_hole,
-			.output		= ip6_pkt_blk_hole,
+			.input		= dst_discard,
+			.output		= dst_discard,
 			.ops		= &ip6_dst_ops,
 			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
 		}
@@ -782,12 +781,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 
 EXPORT_SYMBOL(ip6_route_output);
 
-static int ip6_blackhole_output(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
 {
 	struct rt6_info *ort = (struct rt6_info *) *dstp;
@@ -800,8 +793,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
-		new->input = ip6_blackhole_output;
-		new->output = ip6_blackhole_output;
+		new->input = dst_discard;
+		new->output = dst_discard;
 
 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 		new->dev = ort->u.dst.dev;
@@ -1811,12 +1804,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb)
 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
 }
 
-static int ip6_pkt_blk_hole(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
 #endif
 
 /*
-- 
cgit v1.2.3


From 8ce68ceb55fb62d2c8e9a3e94c4ef6ff3e3064ce Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:35:01 -0800
Subject: [IPSEC]: Only set neighbour on top xfrm dst

The neighbour field is only used by dst_confirm which only ever happens on
the top-most xfrm dst.  So it's a waste to duplicate for every other xfrm
dst.  This patch moves its setting out of the loop so that only the top one
gets set.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 5 +++--
 net/ipv6/xfrm6_policy.c | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5ee3a2f9fb2..7d250a1bd2c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -144,6 +144,9 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	dst_prev->child = &rt->u.dst;
 	dst->path = &rt->u.dst;
 
+	/* Copy neighbout for reachability confirmation */
+	dst->neighbour = neigh_clone(rt->u.dst.neighbour);
+
 	*dst_p = dst;
 	dst = dst_prev;
 
@@ -164,8 +167,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
-		/* Copy neighbout for reachability confirmation */
-		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
 		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		if (rt0->peer)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5b02f0efd38..bc508d0a87d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -188,6 +188,10 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 	dst_prev->child = &rt->u.dst;
 	dst->path = &rt->u.dst;
+
+	/* Copy neighbour for reachability confirmation */
+	dst->neighbour = neigh_clone(rt->u.dst.neighbour);
+
 	if (rt->rt6i_node)
 		((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum;
 
@@ -210,8 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
-		/* Copy neighbour for reachability confirmation */
-		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
 		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		/* Sheit... I remember I did this right. Apparently,
-- 
cgit v1.2.3


From 45ff5a3f9a3d0b1b4f063b5285ab39b7fac59471 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:35:32 -0800
Subject: [IPSEC]: Set dst->input to dst_discard

The input function should never be invoked on IPsec dst objects.  This
is because we don't apply IPsec on input until after we've made the
routing decision.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 3 ++-
 net/ipv6/xfrm6_policy.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7d250a1bd2c..c40a71b74db 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -10,6 +10,7 @@
 
 #include <linux/compiler.h>
 #include <linux/inetdevice.h>
+#include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
@@ -167,7 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
-		dst_prev->input		= rt->u.dst.input;
+		dst_prev->input = dst_discard;
 		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		if (rt0->peer)
 			atomic_inc(&rt0->peer->refcnt);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index bc508d0a87d..89432279d3a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -14,6 +14,7 @@
 #include <linux/compiler.h>
 #include <linux/netdevice.h>
 #include <net/addrconf.h>
+#include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -214,7 +215,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
-		dst_prev->input		= rt->u.dst.input;
+		dst_prev->input = dst_discard;
 		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
-- 
cgit v1.2.3


From fff693888012806370c98c601fbaa141fb12dfca Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:36:07 -0800
Subject: [IPSEC]: Make sure idev is consistent with dev in xfrm_dst

Previously we took the device from the bottom route and idev from the
top route.  This is bad because idev may well point to a different
device.  This patch changes it so that we get the idev from the device
directly.

It also makes it an error if either dev or idev is NULL.  This is
consistent with the rest of the routing code which also treats these
cases as errors.

I've removed the err initialisation in xfrm6_policy.c because it
achieves no purpose and hid a bug when an initial version of this
patch neglected to set err to -ENODEV (fortunately the IPv4 version
warned about it).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 13 +++++++++----
 net/ipv6/xfrm6_policy.c | 15 ++++++++++-----
 2 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c40a71b74db..d903c8bdffc 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -153,14 +153,21 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 	dst_prev = *dst_p;
 	i = 0;
+	err = -ENODEV;
 	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
 		x->u.rt.fl = *fl;
 
 		dst_prev->xfrm = xfrm[i++];
 		dst_prev->dev = rt->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		if (!rt->u.dst.dev)
+			goto error;
+		dev_hold(rt->u.dst.dev);
+
+		x->u.rt.idev = in_dev_get(rt->u.dst.dev);
+		if (!x->u.rt.idev)
+			goto error;
+
 		dst_prev->obsolete	= -1;
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
@@ -181,8 +188,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		x->u.rt.rt_dst = rt0->rt_dst;
 		x->u.rt.rt_gateway = rt0->rt_gateway;
 		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
-		x->u.rt.idev = rt0->idev;
-		in_dev_hold(rt0->idev);
 		header_len -= x->u.dst.xfrm->props.header_len;
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 89432279d3a..77dc3651437 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -123,7 +123,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		}
 	};
 	int i;
-	int err = 0;
+	int err;
 	int header_len = 0;
 	int trailer_len = 0;
 
@@ -201,13 +201,20 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 	dst_prev = *dst_p;
 	i = 0;
+	err = -ENODEV;
 	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
 
 		dst_prev->xfrm = xfrm[i++];
 		dst_prev->dev = rt->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		if (!rt->u.dst.dev)
+			goto error;
+		dev_hold(rt->u.dst.dev);
+
+		x->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev);
+		if (!x->u.rt6.rt6i_idev)
+			goto error;
+
 		dst_prev->obsolete	= -1;
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
@@ -226,8 +233,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway));
 		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
 		x->u.rt6.rt6i_src      = rt0->rt6i_src;
-		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
-		in6_dev_hold(rt0->rt6i_idev);
 		header_len -= x->u.dst.xfrm->props.header_len;
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
-- 
cgit v1.2.3


From f04e7e8d7f175c05bbde3ae748bf2541da53721d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:36:51 -0800
Subject: [IPSEC]: Replace x->type->{local,remote}_addr with flags

The functions local_addr and remote_addr are more than what they're
needed for.  The same thing can be done easily with flags on the type
object.  This patch does that and simplifies the wrapper functions in
xfrm6_policy accordingly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mip6.c         | 11 ++---------
 net/ipv6/xfrm6_policy.c | 20 ++++++++------------
 2 files changed, 10 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 7fd841d4101..edfd9cdd721 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -34,11 +34,6 @@
 #include <net/xfrm.h>
 #include <net/mip6.h>
 
-static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
-{
-	return x->coaddr;
-}
-
 static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
 {
 	return (n - len + 16) & 0x7;
@@ -337,14 +332,13 @@ static struct xfrm_type mip6_destopt_type =
 	.description	= "MIP6DESTOPT",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_DSTOPTS,
-	.flags		= XFRM_TYPE_NON_FRAGMENT,
+	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
 	.init_state	= mip6_destopt_init_state,
 	.destructor	= mip6_destopt_destroy,
 	.input		= mip6_destopt_input,
 	.output		= mip6_destopt_output,
 	.reject		= mip6_destopt_reject,
 	.hdr_offset	= mip6_destopt_offset,
-	.local_addr	= mip6_xfrm_addr,
 };
 
 static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
@@ -467,13 +461,12 @@ static struct xfrm_type mip6_rthdr_type =
 	.description	= "MIP6RT",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_ROUTING,
-	.flags		= XFRM_TYPE_NON_FRAGMENT,
+	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
 	.init_state	= mip6_rthdr_init_state,
 	.destructor	= mip6_rthdr_destroy,
 	.input		= mip6_rthdr_input,
 	.output		= mip6_rthdr_output,
 	.hdr_offset	= mip6_rthdr_offset,
-	.remote_addr	= mip6_xfrm_addr,
 };
 
 static int __init mip6_init(void)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 77dc3651437..3b38e493d15 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -87,20 +87,16 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 	return dst;
 }
 
-static inline struct in6_addr*
-__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
+static inline xfrm_address_t *__xfrm6_bundle_addr_remote(struct xfrm_state *x)
 {
-	return (x->type->remote_addr) ?
-		(struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
-		(struct in6_addr*)&x->id.daddr;
+	return (x->type->flags & XFRM_TYPE_REMOTE_COADDR) ? x->coaddr :
+							    &x->id.daddr;
 }
 
-static inline struct in6_addr*
-__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
+static inline xfrm_address_t *__xfrm6_bundle_addr_local(struct xfrm_state *x)
 {
-	return (x->type->local_addr) ?
-		(struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) :
-		(struct in6_addr*)&x->props.saddr;
+	return (x->type->flags & XFRM_TYPE_LOCAL_COADDR) ? x->coaddr :
+							   &x->props.saddr;
 }
 
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
@@ -171,9 +167,9 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
 				break;
 			case AF_INET6:
-				ipv6_addr_copy(&fl_tunnel.fl6_dst, __xfrm6_bundle_addr_remote(xfrm[i], &fl->fl6_dst));
+				ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr *)__xfrm6_bundle_addr_remote(xfrm[i]));
 
-				ipv6_addr_copy(&fl_tunnel.fl6_src, __xfrm6_bundle_addr_local(xfrm[i], &fl->fl6_src));
+				ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr *)__xfrm6_bundle_addr_local(xfrm[i]));
 				break;
 			default:
 				BUG_ON(1);
-- 
cgit v1.2.3


From 66cdb3ca27323a92712d289fc5edc7841d74a139 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:37:28 -0800
Subject: [IPSEC]: Move flow construction into xfrm_dst_lookup

This patch moves the flow construction from the callers of
xfrm_dst_lookup into that function.  It also changes xfrm_dst_lookup
so that it takes an xfrm state as its argument instead of explicit
addresses.

This removes any address-specific logic from the callers of
xfrm_dst_lookup which is needed to correctly support inter-family
transforms.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 80 +++++++++++++++++++---------------------
 net/ipv6/xfrm6_policy.c | 97 ++++++++++++++++++-------------------------------
 net/xfrm/xfrm_policy.c  | 25 ++++++++-----
 3 files changed, 88 insertions(+), 114 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d903c8bdffc..cebc8473196 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -8,7 +8,8 @@
  *
  */
 
-#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
 #include <linux/inetdevice.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
@@ -17,28 +18,44 @@
 static struct dst_ops xfrm4_dst_ops;
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
-static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr,
+					  xfrm_address_t *daddr)
 {
-	return __ip_route_output_key((struct rtable**)dst, fl);
-}
-
-static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
-{
-	struct rtable *rt;
-	struct flowi fl_tunnel = {
+	struct flowi fl = {
 		.nl_u = {
 			.ip4_u = {
+				.tos = tos,
 				.daddr = daddr->a4,
 			},
 		},
 	};
+	struct dst_entry *dst;
+	struct rtable *rt;
+	int err;
 
-	if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
-		saddr->a4 = rt->rt_src;
-		dst_release(&rt->u.dst);
-		return 0;
-	}
-	return -EHOSTUNREACH;
+	if (saddr)
+		fl.fl4_src = saddr->a4;
+
+	err = __ip_route_output_key(&rt, &fl);
+	dst = &rt->u.dst;
+	if (err)
+		dst = ERR_PTR(err);
+	return dst;
+}
+
+static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+	struct dst_entry *dst;
+	struct rtable *rt;
+
+	dst = xfrm4_dst_lookup(0, NULL, daddr);
+	if (IS_ERR(dst))
+		return -EHOSTUNREACH;
+
+	rt = (struct rtable *)dst;
+	saddr->a4 = rt->rt_src;
+	dst_release(dst);
+	return 0;
 }
 
 static struct dst_entry *
@@ -73,15 +90,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	struct dst_entry *dst, *dst_prev;
 	struct rtable *rt0 = (struct rtable*)(*dst_p);
 	struct rtable *rt = rt0;
-	struct flowi fl_tunnel = {
-		.nl_u = {
-			.ip4_u = {
-				.saddr = fl->fl4_src,
-				.daddr = fl->fl4_dst,
-				.tos = fl->fl4_tos
-			}
-		}
-	};
+	int tos = fl->fl4_tos;
 	int i;
 	int err;
 	int header_len = 0;
@@ -119,25 +128,12 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		trailer_len += xfrm[i]->props.trailer_len;
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			unsigned short encap_family = xfrm[i]->props.family;
-			switch (encap_family) {
-			case AF_INET:
-				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
-				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
-				break;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-			case AF_INET6:
-				ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
-				ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
-				break;
-#endif
-			default:
-				BUG_ON(1);
-			}
-			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
-					      &fl_tunnel, encap_family);
-			if (err)
+			dst1 = xfrm_dst_lookup(xfrm[i], tos);
+			err = PTR_ERR(dst1);
+			if (IS_ERR(dst1))
 				goto error;
+
+			rt = (struct rtable *)dst1;
 		} else
 			dst_hold(&rt->u.dst);
 	}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 3b38e493d15..8e78530865a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -11,7 +11,8 @@
  *
  */
 
-#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <net/addrconf.h>
 #include <net/dst.h>
@@ -26,35 +27,40 @@
 static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
 
-static int xfrm6_dst_lookup(struct xfrm_dst **xdst, struct flowi *fl)
+static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
+					  xfrm_address_t *daddr)
 {
-	struct dst_entry *dst = ip6_route_output(NULL, fl);
-	int err = dst->error;
-	if (!err)
-		*xdst = (struct xfrm_dst *) dst;
-	else
+	struct flowi fl = {};
+	struct dst_entry *dst;
+	int err;
+
+	memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst));
+	if (saddr)
+		memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
+
+	dst = ip6_route_output(NULL, &fl);
+
+	err = dst->error;
+	if (dst->error) {
 		dst_release(dst);
-	return err;
+		dst = ERR_PTR(err);
+	}
+
+	return dst;
 }
 
 static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
-	struct rt6_info *rt;
-	struct flowi fl_tunnel = {
-		.nl_u = {
-			.ip6_u = {
-				.daddr = *(struct in6_addr *)&daddr->a6,
-			},
-		},
-	};
-
-	if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
-		ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
-			       (struct in6_addr *)&saddr->a6);
-		dst_release(&rt->u.dst);
-		return 0;
-	}
-	return -EHOSTUNREACH;
+	struct dst_entry *dst;
+
+	dst = xfrm6_dst_lookup(0, NULL, daddr);
+	if (IS_ERR(dst))
+		return -EHOSTUNREACH;
+
+	ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6,
+		       (struct in6_addr *)&saddr->a6);
+	dst_release(dst);
+	return 0;
 }
 
 static struct dst_entry *
@@ -87,18 +93,6 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 	return dst;
 }
 
-static inline xfrm_address_t *__xfrm6_bundle_addr_remote(struct xfrm_state *x)
-{
-	return (x->type->flags & XFRM_TYPE_REMOTE_COADDR) ? x->coaddr :
-							    &x->id.daddr;
-}
-
-static inline xfrm_address_t *__xfrm6_bundle_addr_local(struct xfrm_state *x)
-{
-	return (x->type->flags & XFRM_TYPE_LOCAL_COADDR) ? x->coaddr :
-							   &x->props.saddr;
-}
-
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -110,14 +104,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	struct dst_entry *dst, *dst_prev;
 	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
 	struct rt6_info *rt  = rt0;
-	struct flowi fl_tunnel = {
-		.nl_u = {
-			.ip6_u = {
-				.saddr = fl->fl6_src,
-				.daddr = fl->fl6_dst,
-			}
-		}
-	};
 	int i;
 	int err;
 	int header_len = 0;
@@ -160,25 +146,12 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		trailer_len += xfrm[i]->props.trailer_len;
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			unsigned short encap_family = xfrm[i]->props.family;
-			switch(encap_family) {
-			case AF_INET:
-				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
-				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
-				break;
-			case AF_INET6:
-				ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr *)__xfrm6_bundle_addr_remote(xfrm[i]));
-
-				ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr *)__xfrm6_bundle_addr_local(xfrm[i]));
-				break;
-			default:
-				BUG_ON(1);
-			}
-
-			err = xfrm_dst_lookup((struct xfrm_dst **) &rt,
-					      &fl_tunnel, encap_family);
-			if (err)
+			dst1 = xfrm_dst_lookup(xfrm[i], 0);
+			err = PTR_ERR(dst1);
+			if (IS_ERR(dst1))
 				goto error;
+
+			rt = (struct rt6_info *)dst1;
 		} else
 			dst_hold(&rt->u.dst);
 	}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index df5bfa837eb..085c19d4d1b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/list.h>
@@ -84,21 +85,25 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
 	return 0;
 }
 
-int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
-		    unsigned short family)
+struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos)
 {
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	int err = 0;
+	xfrm_address_t *saddr = &x->props.saddr;
+	xfrm_address_t *daddr = &x->id.daddr;
+	struct xfrm_policy_afinfo *afinfo;
+	struct dst_entry *dst;
 
+	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
+		saddr = x->coaddr;
+	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
+		daddr = x->coaddr;
+
+	afinfo = xfrm_policy_get_afinfo(x->props.family);
 	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
+		return ERR_PTR(-EAFNOSUPPORT);
 
-	if (likely(afinfo->dst_lookup != NULL))
-		err = afinfo->dst_lookup(dst, fl);
-	else
-		err = -EINVAL;
+	dst = afinfo->dst_lookup(tos, saddr, daddr);
 	xfrm_policy_put_afinfo(afinfo);
-	return err;
+	return dst;
 }
 EXPORT_SYMBOL(xfrm_dst_lookup);
 
-- 
cgit v1.2.3


From 25ee3286dcbc830a833354bb1d15567956844813 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 11 Dec 2007 09:32:34 -0800
Subject: [IPSEC]: Merge common code into xfrm_bundle_create

Half of the code in xfrm4_bundle_create and xfrm6_bundle_create are
common.  This patch extracts that logic and puts it into
xfrm_bundle_create.  The rest of it are then accessed through afinfo.

As a result this fixes the problem with inter-family transforms where
we treat every xfrm dst in the bundle as if it belongs to the top
family.

This patch also fixes a long-standing error-path bug where we may free
the xfrm states twice.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 134 +++++++----------------------------
 net/ipv6/xfrm6_policy.c | 136 ++++++-----------------------------
 net/xfrm/xfrm_policy.c  | 183 +++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 207 insertions(+), 246 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cebc8473196..1d7524375b4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -79,122 +79,39 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 	return dst;
 }
 
-/* Allocate chain of dst_entry's, attach known xfrm's, calculate
- * all the metrics... Shortly, bundle a bundle.
- */
-
-static int
-__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
-		      struct flowi *fl, struct dst_entry **dst_p)
+static int xfrm4_get_tos(struct flowi *fl)
 {
-	struct dst_entry *dst, *dst_prev;
-	struct rtable *rt0 = (struct rtable*)(*dst_p);
-	struct rtable *rt = rt0;
-	int tos = fl->fl4_tos;
-	int i;
-	int err;
-	int header_len = 0;
-	int trailer_len = 0;
-
-	dst = dst_prev = NULL;
-	dst_hold(&rt->u.dst);
-
-	for (i = 0; i < nx; i++) {
-		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
-		struct xfrm_dst *xdst;
-
-		if (unlikely(dst1 == NULL)) {
-			err = -ENOBUFS;
-			dst_release(&rt->u.dst);
-			goto error;
-		}
+	return fl->fl4_tos;
+}
 
-		if (!dst)
-			dst = dst1;
-		else {
-			dst_prev->child = dst1;
-			dst1->flags |= DST_NOHASH;
-			dst_clone(dst1);
-		}
+static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+{
+	struct rtable *rt = (struct rtable *)xdst->route;
 
-		xdst = (struct xfrm_dst *)dst1;
-		xdst->route = &rt->u.dst;
-		xdst->genid = xfrm[i]->genid;
+	xdst->u.rt.fl = rt->fl;
 
-		dst1->next = dst_prev;
-		dst_prev = dst1;
+	xdst->u.dst.dev = dev;
+	dev_hold(dev);
 
-		header_len += xfrm[i]->props.header_len;
-		trailer_len += xfrm[i]->props.trailer_len;
+	xdst->u.rt.idev = in_dev_get(dev);
+	if (!xdst->u.rt.idev)
+		return -ENODEV;
 
-		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			dst1 = xfrm_dst_lookup(xfrm[i], tos);
-			err = PTR_ERR(dst1);
-			if (IS_ERR(dst1))
-				goto error;
+	xdst->u.rt.peer = rt->peer;
+	if (rt->peer)
+		atomic_inc(&rt->peer->refcnt);
 
-			rt = (struct rtable *)dst1;
-		} else
-			dst_hold(&rt->u.dst);
-	}
+	/* Sheit... I remember I did this right. Apparently,
+	 * it was magically lost, so this code needs audit */
+	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
+					      RTCF_LOCAL);
+	xdst->u.rt.rt_type = rt->rt_type;
+	xdst->u.rt.rt_src = rt->rt_src;
+	xdst->u.rt.rt_dst = rt->rt_dst;
+	xdst->u.rt.rt_gateway = rt->rt_gateway;
+	xdst->u.rt.rt_spec_dst = rt->rt_spec_dst;
 
-	dst_prev->child = &rt->u.dst;
-	dst->path = &rt->u.dst;
-
-	/* Copy neighbout for reachability confirmation */
-	dst->neighbour = neigh_clone(rt->u.dst.neighbour);
-
-	*dst_p = dst;
-	dst = dst_prev;
-
-	dst_prev = *dst_p;
-	i = 0;
-	err = -ENODEV;
-	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
-		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-		x->u.rt.fl = *fl;
-
-		dst_prev->xfrm = xfrm[i++];
-		dst_prev->dev = rt->u.dst.dev;
-		if (!rt->u.dst.dev)
-			goto error;
-		dev_hold(rt->u.dst.dev);
-
-		x->u.rt.idev = in_dev_get(rt->u.dst.dev);
-		if (!x->u.rt.idev)
-			goto error;
-
-		dst_prev->obsolete	= -1;
-		dst_prev->flags	       |= DST_HOST;
-		dst_prev->lastuse	= jiffies;
-		dst_prev->header_len	= header_len;
-		dst_prev->trailer_len	= trailer_len;
-		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
-
-		dst_prev->input = dst_discard;
-		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
-		if (rt0->peer)
-			atomic_inc(&rt0->peer->refcnt);
-		x->u.rt.peer = rt0->peer;
-		/* Sheit... I remember I did this right. Apparently,
-		 * it was magically lost, so this code needs audit */
-		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
-		x->u.rt.rt_type = rt0->rt_type;
-		x->u.rt.rt_src = rt0->rt_src;
-		x->u.rt.rt_dst = rt0->rt_dst;
-		x->u.rt.rt_gateway = rt0->rt_gateway;
-		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
-		header_len -= x->u.dst.xfrm->props.header_len;
-		trailer_len -= x->u.dst.xfrm->props.trailer_len;
-	}
-
-	xfrm_init_pmtu(dst);
 	return 0;
-
-error:
-	if (dst)
-		dst_free(dst);
-	return err;
 }
 
 static void
@@ -330,8 +247,9 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.dst_lookup =		xfrm4_dst_lookup,
 	.get_saddr =		xfrm4_get_saddr,
 	.find_bundle = 		__xfrm4_find_bundle,
-	.bundle_create =	__xfrm4_bundle_create,
 	.decode_session =	_decode_session4,
+	.get_tos =		xfrm4_get_tos,
+	.fill_dst =		xfrm4_fill_dst,
 };
 
 static void __init xfrm4_policy_init(void)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8e78530865a..63932c5fd3c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -93,126 +93,33 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 	return dst;
 }
 
-/* Allocate chain of dst_entry's, attach known xfrm's, calculate
- * all the metrics... Shortly, bundle a bundle.
- */
-
-static int
-__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
-		      struct flowi *fl, struct dst_entry **dst_p)
+static int xfrm6_get_tos(struct flowi *fl)
 {
-	struct dst_entry *dst, *dst_prev;
-	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
-	struct rt6_info *rt  = rt0;
-	int i;
-	int err;
-	int header_len = 0;
-	int trailer_len = 0;
-
-	dst = dst_prev = NULL;
-	dst_hold(&rt->u.dst);
-
-	for (i = 0; i < nx; i++) {
-		struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
-		struct xfrm_dst *xdst;
+	return 0;
+}
 
-		if (unlikely(dst1 == NULL)) {
-			err = -ENOBUFS;
-			dst_release(&rt->u.dst);
-			goto error;
-		}
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+{
+	struct rt6_info *rt = (struct rt6_info*)xdst->route;
 
-		if (!dst)
-			dst = dst1;
-		else {
-			dst_prev->child = dst1;
-			dst1->flags |= DST_NOHASH;
-			dst_clone(dst1);
-		}
+	xdst->u.dst.dev = dev;
+	dev_hold(dev);
 
-		xdst = (struct xfrm_dst *)dst1;
-		xdst->route = &rt->u.dst;
-		xdst->genid = xfrm[i]->genid;
-		if (rt->rt6i_node)
-			xdst->route_cookie = rt->rt6i_node->fn_sernum;
-
-		dst1->next = dst_prev;
-		dst_prev = dst1;
-
-		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
-			((struct rt6_info *)dst)->nfheader_len +=
-				xfrm[i]->props.header_len;
-		header_len += xfrm[i]->props.header_len;
-		trailer_len += xfrm[i]->props.trailer_len;
-
-		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			dst1 = xfrm_dst_lookup(xfrm[i], 0);
-			err = PTR_ERR(dst1);
-			if (IS_ERR(dst1))
-				goto error;
-
-			rt = (struct rt6_info *)dst1;
-		} else
-			dst_hold(&rt->u.dst);
-	}
+	xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev);
+	if (!xdst->u.rt6.rt6i_idev)
+		return -ENODEV;
 
-	dst_prev->child = &rt->u.dst;
-	dst->path = &rt->u.dst;
-
-	/* Copy neighbour for reachability confirmation */
-	dst->neighbour = neigh_clone(rt->u.dst.neighbour);
-
-	if (rt->rt6i_node)
-		((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum;
-
-	*dst_p = dst;
-	dst = dst_prev;
-
-	dst_prev = *dst_p;
-	i = 0;
-	err = -ENODEV;
-	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
-		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-
-		dst_prev->xfrm = xfrm[i++];
-		dst_prev->dev = rt->u.dst.dev;
-		if (!rt->u.dst.dev)
-			goto error;
-		dev_hold(rt->u.dst.dev);
-
-		x->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev);
-		if (!x->u.rt6.rt6i_idev)
-			goto error;
-
-		dst_prev->obsolete	= -1;
-		dst_prev->flags	       |= DST_HOST;
-		dst_prev->lastuse	= jiffies;
-		dst_prev->header_len	= header_len;
-		dst_prev->trailer_len	= trailer_len;
-		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
-
-		dst_prev->input = dst_discard;
-		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
-		/* Sheit... I remember I did this right. Apparently,
-		 * it was magically lost, so this code needs audit */
-		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTF_ANYCAST|RTF_LOCAL);
-		x->u.rt6.rt6i_metric   = rt0->rt6i_metric;
-		x->u.rt6.rt6i_node     = rt0->rt6i_node;
-		x->u.rt6.rt6i_gateway  = rt0->rt6i_gateway;
-		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway));
-		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
-		x->u.rt6.rt6i_src      = rt0->rt6i_src;
-		header_len -= x->u.dst.xfrm->props.header_len;
-		trailer_len -= x->u.dst.xfrm->props.trailer_len;
-	}
+	/* Sheit... I remember I did this right. Apparently,
+	 * it was magically lost, so this code needs audit */
+	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
+						   RTF_LOCAL);
+	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
+	xdst->u.rt6.rt6i_node = rt->rt6i_node;
+	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
+	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
+	xdst->u.rt6.rt6i_src = rt->rt6i_src;
 
-	xfrm_init_pmtu(dst);
 	return 0;
-
-error:
-	if (dst)
-		dst_free(dst);
-	return err;
 }
 
 static inline void
@@ -355,8 +262,9 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.dst_lookup =		xfrm6_dst_lookup,
 	.get_saddr = 		xfrm6_get_saddr,
 	.find_bundle =		__xfrm6_find_bundle,
-	.bundle_create =	__xfrm6_bundle_create,
 	.decode_session =	_decode_session6,
+	.get_tos =		xfrm6_get_tos,
+	.fill_dst =		xfrm6_fill_dst,
 };
 
 static void __init xfrm6_policy_init(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 085c19d4d1b..b153f748205 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
@@ -50,6 +51,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+static void xfrm_init_pmtu(struct dst_entry *dst);
 
 static inline int
 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
@@ -85,7 +87,8 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
 	return 0;
 }
 
-struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos)
+static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
+						int family)
 {
 	xfrm_address_t *saddr = &x->props.saddr;
 	xfrm_address_t *daddr = &x->id.daddr;
@@ -97,7 +100,7 @@ struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos)
 	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
 		daddr = x->coaddr;
 
-	afinfo = xfrm_policy_get_afinfo(x->props.family);
+	afinfo = xfrm_policy_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return ERR_PTR(-EAFNOSUPPORT);
 
@@ -105,7 +108,6 @@ struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos)
 	xfrm_policy_put_afinfo(afinfo);
 	return dst;
 }
-EXPORT_SYMBOL(xfrm_dst_lookup);
 
 static inline unsigned long make_jiffies(long secs)
 {
@@ -1234,24 +1236,164 @@ xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short fa
 	return x;
 }
 
-/* Allocate chain of dst_entry's, attach known xfrm's, calculate
- * all the metrics... Shortly, bundle a bundle.
- */
+static inline int xfrm_get_tos(struct flowi *fl, int family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int tos;
 
-static int
-xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
-		   struct flowi *fl, struct dst_entry **dst_p,
-		   unsigned short family)
+	if (!afinfo)
+		return -EINVAL;
+
+	tos = afinfo->get_tos(fl);
+
+	xfrm_policy_put_afinfo(afinfo);
+
+	return tos;
+}
+
+static inline struct xfrm_dst *xfrm_alloc_dst(int family)
 {
-	int err;
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
+	struct xfrm_dst *xdst;
+
+	if (!afinfo)
+		return ERR_PTR(-EINVAL);
+
+	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
+
+	xfrm_policy_put_afinfo(afinfo);
+
+	return xdst;
+}
+
+static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+{
+	struct xfrm_policy_afinfo *afinfo =
+		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
+	int err;
+
+	if (!afinfo)
 		return -EINVAL;
-	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
+
+	err = afinfo->fill_dst(xdst, dev);
+
 	xfrm_policy_put_afinfo(afinfo);
+
 	return err;
 }
 
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
+					    struct xfrm_state **xfrm, int nx,
+					    struct flowi *fl,
+					    struct dst_entry *dst)
+{
+	unsigned long now = jiffies;
+	struct net_device *dev;
+	struct dst_entry *dst_prev = NULL;
+	struct dst_entry *dst0 = NULL;
+	int i = 0;
+	int err;
+	int header_len = 0;
+	int trailer_len = 0;
+	int tos;
+	int family = policy->selector.family;
+
+	tos = xfrm_get_tos(fl, family);
+	err = tos;
+	if (tos < 0)
+		goto put_states;
+
+	dst_hold(dst);
+
+	for (; i < nx; i++) {
+		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
+		struct dst_entry *dst1 = &xdst->u.dst;
+
+		err = PTR_ERR(xdst);
+		if (IS_ERR(xdst)) {
+			dst_release(dst);
+			goto put_states;
+		}
+
+		if (!dst_prev)
+			dst0 = dst1;
+		else {
+			dst_prev->child = dst_clone(dst1);
+			dst1->flags |= DST_NOHASH;
+		}
+
+		xdst->route = dst;
+		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
+
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+			family = xfrm[i]->props.family;
+			dst = xfrm_dst_lookup(xfrm[i], tos, family);
+			err = PTR_ERR(dst);
+			if (IS_ERR(dst))
+				goto put_states;
+		} else
+			dst_hold(dst);
+
+		dst1->xfrm = xfrm[i];
+		xdst->genid = xfrm[i]->genid;
+
+		dst1->obsolete = -1;
+		dst1->flags |= DST_HOST;
+		dst1->lastuse = now;
+
+		dst1->input = dst_discard;
+		dst1->output = xfrm[i]->outer_mode->afinfo->output;
+
+		dst1->next = dst_prev;
+		dst_prev = dst1;
+
+		header_len += xfrm[i]->props.header_len;
+		trailer_len += xfrm[i]->props.trailer_len;
+	}
+
+	dst_prev->child = dst;
+	dst0->path = dst;
+
+	err = -ENODEV;
+	dev = dst->dev;
+	if (!dev)
+		goto free_dst;
+
+	/* Copy neighbout for reachability confirmation */
+	dst0->neighbour = neigh_clone(dst->neighbour);
+
+	xfrm_init_pmtu(dst_prev);
+
+	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
+		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
+
+		err = xfrm_fill_dst(xdst, dev);
+		if (err)
+			goto free_dst;
+
+		dst_prev->header_len = header_len;
+		dst_prev->trailer_len = trailer_len;
+		header_len -= xdst->u.dst.xfrm->props.header_len;
+		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
+	}
+
+out:
+	return dst0;
+
+put_states:
+	for (; i < nx; i++)
+		xfrm_state_put(xfrm[i]);
+free_dst:
+	if (dst0)
+		dst_free(dst0);
+	dst0 = ERR_PTR(err);
+	goto out;
+}
+
 static int inline
 xfrm_dst_alloc_copy(void **target, void *src, int size)
 {
@@ -1454,15 +1596,10 @@ restart:
 			return 0;
 		}
 
-		dst = dst_orig;
-		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
-
-		if (unlikely(err)) {
-			int i;
-			for (i=0; i<nx; i++)
-				xfrm_state_put(xfrm[i]);
+		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
+		err = PTR_ERR(dst);
+		if (IS_ERR(dst))
 			goto error;
-		}
 
 		for (pi = 0; pi < npols; pi++) {
 			read_lock_bh(&pols[pi]->lock);
@@ -1886,7 +2023,7 @@ static int xfrm_flush_bundles(void)
 	return 0;
 }
 
-void xfrm_init_pmtu(struct dst_entry *dst)
+static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1907,8 +2044,6 @@ void xfrm_init_pmtu(struct dst_entry *dst)
 	} while ((dst = dst->next));
 }
 
-EXPORT_SYMBOL(xfrm_init_pmtu);
-
 /* Check that the bundle accepts the flow and its components are
  * still valid.
  */
-- 
cgit v1.2.3


From e40b3286158565909692e5914ea4a11bdbcc68c8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:39:08 -0800
Subject: [IPSEC]: Forbid BEET + ipcomp for now

While BEET can theoretically work with IPComp the current code can't
do that because it tries to construct a BEET mode tunnel type which
doesn't (and cannot) exist.  In fact as it is it won't even attach a
tunnel object at all for BEET which is bogus.

To support this fully we'd also need to change the policy checks on
input to recognise a plain tunnel as a legal variant of an optional
BEET transform.

This patch simply fails such constructions for now.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipcomp.c  | 20 ++++++++++++--------
 net/ipv6/ipcomp6.c | 19 ++++++++-----------
 2 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2c44a94c213..f4af99ad8fd 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -182,7 +182,6 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 {
 	struct xfrm_state *t;
-	u8 mode = XFRM_MODE_TUNNEL;
 
 	t = xfrm_state_alloc();
 	if (t == NULL)
@@ -193,9 +192,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->id.daddr.a4 = x->id.daddr.a4;
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET;
-	if (x->props.mode == XFRM_MODE_BEET)
-		mode = x->props.mode;
-	t->props.mode = mode;
+	t->props.mode = x->props.mode;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
 
@@ -389,15 +386,22 @@ static int ipcomp_init_state(struct xfrm_state *x)
 	if (x->encap)
 		goto out;
 
+	x->props.header_len = 0;
+	switch (x->props.mode) {
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
+		x->props.header_len += sizeof(struct iphdr);
+		break;
+	default:
+		goto out;
+	}
+
 	err = -ENOMEM;
 	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
 	if (!ipcd)
 		goto out;
 
-	x->props.header_len = 0;
-	if (x->props.mode == XFRM_MODE_TUNNEL)
-		x->props.header_len += sizeof(struct iphdr);
-
 	mutex_lock(&ipcomp_resource_mutex);
 	if (!ipcomp_alloc_scratches())
 		goto error;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 0cd4056f912..b276d04d6db 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -190,7 +190,6 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 {
 	struct xfrm_state *t = NULL;
-	u8 mode = XFRM_MODE_TUNNEL;
 
 	t = xfrm_state_alloc();
 	if (!t)
@@ -204,9 +203,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
-	if (x->props.mode == XFRM_MODE_BEET)
-		mode = x->props.mode;
-	t->props.mode = mode;
+	t->props.mode = x->props.mode;
 	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
 
 	if (xfrm_init_state(t))
@@ -405,22 +402,22 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 	if (x->encap)
 		goto out;
 
-	err = -ENOMEM;
-	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
-	if (!ipcd)
-		goto out;
-
 	x->props.header_len = 0;
 	switch (x->props.mode) {
-	case XFRM_MODE_BEET:
 	case XFRM_MODE_TRANSPORT:
 		break;
 	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+		break;
 	default:
-		goto error;
+		goto out;
 	}
 
+	err = -ENOMEM;
+	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		goto out;
+
 	mutex_lock(&ipcomp6_resource_mutex);
 	if (!ipcomp6_alloc_scratches())
 		goto error;
-- 
cgit v1.2.3


From a2deb6d26f16ed7bf787dbd6a58c5d7be47d8db3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:39:38 -0800
Subject: [IPSEC]: Move x->outer_mode->output out of locked section

RO mode is the only one that requires a locked output function.  So
it's easier to move the lock into that function rather than requiring
everyone else to run under the lock.

In particular, this allows us to move the size check into the output
function without causing a potential dead-lock should the ICMP error
somehow hit the same SA on transmission.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_mode_ro.c | 3 +++
 net/xfrm/xfrm_output.c   | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index a7bc8c62317..4a01cb3c370 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -28,6 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/spinlock.h>
 #include <linux/stringify.h>
 #include <linux/time.h>
 #include <net/ipv6.h>
@@ -53,7 +54,9 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	__skb_pull(skb, hdr_len);
 	memmove(ipv6_hdr(skb), iph, hdr_len);
 
+	spin_lock_bh(&x->lock);
 	x->lastused = get_seconds();
+	spin_unlock_bh(&x->lock);
 
 	return 0;
 }
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 58d5a746b1c..b1efdc8850a 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -53,6 +53,10 @@ int xfrm_output(struct sk_buff *skb)
 	}
 
 	do {
+		err = x->outer_mode->output(x, skb);
+		if (err)
+			goto error;
+
 		spin_lock_bh(&x->lock);
 		err = xfrm_state_check(x, skb);
 		if (err)
@@ -64,10 +68,6 @@ int xfrm_output(struct sk_buff *skb)
 				xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 		}
 
-		err = x->outer_mode->output(x, skb);
-		if (err)
-			goto error;
-
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
 
-- 
cgit v1.2.3


From 29bb43b4ec4e625b0659186fc8a7c8f8b7c81982 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:40:13 -0800
Subject: [INET]: Give outer DSCP directly to ip*_copy_dscp

This patch changes the prototype of ipv4_copy_dscp and ipv6_copy_dscp so
that they directly take the outer DSCP rather than the outer IP header.
This will help us to unify the code for inter-family tunnels.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_tunnel.c | 2 +-
 net/ipv6/ip6_tunnel.c        | 2 +-
 net/ipv6/xfrm6_mode_tunnel.c | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e4deecba6dd..68a9f56ff09 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -113,7 +113,7 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ip_hdr(skb);
 	if (iph->protocol == IPPROTO_IPIP) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv4_copy_dscp(iph, ipip_hdr(skb));
+			ipv4_copy_dscp(ipv4_get_dsfield(iph), ipip_hdr(skb));
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip_ecn_decapsulate(skb);
 	}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5383b33db8c..a4051afaf77 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -635,7 +635,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 					struct sk_buff *skb)
 {
 	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv6_copy_dscp(ipv6h, ipv6_hdr(skb));
+		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
 
 	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
 		IP6_ECN_set_ce(ipv6_hdr(skb));
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index fd84e221727..9a43ea72248 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -95,7 +95,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	nh = skb_network_header(skb);
 	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb));
+			ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+				       ipipv6_hdr(skb));
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip6_ecn_decapsulate(skb);
 	} else {
-- 
cgit v1.2.3


From 36cf9acf93e8561d9faec24849e57688a81eb9c5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:40:52 -0800
Subject: [IPSEC]: Separate inner/outer mode processing on output

With inter-family transforms the inner mode differs from the outer
mode.  Attempting to handle both sides from the same function means
that it needs to handle both IPv4 and IPv6 which creates duplication
and confusion.

This patch separates the two parts on the output path so that each
function deals with one family only.

In particular, the functions xfrm4_extract_output/xfrm6_extract_output
moves the pertinent fields from the IPv4/IPv6 IP headers into a
neutral format stored in skb->cb.  This is then used by the outer mode
output functions to write the outer IP header.  In this way the output
function no longer has to know about the inner address family.

Since the extract functions are only called by tunnel modes (the only
modes that can support inter-family transforms), I've also moved the
xfrm*_tunnel_check_size calls into them.  This allows the correct ICMP
message to be sent as opposed to now where you might call icmp_send
with an IPv6 packet and vice versa.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_beet.c   | 15 +++++++++++++--
 net/ipv4/xfrm4_mode_tunnel.c | 37 +++++++++++--------------------------
 net/ipv4/xfrm4_output.c      | 41 ++++++++++++++++++++++++++++++-----------
 net/ipv4/xfrm4_state.c       | 17 +++++++++++++++++
 net/ipv6/xfrm6_mode_beet.c   | 28 ++++++++++++++--------------
 net/ipv6/xfrm6_mode_tunnel.c | 31 ++++++++++---------------------
 net/ipv6/xfrm6_output.c      | 39 ++++++++++++++++++++++++++++++---------
 net/ipv6/xfrm6_state.c       | 18 ++++++++++++++++++
 8 files changed, 143 insertions(+), 83 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index e42e122414b..94842adce14 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -43,7 +43,17 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen);
 
 	top_iph = ip_hdr(skb);
-	memmove(top_iph, iph, sizeof(*iph));
+
+	top_iph->ihl = 5;
+	top_iph->version = 4;
+
+	top_iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+	top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+
+	top_iph->id = XFRM_MODE_SKB_CB(skb)->id;
+	top_iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
+	top_iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
+
 	if (unlikely(optlen)) {
 		BUG_ON(optlen < 0);
 
@@ -111,7 +121,8 @@ out:
 
 static struct xfrm_mode xfrm4_beet_mode = {
 	.input = xfrm4_beet_input,
-	.output = xfrm4_beet_output,
+	.output2 = xfrm4_beet_output,
+	.output = xfrm4_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_BEET,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 68a9f56ff09..cc8bbb274e3 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -36,53 +36,37 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
-	struct iphdr *iph, *top_iph;
+	struct iphdr *top_iph;
 	int flags;
 
-	iph = ip_hdr(skb);
-
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->mac_header = skb->network_header +
 			  offsetof(struct iphdr, protocol);
-	skb->transport_header = skb->network_header + sizeof(*iph);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
 	top_iph = ip_hdr(skb);
 
 	top_iph->ihl = 5;
 	top_iph->version = 4;
 
-	flags = x->props.flags;
+	top_iph->protocol = x->inner_mode->afinfo->proto;
 
 	/* DS disclosed */
-	if (xdst->route->ops->family == AF_INET) {
-		top_iph->protocol = IPPROTO_IPIP;
-		top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
-		top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
-			0 : (iph->frag_off & htons(IP_DF));
-	}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-	else {
-		struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph;
-		top_iph->protocol = IPPROTO_IPV6;
-		top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h));
-		top_iph->frag_off = 0;
-	}
-#endif
+	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+					    XFRM_MODE_SKB_CB(skb)->tos);
 
+	flags = x->props.flags;
 	if (flags & XFRM_STATE_NOECN)
 		IP_ECN_clear(top_iph);
 
-	if (!top_iph->frag_off)
-		__ip_select_ident(top_iph, dst->child, 0);
+	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+			    0 : XFRM_MODE_SKB_CB(skb)->frag_off;
+	ip_select_ident(top_iph, dst->child, NULL);
 
 	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
 
-	skb->protocol = htons(ETH_P_IP);
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	return 0;
 }
 
@@ -136,7 +120,8 @@ out:
 
 static struct xfrm_mode xfrm4_tunnel_mode = {
 	.input = xfrm4_tunnel_input,
-	.output = xfrm4_tunnel_output,
+	.output2 = xfrm4_tunnel_output,
+	.output = xfrm4_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index c4a7156962b..13fd11335e2 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,11 +8,12 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/compiler.h>
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter_ipv4.h>
+#include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/icmp.h>
@@ -25,8 +26,6 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
 
-	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
-
 	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
 		goto out;
 
@@ -40,19 +39,39 @@ out:
 	return ret;
 }
 
+int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = xfrm4_tunnel_check_size(skb);
+	if (err)
+		return err;
+
+	return xfrm4_extract_header(skb);
+}
+
+int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = x->inner_mode->afinfo->extract_output(x, skb);
+	if (err)
+		return err;
+
+	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+
+	skb->protocol = htons(ETH_P_IP);
+
+	return x->outer_mode->output2(x, skb);
+}
+EXPORT_SYMBOL(xfrm4_prepare_output);
+
 static inline int xfrm4_output_one(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct iphdr *iph;
 	int err;
 
-	if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
-		err = xfrm4_tunnel_check_size(skb);
-		if (err)
-			goto error_nolock;
-	}
-
 	err = xfrm_output(skb);
 	if (err)
 		goto error_nolock;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 13d54a1c333..e6030e74ff6 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -47,12 +47,29 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.family = AF_INET;
 }
 
+int xfrm4_extract_header(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	XFRM_MODE_SKB_CB(skb)->id = iph->id;
+	XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
+	XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
+	XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
+	XFRM_MODE_SKB_CB(skb)->protocol = iph->protocol;
+	memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
+	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+
+	return 0;
+}
+
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
+	.proto			= IPPROTO_IPIP,
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
 	.output			= xfrm4_output,
+	.extract_output		= xfrm4_extract_output,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 2bfb4f05c14..4988ed9c76c 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -25,25 +25,24 @@
  */
 static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ipv6hdr *iph, *top_iph;
-	u8 *prevhdr;
-	int hdr_len;
+	struct ipv6hdr *top_iph;
 
-	iph = ipv6_hdr(skb);
-
-	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
-
-	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
 	skb_set_network_header(skb, -x->props.header_len);
-	skb->transport_header = skb->network_header + hdr_len;
-	__skb_pull(skb, hdr_len);
-
+	skb->mac_header = skb->network_header +
+			  offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
 	top_iph = ipv6_hdr(skb);
-	memmove(top_iph, iph, hdr_len);
 
+	top_iph->version = 6;
+
+	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(top_iph->flow_lbl));
+	top_iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+
+	ipv6_change_dsfield(top_iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
+	top_iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
-
 	return 0;
 }
 
@@ -76,7 +75,8 @@ out:
 
 static struct xfrm_mode xfrm6_beet_mode = {
 	.input = xfrm6_beet_input,
-	.output = xfrm6_beet_output,
+	.output2 = xfrm6_beet_output,
+	.output = xfrm6_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_BEET,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9a43ea72248..d45ce5d4419 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -38,33 +38,22 @@ static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
 static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
-	struct ipv6hdr *iph, *top_iph;
+	struct ipv6hdr *top_iph;
 	int dsfield;
 
-	iph = ipv6_hdr(skb);
-
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->mac_header = skb->network_header +
 			  offsetof(struct ipv6hdr, nexthdr);
-	skb->transport_header = skb->network_header + sizeof(*iph);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
 	top_iph = ipv6_hdr(skb);
 
 	top_iph->version = 6;
-	if (xdst->route->ops->family == AF_INET6) {
-		top_iph->priority = iph->priority;
-		top_iph->flow_lbl[0] = iph->flow_lbl[0];
-		top_iph->flow_lbl[1] = iph->flow_lbl[1];
-		top_iph->flow_lbl[2] = iph->flow_lbl[2];
-		top_iph->nexthdr = IPPROTO_IPV6;
-	} else {
-		top_iph->priority = 0;
-		top_iph->flow_lbl[0] = 0;
-		top_iph->flow_lbl[1] = 0;
-		top_iph->flow_lbl[2] = 0;
-		top_iph->nexthdr = IPPROTO_IPIP;
-	}
-	dsfield = ipv6_get_dsfield(top_iph);
+
+	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(top_iph->flow_lbl));
+	top_iph->nexthdr = x->inner_mode->afinfo->proto;
+
+	dsfield = XFRM_MODE_SKB_CB(skb)->tos;
 	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
@@ -72,7 +61,6 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
-	skb->protocol = htons(ETH_P_IPV6);
 	return 0;
 }
 
@@ -116,7 +104,8 @@ out:
 
 static struct xfrm_mode xfrm6_tunnel_mode = {
 	.input = xfrm6_tunnel_input,
-	.output = xfrm6_tunnel_output,
+	.output2 = xfrm6_tunnel_output,
+	.output = xfrm6_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 656976760ad..bc2e80e3b0b 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -10,10 +10,12 @@
  */
 
 #include <linux/if_ether.h>
-#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/icmpv6.h>
 #include <linux/netfilter_ipv6.h>
+#include <net/dst.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
@@ -43,19 +45,38 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 	return ret;
 }
 
+int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = xfrm6_tunnel_check_size(skb);
+	if (err)
+		return err;
+
+	return xfrm6_extract_header(skb);
+}
+
+int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = x->inner_mode->afinfo->extract_output(x, skb);
+	if (err)
+		return err;
+
+	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+	skb->protocol = htons(ETH_P_IPV6);
+
+	return x->outer_mode->output2(x, skb);
+}
+EXPORT_SYMBOL(xfrm6_prepare_output);
+
 static inline int xfrm6_output_one(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct ipv6hdr *iph;
 	int err;
 
-	if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
-		err = xfrm6_tunnel_check_size(skb);
-		if (err)
-			goto error_nolock;
-	}
-
 	err = xfrm_output(skb);
 	if (err)
 		goto error_nolock;
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b392bee396f..98b05f47232 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -14,6 +14,7 @@
 #include <net/xfrm.h>
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
+#include <net/dsfield.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 
@@ -168,13 +169,30 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 	return 0;
 }
 
+int xfrm6_extract_header(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	XFRM_MODE_SKB_CB(skb)->id = 0;
+	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+	XFRM_MODE_SKB_CB(skb)->protocol = iph->nexthdr;
+	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+
+	return 0;
+}
+
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
+	.proto			= IPPROTO_IPV6,
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 	.output			= xfrm6_output,
+	.extract_output		= xfrm6_extract_output,
 };
 
 void __init xfrm6_state_init(void)
-- 
cgit v1.2.3


From 227620e295090629fcb2c46ad3828222ab65438d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:41:28 -0800
Subject: [IPSEC]: Separate inner/outer mode processing on input

With inter-family transforms the inner mode differs from the outer
mode.  Attempting to handle both sides from the same function means
that it needs to handle both IPv4 and IPv6 which creates duplication
and confusion.

This patch separates the two parts on the input path so that each
function deals with one family only.

In particular, the functions xfrm4_extract_inut/xfrm6_extract_inut
moves the pertinent fields from the IPv4/IPv6 IP headers into a
neutral format stored in skb->cb.  This is then used by the inner mode
input functions to modify the inner IP header.  In this way the input
function no longer has to know about the outer address family.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c       |  7 ++++-
 net/ipv4/xfrm4_mode_beet.c   | 67 ++++++++++++++++++++++++++------------------
 net/ipv4/xfrm4_mode_tunnel.c | 44 +++++++----------------------
 net/ipv4/xfrm4_state.c       |  2 ++
 net/ipv6/xfrm6_input.c       |  7 ++++-
 net/ipv6/xfrm6_mode_beet.c   | 36 ++++++++++++++++--------
 net/ipv6/xfrm6_mode_tunnel.c | 31 ++++++--------------
 net/ipv6/xfrm6_output.c      |  1 +
 net/ipv6/xfrm6_state.c       |  5 +++-
 net/xfrm/xfrm_input.c        | 13 +++++++++
 10 files changed, 114 insertions(+), 99 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5e95c8a07ef..c0323d05ab6 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -16,6 +16,11 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
+int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return xfrm4_extract_header(skb);
+}
+
 #ifdef CONFIG_NETFILTER
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
@@ -91,7 +96,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 
 		xfrm_vec[xfrm_nr++] = x;
 
-		if (x->outer_mode->input(x, skb))
+		if (x->inner_mode->input(x, skb))
 			goto drop;
 
 		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 94842adce14..e093a7b59e1 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -17,6 +17,21 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
+static void xfrm4_beet_make_header(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	iph->ihl = 5;
+	iph->version = 4;
+
+	iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+	iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+
+	iph->id = XFRM_MODE_SKB_CB(skb)->id;
+	iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
+	iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
@@ -40,20 +55,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 			  offsetof(struct iphdr, protocol);
 	skb->transport_header = skb->network_header + sizeof(*iph);
 
+	xfrm4_beet_make_header(skb);
+
 	ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen);
 
 	top_iph = ip_hdr(skb);
 
-	top_iph->ihl = 5;
-	top_iph->version = 4;
-
-	top_iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
-	top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
-
-	top_iph->id = XFRM_MODE_SKB_CB(skb)->id;
-	top_iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
-	top_iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
-
 	if (unlikely(optlen)) {
 		BUG_ON(optlen < 0);
 
@@ -75,43 +82,46 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(skb);
-	int phlen = 0;
+	struct iphdr *iph;
 	int optlen = 0;
-	u8 ph_nexthdr = 0;
 	int err = -EINVAL;
 
-	if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
+	if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
 		struct ip_beet_phdr *ph;
+		int phlen;
 
 		if (!pskb_may_pull(skb, sizeof(*ph)))
 			goto out;
-		ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
+
+		ph = (struct ip_beet_phdr *)skb->data;
 
 		phlen = sizeof(*ph) + ph->padlen;
 		optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
 		if (optlen < 0 || optlen & 3 || optlen > 250)
 			goto out;
 
-		if (!pskb_may_pull(skb, phlen + optlen))
-			goto out;
-		skb->len -= phlen + optlen;
+		XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
 
-		ph_nexthdr = ph->nexthdr;
+		if (!pskb_may_pull(skb, phlen));
+			goto out;
+		__skb_pull(skb, phlen);
 	}
 
-	skb_set_network_header(skb, phlen - sizeof(*iph));
-	memmove(skb_network_header(skb), iph, sizeof(*iph));
-	skb_set_transport_header(skb, phlen + optlen);
-	skb->data = skb_transport_header(skb);
+	skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+
+	memmove(skb->data - skb->mac_len, skb_mac_header(skb),
+		skb->mac_len);
+	skb_set_mac_header(skb, -skb->mac_len);
+
+	xfrm4_beet_make_header(skb);
 
 	iph = ip_hdr(skb);
-	iph->ihl = (sizeof(*iph) + optlen) / 4;
-	iph->tot_len = htons(skb->len + iph->ihl * 4);
+
+	iph->ihl += optlen / 4;
+	iph->tot_len = htons(skb->len);
 	iph->daddr = x->sel.daddr.a4;
 	iph->saddr = x->sel.saddr.a4;
-	if (ph_nexthdr)
-		iph->protocol = ph_nexthdr;
 	iph->check = 0;
 	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	err = 0;
@@ -120,7 +130,8 @@ out:
 }
 
 static struct xfrm_mode xfrm4_beet_mode = {
-	.input = xfrm4_beet_input,
+	.input2 = xfrm4_beet_input,
+	.input = xfrm_prepare_input,
 	.output2 = xfrm4_beet_output,
 	.output = xfrm4_prepare_output,
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index cc8bbb274e3..aa335dba8ff 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,19 +16,12 @@
 
 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 {
-	struct iphdr *outer_iph = ip_hdr(skb);
 	struct iphdr *inner_iph = ipip_hdr(skb);
 
-	if (INET_ECN_is_ce(outer_iph->tos))
+	if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
 		IP_ECN_set_ce(inner_iph);
 }
 
-static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
-{
-	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(ipv6_hdr(skb));
-}
-
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per RFC 2401.
@@ -72,20 +65,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(skb);
 	const unsigned char *old_mac;
 	int err = -EINVAL;
 
-	switch (iph->protocol){
-		case IPPROTO_IPIP:
-			break;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		case IPPROTO_IPV6:
-			break;
-#endif
-		default:
-			goto out;
-	}
+	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
+		goto out;
 
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto out;
@@ -94,20 +78,11 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	iph = ip_hdr(skb);
-	if (iph->protocol == IPPROTO_IPIP) {
-		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv4_copy_dscp(ipv4_get_dsfield(iph), ipip_hdr(skb));
-		if (!(x->props.flags & XFRM_STATE_NOECN))
-			ipip_ecn_decapsulate(skb);
-	}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-	else {
-		if (!(x->props.flags & XFRM_STATE_NOECN))
-			ipip6_ecn_decapsulate(iph, skb);
-		skb->protocol = htons(ETH_P_IPV6);
-	}
-#endif
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip_ecn_decapsulate(skb);
+
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
@@ -119,7 +94,8 @@ out:
 }
 
 static struct xfrm_mode xfrm4_tunnel_mode = {
-	.input = xfrm4_tunnel_input,
+	.input2 = xfrm4_tunnel_input,
+	.input = xfrm_prepare_input,
 	.output2 = xfrm4_tunnel_output,
 	.output = xfrm4_prepare_output,
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index e6030e74ff6..85f04b7b237 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -65,10 +65,12 @@ int xfrm4_extract_header(struct sk_buff *skb)
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.proto			= IPPROTO_IPIP,
+	.eth_proto		= htons(ETH_P_IP),
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
 	.output			= xfrm4_output,
+	.extract_input		= xfrm4_extract_input,
 	.extract_output		= xfrm4_extract_output,
 };
 
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 515783707e8..c458d0a2e68 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,11 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
+int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return xfrm6_extract_header(skb);
+}
+
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
 	int err;
@@ -68,7 +73,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 
 		xfrm_vec[xfrm_nr++] = x;
 
-		if (x->outer_mode->input(x, skb))
+		if (x->inner_mode->input(x, skb))
 			goto drop;
 
 		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 4988ed9c76c..0527d11c1ae 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -19,6 +19,20 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
+static void xfrm6_beet_make_header(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	iph->version = 6;
+
+	memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(iph->flow_lbl));
+	iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+
+	ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
+	iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
@@ -31,16 +45,11 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb->mac_header = skb->network_header +
 			  offsetof(struct ipv6hdr, nexthdr);
 	skb->transport_header = skb->network_header + sizeof(*top_iph);
-	top_iph = ipv6_hdr(skb);
 
-	top_iph->version = 6;
+	xfrm6_beet_make_header(skb);
 
-	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
-	       sizeof(top_iph->flow_lbl));
-	top_iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+	top_iph = ipv6_hdr(skb);
 
-	ipv6_change_dsfield(top_iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
-	top_iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
 	return 0;
@@ -51,19 +60,21 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct ipv6hdr *ip6h;
 	const unsigned char *old_mac;
 	int size = sizeof(struct ipv6hdr);
-	int err = -EINVAL;
+	int err;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+	err = skb_cow_head(skb, size + skb->mac_len);
+	if (err)
 		goto out;
 
-	skb_push(skb, size);
-	memmove(skb->data, skb_network_header(skb), size);
+	__skb_push(skb, size);
 	skb_reset_network_header(skb);
 
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 
+	xfrm6_beet_make_header(skb);
+
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
 	ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
@@ -74,7 +85,8 @@ out:
 }
 
 static struct xfrm_mode xfrm6_beet_mode = {
-	.input = xfrm6_beet_input,
+	.input2 = xfrm6_beet_input,
+	.input = xfrm_prepare_input,
 	.output2 = xfrm6_beet_output,
 	.output = xfrm6_prepare_output,
 	.owner = THIS_MODULE,
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index d45ce5d4419..f7d0d661265 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -25,12 +25,6 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 		IP6_ECN_set_ce(inner_iph);
 }
 
-static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
-{
-	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
-			IP_ECN_set_ce(ipip_hdr(skb));
-}
-
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per RFC 2401.
@@ -68,10 +62,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
 	const unsigned char *old_mac;
-	const unsigned char *nh = skb_network_header(skb);
 
-	if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
-	    nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
+	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
 		goto out;
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
@@ -80,18 +72,12 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	nh = skb_network_header(skb);
-	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
-		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
-				       ipipv6_hdr(skb));
-		if (!(x->props.flags & XFRM_STATE_NOECN))
-			ipip6_ecn_decapsulate(skb);
-	} else {
-		if (!(x->props.flags & XFRM_STATE_NOECN))
-			ip6ip_ecn_decapsulate(skb);
-		skb->protocol = htons(ETH_P_IP);
-	}
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+			       ipipv6_hdr(skb));
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip6_ecn_decapsulate(skb);
+
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
@@ -103,7 +89,8 @@ out:
 }
 
 static struct xfrm_mode xfrm6_tunnel_mode = {
-	.input = xfrm6_tunnel_input,
+	.input2 = xfrm6_tunnel_input,
+	.input = xfrm_prepare_input,
 	.output2 = xfrm6_tunnel_output,
 	.output = xfrm6_prepare_output,
 	.owner = THIS_MODULE,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index bc2e80e3b0b..c45050cfe72 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -53,6 +53,7 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (err)
 		return err;
 
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 	return xfrm6_extract_header(skb);
 }
 
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 98b05f47232..90fef0a4726 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -177,7 +177,8 @@ int xfrm6_extract_header(struct sk_buff *skb)
 	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
 	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
 	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
-	XFRM_MODE_SKB_CB(skb)->protocol = iph->nexthdr;
+	XFRM_MODE_SKB_CB(skb)->protocol =
+		skb_network_header(skb)[IP6CB(skb)->nhoff];
 	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
 	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
 
@@ -187,11 +188,13 @@ int xfrm6_extract_header(struct sk_buff *skb)
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
+	.eth_proto		= htons(ETH_P_IPV6),
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 	.output			= xfrm6_output,
+	.extract_input		= xfrm6_extract_input,
 	.extract_output		= xfrm6_extract_output,
 };
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index cb97fda1b6d..4c803f7e74e 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -81,6 +81,19 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 }
 EXPORT_SYMBOL(xfrm_parse_spi);
 
+int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = x->outer_mode->afinfo->extract_input(x, skb);
+	if (err)
+		return err;
+
+	skb->protocol = x->inner_mode->afinfo->eth_proto;
+	return x->inner_mode->input2(x, skb);
+}
+EXPORT_SYMBOL(xfrm_prepare_input);
+
 void __init xfrm_input_init(void)
 {
 	secpath_cachep = kmem_cache_create("secpath_cache",
-- 
cgit v1.2.3


From c439cb2e4b13cf1cb2abcd006b906315a3381323 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 11 Jan 2008 19:14:00 -0800
Subject: [IPV4]: Add ip_local_out

Most callers of the LOCAL_OUT chain will set the IP packet length and
header checksum before doing so.  They also share the same output
function dst_output.

This patch creates a new function called ip_local_out which does all
of that and converts the appropriate users over to it.

Apart from removing duplicate code, it will also help in merging the
IPsec output path once the same thing is done for IPv6.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c                 | 12 ++----------
 net/ipv4/ip_output.c            | 39 +++++++++++++++++++++++++--------------
 net/ipv4/ipvs/ip_vs_xmit.c      |  6 ++----
 net/ipv4/netfilter/ipt_REJECT.c |  8 +-------
 net/ipv4/xfrm4_output.c         |  8 +-------
 5 files changed, 31 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 701558564e9..c560a9392b1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -349,17 +349,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 
 static int igmpv3_sendpack(struct sk_buff *skb)
 {
-	struct iphdr *pip = ip_hdr(skb);
 	struct igmphdr *pig = igmp_hdr(skb);
-	const int iplen = skb->tail - skb->network_header;
 	const int igmplen = skb->tail - skb->transport_header;
 
-	pip->tot_len = htons(iplen);
-	ip_send_check(pip);
 	pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
 
-	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
-		       dst_output);
+	return ip_local_out(skb);
 }
 
 static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
@@ -680,13 +675,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = IPPROTO_IGMP;
-	iph->tot_len  = htons(IGMP_SIZE);
 	ip_select_ident(iph, &rt->u.dst, NULL);
 	((u8*)&iph[1])[0] = IPOPT_RA;
 	((u8*)&iph[1])[1] = 4;
 	((u8*)&iph[1])[2] = 0;
 	((u8*)&iph[1])[3] = 0;
-	ip_send_check(iph);
 
 	ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 	ih->type=type;
@@ -695,8 +688,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	ih->group=group;
 	ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
 
-	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		       dst_output);
+	return ip_local_out(skb);
 }
 
 static void igmp_gq_timer_expire(unsigned long data)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bc9e57550e8..03b9b060027 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -91,6 +91,28 @@ __inline__ void ip_send_check(struct iphdr *iph)
 	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 }
 
+int __ip_local_out(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	iph->tot_len = htons(skb->len);
+	ip_send_check(iph);
+	return nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev,
+		       dst_output);
+}
+
+int ip_local_out(struct sk_buff *skb)
+{
+	int err;
+
+	err = __ip_local_out(skb);
+	if (likely(err == 1))
+		err = dst_output(skb);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ip_local_out);
+
 /* dev_loopback_xmit for use with netfilter. */
 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 {
@@ -138,20 +160,17 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->sk_protocol;
-	iph->tot_len  = htons(skb->len);
 	ip_select_ident(iph, &rt->u.dst, sk);
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
 		ip_options_build(skb, opt, daddr, rt, 0);
 	}
-	ip_send_check(iph);
 
 	skb->priority = sk->sk_priority;
 
 	/* Send it out. */
-	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		       dst_output);
+	return ip_local_out(skb);
 }
 
 EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
@@ -347,7 +366,6 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	iph->tot_len = htons(skb->len);
 	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
 		iph->frag_off = htons(IP_DF);
 	else
@@ -366,13 +384,9 @@ packet_routed:
 	ip_select_ident_more(iph, &rt->u.dst, sk,
 			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
-	/* Add an IP checksum. */
-	ip_send_check(iph);
-
 	skb->priority = sk->sk_priority;
 
-	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		       dst_output);
+	return ip_local_out(skb);
 
 no_route:
 	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
@@ -1262,14 +1276,12 @@ int ip_push_pending_frames(struct sock *sk)
 		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
 	}
 	iph->tos = inet->tos;
-	iph->tot_len = htons(skb->len);
 	iph->frag_off = df;
 	ip_select_ident(iph, &rt->u.dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	iph->saddr = rt->rt_src;
 	iph->daddr = rt->rt_dst;
-	ip_send_check(iph);
 
 	skb->priority = sk->sk_priority;
 	skb->dst = dst_clone(&rt->u.dst);
@@ -1279,8 +1291,7 @@ int ip_push_pending_frames(struct sock *sk)
 			skb_transport_header(skb))->type);
 
 	/* Netfilter gets whole the not fragmented skb. */
-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
-		      skb->dst->dev, dst_output);
+	err = ip_local_out(skb);
 	if (err) {
 		if (err > 0)
 			err = inet->recverr ? net_xmit_errno(err) : 0;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 7c074e386c1..66775ad9e32 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -16,8 +16,8 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/ip.h>
 #include <linux/tcp.h>                  /* for tcphdr */
+#include <net/ip.h>
 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
 #include <net/udp.h>
 #include <net/icmp.h>                   /* for icmp_send */
@@ -406,14 +406,12 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	rt->rt_dst;
 	iph->saddr		=	rt->rt_src;
 	iph->ttl		=	old_iph->ttl;
-	iph->tot_len		=	htons(skb->len);
 	ip_select_ident(iph, &rt->u.dst, NULL);
-	ip_send_check(iph);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(skb, rt);
+	ip_local_out(skb);
 
 	LeaveFunction(10);
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index dcf4d21d511..ccb2a03dcd5 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,7 +90,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* Truncate to length (no data) */
 	tcph->doff = sizeof(struct tcphdr)/4;
 	skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
-	niph->tot_len = htons(nskb->len);
 
 	if (tcph->ack) {
 		needs_ack = 0;
@@ -139,18 +138,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* Adjust IP TTL */
 	niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
 
-	/* Adjust IP checksum */
-	niph->check = 0;
-	niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
-
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(nskb->dst))
 		goto free_nskb;
 
 	nf_ct_attach(nskb, oldskb);
 
-	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
-		dst_output);
+	ip_local_out(nskb);
 	return;
 
  free_nskb:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 13fd11335e2..0ffc3d07848 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -69,17 +69,12 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
 
 static inline int xfrm4_output_one(struct sk_buff *skb)
 {
-	struct iphdr *iph;
 	int err;
 
 	err = xfrm_output(skb);
 	if (err)
 		goto error_nolock;
 
-	iph = ip_hdr(skb);
-	iph->tot_len = htons(skb->len);
-	ip_send_check(iph);
-
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
 	err = 0;
 
@@ -97,8 +92,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb)
 	while (likely((err = xfrm4_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
-		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
-			      skb->dst->dev, dst_output);
+		err = __ip_local_out(skb);
 		if (unlikely(err != 1))
 			break;
 
-- 
cgit v1.2.3


From ef76bc23ef2acf20c8f7f841a542d8ab74c827c6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 11 Jan 2008 19:15:08 -0800
Subject: [IPV6]: Add ip6_local_out

Most callers of the LOCAL_OUT chain will set the IP packet length
before doing so.  They also share the same output function dst_output.

This patch creates a new function called ip6_local_out which does all
of that and converts the appropriate users over to it.

Apart from removing duplicate code, it will also help in merging the
IPsec output path.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c            | 33 +++++++++++++++++++++++++++------
 net/ipv6/ip6_tunnel.c            |  4 +---
 net/ipv6/netfilter/ip6t_REJECT.c |  4 +---
 net/ipv6/xfrm6_output.c          |  7 +------
 4 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 387030d2483..bd121f9ae0a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -29,7 +29,7 @@
  */
 
 #include <linux/errno.h>
-#include <linux/types.h>
+#include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/net.h>
@@ -70,6 +70,31 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f
 	spin_unlock_bh(&ip6_id_lock);
 }
 
+int __ip6_local_out(struct sk_buff *skb)
+{
+	int len;
+
+	len = skb->len - sizeof(struct ipv6hdr);
+	if (len > IPV6_MAXPLEN)
+		len = 0;
+	ipv6_hdr(skb)->payload_len = htons(len);
+
+	return nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
+		       dst_output);
+}
+
+int ip6_local_out(struct sk_buff *skb)
+{
+	int err;
+
+	err = __ip6_local_out(skb);
+	if (likely(err == 1))
+		err = dst_output(skb);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
+
 static int ip6_output_finish(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
@@ -1403,10 +1428,6 @@ int ip6_push_pending_frames(struct sock *sk)
 	*(__be32*)hdr = fl->fl6_flowlabel |
 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
 
-	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
-		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-	else
-		hdr->payload_len = 0;
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
@@ -1423,7 +1444,7 @@ int ip6_push_pending_frames(struct sock *sk)
 		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
 	}
 
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+	err = ip6_local_out(skb);
 	if (err) {
 		if (err > 0)
 			err = np->recverr ? net_xmit_errno(err) : 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a4051afaf77..29b5321e39c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -910,15 +910,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
 	dsfield = INET_ECN_encapsulate(0, dsfield);
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
-	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
 	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
 	nf_reset(skb);
 	pkt_len = skb->len;
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
-		      skb->dst->dev, dst_output);
+	err = ip6_local_out(skb);
 
 	if (net_xmit_eval(err) == 0) {
 		stats->tx_bytes += pkt_len;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 1a7d2917545..c1c66348283 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -121,7 +121,6 @@ static void send_reset(struct sk_buff *oldskb)
 	ip6h->version = 6;
 	ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
 	ip6h->nexthdr = IPPROTO_TCP;
-	ip6h->payload_len = htons(sizeof(struct tcphdr));
 	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
 	ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
 
@@ -159,8 +158,7 @@ static void send_reset(struct sk_buff *oldskb)
 
 	nf_ct_attach(nskb, oldskb);
 
-	NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
-		dst_output);
+	ip6_local_out(nskb);
 }
 
 static inline void
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c45050cfe72..0f0ff51f6db 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -75,16 +75,12 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
 
 static inline int xfrm6_output_one(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph;
 	int err;
 
 	err = xfrm_output(skb);
 	if (err)
 		goto error_nolock;
 
-	iph = ipv6_hdr(skb);
-	iph->payload_len = htons(skb->len - sizeof(*iph));
-
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
 	err = 0;
 
@@ -102,8 +98,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb)
 	while (likely((err = xfrm6_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
-		err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
-			      skb->dst->dev, dst_output);
+		err = __ip6_local_out(skb);
 		if (unlikely(err != 1))
 			break;
 
-- 
cgit v1.2.3


From 862b82c6f960cc61274d370aa78ce1112f92a83e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:43:11 -0800
Subject: [IPSEC]: Merge most of the output path

As part of the work on asynchrnous cryptographic operations, we need
to be able to resume from the spot where they occur.  As such, it
helps if we isolate them to one spot.

This patch moves most of the remaining family-specific processing into
the common output code.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c        |  1 +
 net/ipv4/xfrm4_output.c | 76 +++---------------------------------------------
 net/ipv4/xfrm4_policy.c |  1 +
 net/ipv4/xfrm4_state.c  |  2 ++
 net/ipv6/route.c        |  1 +
 net/ipv6/xfrm6_output.c | 77 +++++--------------------------------------------
 net/ipv6/xfrm6_policy.c |  1 +
 net/ipv6/xfrm6_state.c  |  2 ++
 net/xfrm/xfrm_output.c  | 70 ++++++++++++++++++++++++++++++++++++++++++--
 9 files changed, 86 insertions(+), 145 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 137b8eb666b..94ef788a2ac 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -166,6 +166,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.negative_advice =	ipv4_negative_advice,
 	.link_failure =		ipv4_link_failure,
 	.update_pmtu =		ip_rt_update_pmtu,
+	.local_out =		ip_local_out,
 	.entry_size =		sizeof(struct rtable),
 };
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 0ffc3d07848..2fb4efa3ff2 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -59,7 +59,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 		return err;
 
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
 
 	skb->protocol = htons(ETH_P_IP);
 
@@ -67,87 +67,19 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(xfrm4_prepare_output);
 
-static inline int xfrm4_output_one(struct sk_buff *skb)
-{
-	int err;
-
-	err = xfrm_output(skb);
-	if (err)
-		goto error_nolock;
-
-	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-	err = 0;
-
-out_exit:
-	return err;
-error_nolock:
-	kfree_skb(skb);
-	goto out_exit;
-}
-
-static int xfrm4_output_finish2(struct sk_buff *skb)
-{
-	int err;
-
-	while (likely((err = xfrm4_output_one(skb)) == 0)) {
-		nf_reset(skb);
-
-		err = __ip_local_out(skb);
-		if (unlikely(err != 1))
-			break;
-
-		if (!skb->dst->xfrm)
-			return dst_output(skb);
-
-		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
-			      skb->dst->dev, xfrm4_output_finish2);
-		if (unlikely(err != 1))
-			break;
-	}
-
-	return err;
-}
-
 static int xfrm4_output_finish(struct sk_buff *skb)
 {
-	struct sk_buff *segs;
-
 #ifdef CONFIG_NETFILTER
 	if (!skb->dst->xfrm) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
 		return dst_output(skb);
 	}
-#endif
 
-	if (!skb_is_gso(skb))
-		return xfrm4_output_finish2(skb);
+	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+#endif
 
 	skb->protocol = htons(ETH_P_IP);
-	segs = skb_gso_segment(skb, 0);
-	kfree_skb(skb);
-	if (unlikely(IS_ERR(segs)))
-		return PTR_ERR(segs);
-
-	do {
-		struct sk_buff *nskb = segs->next;
-		int err;
-
-		segs->next = NULL;
-		err = xfrm4_output_finish2(segs);
-
-		if (unlikely(err)) {
-			while ((segs = nskb)) {
-				nskb = segs->next;
-				segs->next = NULL;
-				kfree_skb(segs);
-			}
-			return err;
-		}
-
-		segs = nskb;
-	} while (segs);
-
-	return 0;
+	return xfrm_output(skb);
 }
 
 int xfrm4_output(struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1d7524375b4..b4948c170b3 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -237,6 +237,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.update_pmtu =		xfrm4_update_pmtu,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
+	.local_out =		__ip_local_out,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
 };
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 85f04b7b237..80292fbf221 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -11,6 +11,7 @@
 #include <net/xfrm.h>
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
+#include <linux/netfilter_ipv4.h>
 
 static struct xfrm_state_afinfo xfrm4_state_afinfo;
 
@@ -66,6 +67,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.proto			= IPPROTO_IPIP,
 	.eth_proto		= htons(ETH_P_IP),
+	.nf_post_routing	= NF_IP_POST_ROUTING,
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ac70e2d3b10..4ef2cfaa346 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -113,6 +113,7 @@ static struct dst_ops ip6_dst_ops = {
 	.negative_advice	=	ip6_negative_advice,
 	.link_failure		=	ip6_link_failure,
 	.update_pmtu		=	ip6_rt_update_pmtu,
+	.local_out		=	ip6_local_out,
 	.entry_size		=	sizeof(struct rt6_info),
 };
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 0f0ff51f6db..a0a924991c4 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -66,6 +66,9 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 		return err;
 
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+#ifdef CONFIG_NETFILTER
+	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
 
 	skb->protocol = htons(ETH_P_IPV6);
 
@@ -73,80 +76,14 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(xfrm6_prepare_output);
 
-static inline int xfrm6_output_one(struct sk_buff *skb)
-{
-	int err;
-
-	err = xfrm_output(skb);
-	if (err)
-		goto error_nolock;
-
-	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-	err = 0;
-
-out_exit:
-	return err;
-error_nolock:
-	kfree_skb(skb);
-	goto out_exit;
-}
-
-static int xfrm6_output_finish2(struct sk_buff *skb)
-{
-	int err;
-
-	while (likely((err = xfrm6_output_one(skb)) == 0)) {
-		nf_reset(skb);
-
-		err = __ip6_local_out(skb);
-		if (unlikely(err != 1))
-			break;
-
-		if (!skb->dst->xfrm)
-			return dst_output(skb);
-
-		err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL,
-			      skb->dst->dev, xfrm6_output_finish2);
-		if (unlikely(err != 1))
-			break;
-	}
-
-	return err;
-}
-
 static int xfrm6_output_finish(struct sk_buff *skb)
 {
-	struct sk_buff *segs;
-
-	if (!skb_is_gso(skb))
-		return xfrm6_output_finish2(skb);
+#ifdef CONFIG_NETFILTER
+	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
 
 	skb->protocol = htons(ETH_P_IPV6);
-	segs = skb_gso_segment(skb, 0);
-	kfree_skb(skb);
-	if (unlikely(IS_ERR(segs)))
-		return PTR_ERR(segs);
-
-	do {
-		struct sk_buff *nskb = segs->next;
-		int err;
-
-		segs->next = NULL;
-		err = xfrm6_output_finish2(segs);
-
-		if (unlikely(err)) {
-			while ((segs = nskb)) {
-				nskb = segs->next;
-				segs->next = NULL;
-				kfree_skb(segs);
-			}
-			return err;
-		}
-
-		segs = nskb;
-	} while (segs);
-
-	return 0;
+	return xfrm_output(skb);
 }
 
 int xfrm6_output(struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 63932c5fd3c..a31dd531e19 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -252,6 +252,7 @@ static struct dst_ops xfrm6_dst_ops = {
 	.update_pmtu =		xfrm6_update_pmtu,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
+	.local_out =		__ip6_local_out,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
 };
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 90fef0a4726..bb09e85a336 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -14,6 +14,7 @@
 #include <net/xfrm.h>
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/dsfield.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
@@ -189,6 +190,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
 	.eth_proto		= htons(ETH_P_IPV6),
+	.nf_post_routing	= NF_IP6_POST_ROUTING,
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index b1efdc8850a..bcb3701c5cf 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -12,6 +12,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/netfilter.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <net/dst.h>
@@ -40,7 +41,7 @@ err:
 	return err;
 }
 
-int xfrm_output(struct sk_buff *skb)
+static int xfrm_output_one(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x = dst->xfrm;
@@ -87,10 +88,73 @@ int xfrm_output(struct sk_buff *skb)
 
 	err = 0;
 
-error_nolock:
+out_exit:
 	return err;
 error:
 	spin_unlock_bh(&x->lock);
-	goto error_nolock;
+error_nolock:
+	kfree_skb(skb);
+	goto out_exit;
+}
+
+static int xfrm_output2(struct sk_buff *skb)
+{
+	int err;
+
+	while (likely((err = xfrm_output_one(skb)) == 0)) {
+		struct xfrm_state *x;
+
+		nf_reset(skb);
+
+		err = skb->dst->ops->local_out(skb);
+		if (unlikely(err != 1))
+			break;
+
+		x = skb->dst->xfrm;
+		if (!x)
+			return dst_output(skb);
+
+		err = nf_hook(x->inner_mode->afinfo->family,
+			      x->inner_mode->afinfo->nf_post_routing, skb,
+			      NULL, skb->dst->dev, xfrm_output2);
+		if (unlikely(err != 1))
+			break;
+	}
+
+	return err;
+}
+
+int xfrm_output(struct sk_buff *skb)
+{
+	struct sk_buff *segs;
+
+	if (!skb_is_gso(skb))
+		return xfrm_output2(skb);
+
+	segs = skb_gso_segment(skb, 0);
+	kfree_skb(skb);
+	if (unlikely(IS_ERR(segs)))
+		return PTR_ERR(segs);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = xfrm_output2(segs);
+
+		if (unlikely(err)) {
+			while ((segs = nskb)) {
+				nskb = segs->next;
+				segs->next = NULL;
+				kfree_skb(segs);
+			}
+			return err;
+		}
+
+		segs = nskb;
+	} while (segs);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xfrm_output);
-- 
cgit v1.2.3


From c6581a457e661b7070e484ad723bbf555b17aca2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:43:43 -0800
Subject: [IPSEC]: Add async resume support on output

This patch adds support for async resumptions on output.  To do so,
the transform would return -EINPROGRESS and subsequently invoke the
function xfrm_output_resume to resume processing.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 57 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index bcb3701c5cf..048d240c3e1 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -18,6 +18,8 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 
+static int xfrm_output2(struct sk_buff *skb);
+
 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
@@ -41,17 +43,13 @@ err:
 	return err;
 }
 
-static int xfrm_output_one(struct sk_buff *skb)
+static int xfrm_output_one(struct sk_buff *skb, int err)
 {
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x = dst->xfrm;
-	int err;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		err = skb_checksum_help(skb);
-		if (err)
-			goto error_nolock;
-	}
+	if (err <= 0)
+		goto resume;
 
 	do {
 		err = x->outer_mode->output(x, skb);
@@ -75,6 +73,8 @@ static int xfrm_output_one(struct sk_buff *skb)
 		spin_unlock_bh(&x->lock);
 
 		err = x->type->output(x, skb);
+
+resume:
 		if (err)
 			goto error_nolock;
 
@@ -97,18 +97,16 @@ error_nolock:
 	goto out_exit;
 }
 
-static int xfrm_output2(struct sk_buff *skb)
+int xfrm_output_resume(struct sk_buff *skb, int err)
 {
-	int err;
-
-	while (likely((err = xfrm_output_one(skb)) == 0)) {
+	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
 		struct xfrm_state *x;
 
 		nf_reset(skb);
 
 		err = skb->dst->ops->local_out(skb);
 		if (unlikely(err != 1))
-			break;
+			goto out;
 
 		x = skb->dst->xfrm;
 		if (!x)
@@ -118,18 +116,25 @@ static int xfrm_output2(struct sk_buff *skb)
 			      x->inner_mode->afinfo->nf_post_routing, skb,
 			      NULL, skb->dst->dev, xfrm_output2);
 		if (unlikely(err != 1))
-			break;
+			goto out;
 	}
 
+	if (err == -EINPROGRESS)
+		err = 0;
+
+out:
 	return err;
 }
+EXPORT_SYMBOL_GPL(xfrm_output_resume);
 
-int xfrm_output(struct sk_buff *skb)
+static int xfrm_output2(struct sk_buff *skb)
 {
-	struct sk_buff *segs;
+	return xfrm_output_resume(skb, 1);
+}
 
-	if (!skb_is_gso(skb))
-		return xfrm_output2(skb);
+static int xfrm_output_gso(struct sk_buff *skb)
+{
+	struct sk_buff *segs;
 
 	segs = skb_gso_segment(skb, 0);
 	kfree_skb(skb);
@@ -157,4 +162,22 @@ int xfrm_output(struct sk_buff *skb)
 
 	return 0;
 }
+
+int xfrm_output(struct sk_buff *skb)
+{
+	int err;
+
+	if (skb_is_gso(skb))
+		return xfrm_output_gso(skb);
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
+		if (err) {
+			kfree_skb(skb);
+			return err;
+		}
+	}
+
+	return xfrm_output2(skb);
+}
 EXPORT_SYMBOL_GPL(xfrm_output);
-- 
cgit v1.2.3


From 716062fd4c2f88a33ab409f62a1e7397ad0a7e33 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:44:23 -0800
Subject: [IPSEC]: Merge most of the input path

As part of the work on asynchronous cryptographic operations, we need
to be able to resume from the spot where they occur.  As such, it
helps if we isolate them to one spot.

This patch moves most of the remaining family-specific processing into
the common input code.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c | 126 ++++++-------------------------------------------
 net/ipv4/xfrm4_state.c |   1 +
 net/ipv6/xfrm6_input.c | 118 +++++----------------------------------------
 net/ipv6/xfrm6_state.c |   1 +
 net/xfrm/xfrm_input.c  | 113 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 142 insertions(+), 217 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index c0323d05ab6..e374903dacd 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -41,124 +41,26 @@ drop:
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type)
 {
-	int err;
-	__be32 seq;
-	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
-	struct xfrm_state *x;
-	int xfrm_nr = 0;
-	int decaps = 0;
-	unsigned int nhoff = offsetof(struct iphdr, protocol);
-
-	seq = 0;
-	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
-		goto drop;
-
-	do {
-		const struct iphdr *iph = ip_hdr(skb);
-
-		if (xfrm_nr == XFRM_MAX_DEPTH)
-			goto drop;
-
-		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				      nexthdr, AF_INET);
-		if (x == NULL)
-			goto drop;
-
-		spin_lock(&x->lock);
-		if (unlikely(x->km.state != XFRM_STATE_VALID))
-			goto drop_unlock;
-
-		if ((x->encap ? x->encap->encap_type : 0) != encap_type)
-			goto drop_unlock;
-
-		if (x->props.replay_window && xfrm_replay_check(x, seq))
-			goto drop_unlock;
-
-		if (xfrm_state_check_expire(x))
-			goto drop_unlock;
-
-		nexthdr = x->type->input(x, skb);
-		if (nexthdr <= 0)
-			goto drop_unlock;
-
-		skb_network_header(skb)[nhoff] = nexthdr;
-
-		/* only the first xfrm gets the encap type */
-		encap_type = 0;
-
-		if (x->props.replay_window)
-			xfrm_replay_advance(x, seq);
-
-		x->curlft.bytes += skb->len;
-		x->curlft.packets++;
-
-		spin_unlock(&x->lock);
-
-		xfrm_vec[xfrm_nr++] = x;
-
-		if (x->inner_mode->input(x, skb))
-			goto drop;
-
-		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
-			decaps = 1;
-			break;
-		}
-
-		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
-		if (err < 0)
-			goto drop;
-	} while (!err);
-
-	/* Allocate new secpath or COW existing one. */
-
-	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
-		struct sec_path *sp;
-		sp = secpath_dup(skb->sp);
-		if (!sp)
-			goto drop;
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
-	}
-	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
-		goto drop;
-
-	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
-	       xfrm_nr * sizeof(xfrm_vec[0]));
-	skb->sp->len += xfrm_nr;
-
-	nf_reset(skb);
+	XFRM_SPI_SKB_CB(skb)->nhoff = offsetof(struct iphdr, protocol);
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+	return xfrm_input(skb, nexthdr, spi, encap_type);
+}
+EXPORT_SYMBOL(xfrm4_rcv_encap);
 
-	if (decaps) {
-		dst_release(skb->dst);
-		skb->dst = NULL;
-		netif_rx(skb);
-		return 0;
-	} else {
+int xfrm4_transport_finish(struct sk_buff *skb, int async)
+{
 #ifdef CONFIG_NETFILTER
-		__skb_push(skb, skb->data - skb_network_header(skb));
-		ip_hdr(skb)->tot_len = htons(skb->len);
-		ip_send_check(ip_hdr(skb));
+	__skb_push(skb, skb->data - skb_network_header(skb));
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
 
-		NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
-			xfrm4_rcv_encap_finish);
-		return 0;
+	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+		xfrm4_rcv_encap_finish);
+	return 0;
 #else
-		return -ip_hdr(skb)->protocol;
+	return -ip_hdr(skb)->protocol;
 #endif
-	}
-
-drop_unlock:
-	spin_unlock(&x->lock);
-	xfrm_state_put(x);
-drop:
-	while (--xfrm_nr >= 0)
-		xfrm_state_put(xfrm_vec[xfrm_nr]);
-
-	kfree_skb(skb);
-	return 0;
 }
-EXPORT_SYMBOL(xfrm4_rcv_encap);
 
 /* If it's a keepalive packet, then just eat it.
  * If it's an encapsulated packet, then pass it to the
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 80292fbf221..3b067e8b7bf 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -74,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.output			= xfrm4_output,
 	.extract_input		= xfrm4_extract_input,
 	.extract_output		= xfrm4_extract_output,
+	.transport_finish	= xfrm4_transport_finish,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index c458d0a2e68..3b9eedf5b24 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -23,118 +23,26 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
-	int err;
-	__be32 seq;
-	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
-	struct xfrm_state *x;
-	int xfrm_nr = 0;
-	int decaps = 0;
-	unsigned int nhoff;
-
-	nhoff = IP6CB(skb)->nhoff;
-
-	seq = 0;
-	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
-		goto drop;
-
-	do {
-		struct ipv6hdr *iph = ipv6_hdr(skb);
-
-		if (xfrm_nr == XFRM_MAX_DEPTH)
-			goto drop;
-
-		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				      nexthdr, AF_INET6);
-		if (x == NULL)
-			goto drop;
-		spin_lock(&x->lock);
-		if (unlikely(x->km.state != XFRM_STATE_VALID))
-			goto drop_unlock;
-
-		if (x->props.replay_window && xfrm_replay_check(x, seq))
-			goto drop_unlock;
-
-		if (xfrm_state_check_expire(x))
-			goto drop_unlock;
-
-		nexthdr = x->type->input(x, skb);
-		if (nexthdr <= 0)
-			goto drop_unlock;
-
-		skb_network_header(skb)[nhoff] = nexthdr;
-
-		if (x->props.replay_window)
-			xfrm_replay_advance(x, seq);
-
-		x->curlft.bytes += skb->len;
-		x->curlft.packets++;
-
-		spin_unlock(&x->lock);
-
-		xfrm_vec[xfrm_nr++] = x;
-
-		if (x->inner_mode->input(x, skb))
-			goto drop;
-
-		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
-			decaps = 1;
-			break;
-		}
-
-		if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
-			goto drop;
-	} while (!err);
-
-	/* Allocate new secpath or COW existing one. */
-	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
-		struct sec_path *sp;
-		sp = secpath_dup(skb->sp);
-		if (!sp)
-			goto drop;
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
-	}
-
-	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
-		goto drop;
-
-	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
-	       xfrm_nr * sizeof(xfrm_vec[0]));
-	skb->sp->len += xfrm_nr;
-
-	nf_reset(skb);
+	XFRM_SPI_SKB_CB(skb)->nhoff = IP6CB(skb)->nhoff;
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+	return xfrm_input(skb, nexthdr, spi, 0);
+}
+EXPORT_SYMBOL(xfrm6_rcv_spi);
 
-	if (decaps) {
-		dst_release(skb->dst);
-		skb->dst = NULL;
-		netif_rx(skb);
-		return -1;
-	} else {
+int xfrm6_transport_finish(struct sk_buff *skb, int async)
+{
 #ifdef CONFIG_NETFILTER
-		ipv6_hdr(skb)->payload_len = htons(skb->len);
-		__skb_push(skb, skb->data - skb_network_header(skb));
+	ipv6_hdr(skb)->payload_len = htons(skb->len);
+	__skb_push(skb, skb->data - skb_network_header(skb));
 
-		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
-			ip6_rcv_finish);
-		return -1;
+	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
+		ip6_rcv_finish);
+	return -1;
 #else
-		return 1;
+	return 1;
 #endif
-	}
-
-drop_unlock:
-	spin_unlock(&x->lock);
-	xfrm_state_put(x);
-drop:
-	while (--xfrm_nr >= 0)
-		xfrm_state_put(xfrm_vec[xfrm_nr]);
-	kfree_skb(skb);
-	return -1;
 }
 
-EXPORT_SYMBOL(xfrm6_rcv_spi);
-
 int xfrm6_rcv(struct sk_buff *skb)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index bb09e85a336..00360b514e9 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -198,6 +198,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.output			= xfrm6_output,
 	.extract_input		= xfrm6_extract_input,
 	.extract_output		= xfrm6_extract_output,
+	.transport_finish	= xfrm6_transport_finish,
 };
 
 void __init xfrm6_state_init(void)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 4c803f7e74e..b980095be93 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -9,6 +9,8 @@
 
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 
@@ -94,6 +96,117 @@ int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(xfrm_prepare_input);
 
+int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+	int err;
+	__be32 seq;
+	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
+	struct xfrm_state *x;
+	int xfrm_nr = 0;
+	int decaps = 0;
+	unsigned int nhoff = XFRM_SPI_SKB_CB(skb)->nhoff;
+	unsigned int daddroff = XFRM_SPI_SKB_CB(skb)->daddroff;
+
+	seq = 0;
+	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
+		goto drop;
+
+	do {
+		if (xfrm_nr == XFRM_MAX_DEPTH)
+			goto drop;
+
+		x = xfrm_state_lookup((xfrm_address_t *)
+				      (skb_network_header(skb) + daddroff),
+				      spi, nexthdr, AF_INET);
+		if (x == NULL)
+			goto drop;
+
+		spin_lock(&x->lock);
+		if (unlikely(x->km.state != XFRM_STATE_VALID))
+			goto drop_unlock;
+
+		if ((x->encap ? x->encap->encap_type : 0) != encap_type)
+			goto drop_unlock;
+
+		if (x->props.replay_window && xfrm_replay_check(x, seq))
+			goto drop_unlock;
+
+		if (xfrm_state_check_expire(x))
+			goto drop_unlock;
+
+		nexthdr = x->type->input(x, skb);
+		if (nexthdr <= 0)
+			goto drop_unlock;
+
+		skb_network_header(skb)[nhoff] = nexthdr;
+
+		/* only the first xfrm gets the encap type */
+		encap_type = 0;
+
+		if (x->props.replay_window)
+			xfrm_replay_advance(x, seq);
+
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec[xfrm_nr++] = x;
+
+		if (x->inner_mode->input(x, skb))
+			goto drop;
+
+		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+			decaps = 1;
+			break;
+		}
+
+		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
+		if (err < 0)
+			goto drop;
+	} while (!err);
+
+	/* Allocate new secpath or COW existing one. */
+
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+		goto drop;
+
+	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
+	       xfrm_nr * sizeof(xfrm_vec[0]));
+	skb->sp->len += xfrm_nr;
+
+	nf_reset(skb);
+
+	if (decaps) {
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		netif_rx(skb);
+		return 0;
+	} else {
+		return x->inner_mode->afinfo->transport_finish(skb, 0);
+	}
+
+drop_unlock:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+drop:
+	while (--xfrm_nr >= 0)
+		xfrm_state_put(xfrm_vec[xfrm_nr]);
+
+	kfree_skb(skb);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_input);
+
 void __init xfrm_input_init(void)
 {
 	secpath_cachep = kmem_cache_create("secpath_cache",
-- 
cgit v1.2.3


From b2aa5e9d43a38dcdfa0878ed750cf32f98460278 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:44:55 -0800
Subject: [IPSEC]: Store xfrm states in security path directly

As it is xfrm_input first collects a list of xfrm states on the stack
before storing them in the packet's security path just before it
returns.  For async crypto, this construction presents an obstacle
since we may need to leave the loop after each transform.

In fact, it's much easier to just skip the stack completely and always
store to the security path.  This is proven by the fact that this
patch actually shrinks the code.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_input.c | 42 +++++++++++++++---------------------------
 1 file changed, 15 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b980095be93..587f3474ed3 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -100,19 +100,29 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 {
 	int err;
 	__be32 seq;
-	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
 	struct xfrm_state *x;
-	int xfrm_nr = 0;
 	int decaps = 0;
 	unsigned int nhoff = XFRM_SPI_SKB_CB(skb)->nhoff;
 	unsigned int daddroff = XFRM_SPI_SKB_CB(skb)->daddroff;
 
+	/* Allocate new secpath or COW existing one. */
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
 		goto drop;
 
 	do {
-		if (xfrm_nr == XFRM_MAX_DEPTH)
+		if (skb->sp->len == XFRM_MAX_DEPTH)
 			goto drop;
 
 		x = xfrm_state_lookup((xfrm_address_t *)
@@ -121,6 +131,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		if (x == NULL)
 			goto drop;
 
+		skb->sp->xvec[skb->sp->len++] = x;
+
 		spin_lock(&x->lock);
 		if (unlikely(x->km.state != XFRM_STATE_VALID))
 			goto drop_unlock;
@@ -151,8 +163,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		spin_unlock(&x->lock);
 
-		xfrm_vec[xfrm_nr++] = x;
-
 		if (x->inner_mode->input(x, skb))
 			goto drop;
 
@@ -166,24 +176,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 	} while (!err);
 
-	/* Allocate new secpath or COW existing one. */
-
-	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
-		struct sec_path *sp;
-		sp = secpath_dup(skb->sp);
-		if (!sp)
-			goto drop;
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
-	}
-	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
-		goto drop;
-
-	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
-	       xfrm_nr * sizeof(xfrm_vec[0]));
-	skb->sp->len += xfrm_nr;
-
 	nf_reset(skb);
 
 	if (decaps) {
@@ -197,11 +189,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 drop_unlock:
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 drop:
-	while (--xfrm_nr >= 0)
-		xfrm_state_put(xfrm_vec[xfrm_nr]);
-
 	kfree_skb(skb);
 	return 0;
 }
-- 
cgit v1.2.3


From 668dc8af3150f837f7f0461001bbbc0ce25d7bdf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 16 Dec 2007 15:55:02 -0800
Subject: [IPSEC]: Move integrity stat collection into xfrm_input

Similar to the moving out of the replay processing on the output, this
patch moves the integrity stat collectin from x->type->input into
xfrm_input.

This would eventually allow transforms such as AH/ESP to be lockless.

The error value EBADMSG (currently unused in the crypto layer) is used
to indicate a failed integrity check.  In future this error can be
directly returned by the crypto layer once we switch to aead
algorithms.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ah4.c        |  3 +--
 net/ipv4/esp4.c       | 13 ++++++++-----
 net/ipv6/ah6.c        |  3 +--
 net/ipv6/esp6.c       |  3 +--
 net/xfrm/xfrm_input.c |  5 ++++-
 5 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 5fc346d8b56..a989d29b44e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -177,9 +177,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
 			goto out;
-		err = -EINVAL;
 		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
-			x->stats.integrity_failed++;
+			err = -EBADMSG;
 			goto out;
 		}
 	}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1738113268b..3350a7d5066 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -163,7 +163,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	u8 nexthdr[2];
 	struct scatterlist *sg;
 	int padlen;
-	int err;
+	int err = -EINVAL;
 
 	if (!pskb_may_pull(skb, sizeof(*esph)))
 		goto out;
@@ -183,13 +183,14 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			x->stats.integrity_failed++;
+			err = -EBADMSG;
 			goto out;
 		}
 	}
 
-	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
 		goto out;
+	nfrags = err;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -202,6 +203,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	sg = &esp->sgbuf[0];
 
 	if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
+		err = -ENOMEM;
 		sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
 		if (!sg)
 			goto out;
@@ -214,11 +216,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (unlikely(sg != &esp->sgbuf[0]))
 		kfree(sg);
 	if (unlikely(err))
-		return err;
+		goto out;
 
 	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 		BUG();
 
+	err = -EINVAL;
 	padlen = nexthdr[0];
 	if (padlen+2 >= elen)
 		goto out;
@@ -276,7 +279,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	return nexthdr[1];
 
 out:
-	return -EINVAL;
+	return err;
 }
 
 static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 4eaf55072b1..d4b59ecb0b5 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -379,10 +379,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
 			goto free_out;
-		err = -EINVAL;
 		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
 			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
-			x->stats.integrity_failed++;
+			err = -EBADMSG;
 			goto free_out;
 		}
 	}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 44405325467..096974ba642 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -177,8 +177,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			x->stats.integrity_failed++;
-			ret = -EINVAL;
+			ret = -EBADMSG;
 			goto out;
 		}
 	}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 587f3474ed3..b7d68eb9434 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -147,8 +147,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop_unlock;
 
 		nexthdr = x->type->input(x, skb);
-		if (nexthdr <= 0)
+		if (nexthdr <= 0) {
+			if (nexthdr == -EBADMSG)
+				x->stats.integrity_failed++;
 			goto drop_unlock;
+		}
 
 		skb_network_header(skb)[nhoff] = nexthdr;
 
-- 
cgit v1.2.3


From 0ebea8ef3559b545c37b016f44e84c3b33e47c39 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:45:58 -0800
Subject: [IPSEC]: Move state lock into x->type->input

This patch releases the lock on the state before calling
x->type->input.  It also adds the lock to the spots where they're
currently needed.

Most of those places (all except mip6) are expected to disappear with
async crypto.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ah4.c        | 14 ++++++++++----
 net/ipv4/esp4.c       | 24 +++++++++++++++---------
 net/ipv6/ah6.c        |  9 +++++++--
 net/ipv6/esp6.c       | 37 +++++++++++++++++++++++--------------
 net/ipv6/mip6.c       | 14 ++++++++++----
 net/xfrm/xfrm_input.c |  4 ++++
 6 files changed, 69 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a989d29b44e..d76803a3dca 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -169,6 +169,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		if (ip_clear_mutable_options(iph, &dummy))
 			goto out;
 	}
+
+	spin_lock(&x->lock);
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];
 
@@ -176,12 +178,16 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb_push(skb, ihl);
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
-			goto out;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
+			goto unlock;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
 			err = -EBADMSG;
-			goto out;
-		}
 	}
+unlock:
+	spin_unlock(&x->lock);
+
+	if (err)
+		goto out;
+
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_buf, ihl);
 	skb->transport_header = skb->network_header;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 3350a7d5066..28ea5c77ca2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -171,29 +171,31 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (elen <= 0 || (elen & (blksize-1)))
 		goto out;
 
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	spin_lock(&x->lock);
+
 	/* If integrity check is required, do this. */
 	if (esp->auth.icv_full_len) {
 		u8 sum[alen];
 
 		err = esp_mac_digest(esp, skb, 0, skb->len - alen);
 		if (err)
-			goto out;
+			goto unlock;
 
 		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
 			err = -EBADMSG;
-			goto out;
+			goto unlock;
 		}
 	}
 
-	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
-		goto out;
-	nfrags = err;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
 	esph = (struct ip_esp_hdr *)skb->data;
 
 	/* Get ivec. This can be wrong, check against another impls. */
@@ -206,7 +208,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		err = -ENOMEM;
 		sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
 		if (!sg)
-			goto out;
+			goto unlock;
 	}
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
@@ -215,6 +217,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 	if (unlikely(sg != &esp->sgbuf[0]))
 		kfree(sg);
+
+unlock:
+	spin_unlock(&x->lock);
+
 	if (unlikely(err))
 		goto out;
 
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index d4b59ecb0b5..1b51d1eedbd 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -370,6 +370,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	ip6h->flow_lbl[2] = 0;
 	ip6h->hop_limit   = 0;
 
+	spin_lock(&x->lock);
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];
 
@@ -378,13 +379,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb_push(skb, hdr_len);
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
-			goto free_out;
+			goto unlock;
 		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
 			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
 			err = -EBADMSG;
-			goto free_out;
 		}
 	}
+unlock:
+	spin_unlock(&x->lock);
+
+	if (err)
+		goto free_out;
 
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 096974ba642..5bd5292ad9f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -165,30 +165,32 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 	}
 
+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	spin_lock(&x->lock);
+
 	/* If integrity check is required, do this. */
 	if (esp->auth.icv_full_len) {
 		u8 sum[alen];
 
 		ret = esp_mac_digest(esp, skb, 0, skb->len - alen);
 		if (ret)
-			goto out;
+			goto unlock;
 
 		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
 			ret = -EBADMSG;
-			goto out;
+			goto unlock;
 		}
 	}
 
-	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	skb->ip_summed = CHECKSUM_NONE;
-
 	esph = (struct ip_esp_hdr *)skb->data;
 	iph = ipv6_hdr(skb);
 
@@ -197,15 +199,13 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
 
 	{
-		u8 nexthdr[2];
 		struct scatterlist *sg = &esp->sgbuf[0];
-		u8 padlen;
 
 		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
 			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
 			if (!sg) {
 				ret = -ENOMEM;
-				goto out;
+				goto unlock;
 			}
 		}
 		sg_init_table(sg, nfrags);
@@ -215,8 +215,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
-		if (unlikely(ret))
-			goto out;
+	}
+
+unlock:
+	spin_unlock(&x->lock);
+
+	if (unlikely(ret))
+		goto out;
+
+	{
+		u8 nexthdr[2];
+		u8 padlen;
 
 		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 			BUG();
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index edfd9cdd721..49d396620ea 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -128,12 +128,15 @@ static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
+	int err = destopt->nexthdr;
 
+	spin_lock(&x->lock);
 	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
 	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
-		return -ENOENT;
+		err = -ENOENT;
+	spin_unlock(&x->lock);
 
-	return destopt->nexthdr;
+	return err;
 }
 
 /* Destination Option Header is inserted.
@@ -344,12 +347,15 @@ static struct xfrm_type mip6_destopt_type =
 static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
+	int err = rt2->rt_hdr.nexthdr;
 
+	spin_lock(&x->lock);
 	if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
 	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
-		return -ENOENT;
+		err = -ENOENT;
+	spin_unlock(&x->lock);
 
-	return rt2->rt_hdr.nexthdr;
+	return err;
 }
 
 /* Routing Header type 2 is inserted.
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b7d68eb9434..5cad522e8ef 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -146,7 +146,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		if (xfrm_state_check_expire(x))
 			goto drop_unlock;
 
+		spin_unlock(&x->lock);
+
 		nexthdr = x->type->input(x, skb);
+
+		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG)
 				x->stats.integrity_failed++;
-- 
cgit v1.2.3


From d26f398400311982d2433debae85746c348b7d58 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Nov 2007 21:47:08 -0800
Subject: [IPSEC]: Make x->lastused an unsigned long

Currently x->lastused is u64 which means that it cannot be
read/written atomically on all architectures.  David Miller observed
that the value stored in it is only an unsigned long which is always
atomic.

So based on his suggestion this patch changes the internal
representation from u64 to unsigned long while the user-interface
still refers to it as u64.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_mode_ro.c | 2 --
 net/xfrm/xfrm_user.c     | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 4a01cb3c370..63d5d493098 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -54,9 +54,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	__skb_pull(skb, hdr_len);
 	memmove(ipv6_hdr(skb), iph, hdr_len);
 
-	spin_lock_bh(&x->lock);
 	x->lastused = get_seconds();
-	spin_unlock_bh(&x->lock);
 
 	return 0;
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c4f6419b176..6424e536051 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1986,8 +1986,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 	if (x->coaddr)
 		l += nla_total_size(sizeof(*x->coaddr));
 
-	/* Must count this as this may become non-zero behind our back. */
-	l += nla_total_size(sizeof(x->lastused));
+	/* Must count x->lastused as it may become non-zero behind our back. */
+	l += nla_total_size(sizeof(u64));
 
 	return l;
 }
-- 
cgit v1.2.3


From 60d5fcfb19d8a958fc563e52240cd05ec23f36c9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 19 Nov 2007 18:47:58 -0800
Subject: [IPSEC]: Remove nhoff from xfrm_input

The nhoff field isn't actually necessary in xfrm_input.  For tunnel
mode transforms we now throw away the output IP header so it makes no
sense to fill in the nexthdr field.  For transport mode we can now let
the function transport_finish do the setting and it knows where the
nexthdr field is.

The only other thing that needs the nexthdr field to be set is the
header extraction code.  However, we can simply move the protocol
extraction out of the generic header extraction.

We want to minimise the amount of info we have to carry around between
transforms as this simplifies the resumption process for async crypto.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c  | 13 +++++++------
 net/ipv4/xfrm4_output.c |  2 ++
 net/ipv4/xfrm4_state.c  |  1 -
 net/ipv6/xfrm6_input.c  |  4 +++-
 net/ipv6/xfrm6_output.c |  3 ++-
 net/ipv6/xfrm6_state.c  |  2 --
 net/xfrm/xfrm_input.c   |  5 ++---
 7 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e374903dacd..662d1e86cfb 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -21,7 +21,6 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 	return xfrm4_extract_header(skb);
 }
 
-#ifdef CONFIG_NETFILTER
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
 	if (skb->dst == NULL) {
@@ -36,12 +35,10 @@ drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
-#endif
 
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type)
 {
-	XFRM_SPI_SKB_CB(skb)->nhoff = offsetof(struct iphdr, protocol);
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 	return xfrm_input(skb, nexthdr, spi, encap_type);
 }
@@ -49,16 +46,20 @@ EXPORT_SYMBOL(xfrm4_rcv_encap);
 
 int xfrm4_transport_finish(struct sk_buff *skb, int async)
 {
+	struct iphdr *iph = ip_hdr(skb);
+
+	iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+
 #ifdef CONFIG_NETFILTER
 	__skb_push(skb, skb->data - skb_network_header(skb));
-	ip_hdr(skb)->tot_len = htons(skb->len);
-	ip_send_check(ip_hdr(skb));
+	iph->tot_len = htons(skb->len);
+	ip_send_check(iph);
 
 	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
 		xfrm4_rcv_encap_finish);
 	return 0;
 #else
-	return -ip_hdr(skb)->protocol;
+	return -iph->protocol;
 #endif
 }
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 2fb4efa3ff2..1900200d3c0 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -47,6 +47,8 @@ int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (err)
 		return err;
 
+	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
+
 	return xfrm4_extract_header(skb);
 }
 
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 3b067e8b7bf..d837784a219 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -56,7 +56,6 @@ int xfrm4_extract_header(struct sk_buff *skb)
 	XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
 	XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
 	XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
-	XFRM_MODE_SKB_CB(skb)->protocol = iph->protocol;
 	memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
 	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
 
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 3b9eedf5b24..5c006c84594 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -23,7 +23,6 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
-	XFRM_SPI_SKB_CB(skb)->nhoff = IP6CB(skb)->nhoff;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
 	return xfrm_input(skb, nexthdr, spi, 0);
 }
@@ -31,6 +30,9 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
 
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
+	skb_network_header(skb)[IP6CB(skb)->nhoff] =
+		XFRM_MODE_SKB_CB(skb)->protocol;
+
 #ifdef CONFIG_NETFILTER
 	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index a0a924991c4..318669a9cb4 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -53,7 +53,8 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
+
 	return xfrm6_extract_header(skb);
 }
 
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 00360b514e9..df7e98d914f 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -178,8 +178,6 @@ int xfrm6_extract_header(struct sk_buff *skb)
 	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
 	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
 	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
-	XFRM_MODE_SKB_CB(skb)->protocol =
-		skb_network_header(skb)[IP6CB(skb)->nhoff];
 	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
 	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 5cad522e8ef..cce9d458604 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -102,7 +102,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	__be32 seq;
 	struct xfrm_state *x;
 	int decaps = 0;
-	unsigned int nhoff = XFRM_SPI_SKB_CB(skb)->nhoff;
 	unsigned int daddroff = XFRM_SPI_SKB_CB(skb)->daddroff;
 
 	/* Allocate new secpath or COW existing one. */
@@ -157,8 +156,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop_unlock;
 		}
 
-		skb_network_header(skb)[nhoff] = nexthdr;
-
 		/* only the first xfrm gets the encap type */
 		encap_type = 0;
 
@@ -170,6 +167,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		spin_unlock(&x->lock);
 
+		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
+
 		if (x->inner_mode->input(x, skb))
 			goto drop;
 
-- 
cgit v1.2.3


From 1bf06cd2e338fd6fc29169d30eaf0df982338285 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 19 Nov 2007 18:50:17 -0800
Subject: [IPSEC]: Add async resume support on input

This patch adds support for async resumptions on input.  To do so, the
transform would return -EINPROGRESS and subsequently invoke the
function xfrm_input_resume to resume processing.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c |  3 +++
 net/ipv6/xfrm6_input.c |  3 +++
 net/xfrm/xfrm_input.c  | 38 +++++++++++++++++++++++++++++++++-----
 3 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 662d1e86cfb..d5890c84a49 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -59,6 +59,9 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 		xfrm4_rcv_encap_finish);
 	return 0;
 #else
+	if (async)
+		return xfrm4_rcv_encap_finish(skb);
+
 	return -iph->protocol;
 #endif
 }
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 5c006c84594..e317d085546 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -41,6 +41,9 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 		ip6_rcv_finish);
 	return -1;
 #else
+	if (async)
+		return ip6_rcv_finish(skb);
+
 	return 1;
 #endif
 }
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index cce9d458604..96f42c1d2e8 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -101,8 +101,17 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	int err;
 	__be32 seq;
 	struct xfrm_state *x;
+	xfrm_address_t *daddr;
 	int decaps = 0;
-	unsigned int daddroff = XFRM_SPI_SKB_CB(skb)->daddroff;
+	int async = 0;
+
+	/* A negative encap_type indicates async resumption. */
+	if (encap_type < 0) {
+		async = 1;
+		x = skb->sp->xvec[skb->sp->len - 1];
+		seq = XFRM_SKB_CB(skb)->seq;
+		goto resume;
+	}
 
 	/* Allocate new secpath or COW existing one. */
 	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
@@ -116,6 +125,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		skb->sp = sp;
 	}
 
+	daddr = (xfrm_address_t *)(skb_network_header(skb) +
+				   XFRM_SPI_SKB_CB(skb)->daddroff);
+
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
 		goto drop;
@@ -124,9 +136,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		if (skb->sp->len == XFRM_MAX_DEPTH)
 			goto drop;
 
-		x = xfrm_state_lookup((xfrm_address_t *)
-				      (skb_network_header(skb) + daddroff),
-				      spi, nexthdr, AF_INET);
+		x = xfrm_state_lookup(daddr, spi, nexthdr, AF_INET);
 		if (x == NULL)
 			goto drop;
 
@@ -147,8 +157,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		spin_unlock(&x->lock);
 
+		XFRM_SKB_CB(skb)->seq = seq;
+
 		nexthdr = x->type->input(x, skb);
 
+		if (nexthdr == -EINPROGRESS)
+			return 0;
+
+resume:
 		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG)
@@ -177,6 +193,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			break;
 		}
 
+		/*
+		 * We need the inner address.  However, we only get here for
+		 * transport mode so the outer address is identical.
+		 */
+		daddr = &x->id.daddr;
+
 		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
 		if (err < 0)
 			goto drop;
@@ -190,7 +212,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		netif_rx(skb);
 		return 0;
 	} else {
-		return x->inner_mode->afinfo->transport_finish(skb, 0);
+		return x->inner_mode->afinfo->transport_finish(skb, async);
 	}
 
 drop_unlock:
@@ -201,6 +223,12 @@ drop:
 }
 EXPORT_SYMBOL(xfrm_input);
 
+int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
+{
+	return xfrm_input(skb, nexthdr, 0, -1);
+}
+EXPORT_SYMBOL(xfrm_input_resume);
+
 void __init xfrm_input_init(void)
 {
 	secpath_cachep = kmem_cache_create("secpath_cache",
-- 
cgit v1.2.3


From 6e23ae2a48750bda407a4a58f52a4865d7308bf5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 19 Nov 2007 18:53:30 -0800
Subject: [NETFILTER]: Introduce NF_INET_ hook values

The IPv4 and IPv6 hook values are identical, yet some code tries to figure
out the "correct" value by looking at the address family. Introduce NF_INET_*
values for both IPv4 and IPv6. The old values are kept in a #ifndef __KERNEL__
section for userspace compatibility.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c                      | 12 +++----
 net/compat.c                                   |  6 ++--
 net/ipv4/ip_forward.c                          |  2 +-
 net/ipv4/ip_input.c                            |  4 +--
 net/ipv4/ip_output.c                           | 12 +++----
 net/ipv4/ipmr.c                                |  2 +-
 net/ipv4/ipvs/ip_vs_core.c                     | 18 +++++------
 net/ipv4/ipvs/ip_vs_xmit.c                     |  2 +-
 net/ipv4/netfilter.c                           |  8 ++---
 net/ipv4/netfilter/ip_tables.c                 | 44 +++++++++++++-------------
 net/ipv4/netfilter/ipt_MASQUERADE.c            |  4 +--
 net/ipv4/netfilter/ipt_NETMAP.c                | 13 ++++----
 net/ipv4/netfilter/ipt_REDIRECT.c              |  8 ++---
 net/ipv4/netfilter/ipt_REJECT.c                |  6 ++--
 net/ipv4/netfilter/ipt_SAME.c                  |  7 ++--
 net/ipv4/netfilter/ipt_owner.c                 |  3 +-
 net/ipv4/netfilter/iptable_filter.c            | 22 +++++++------
 net/ipv4/netfilter/iptable_mangle.c            | 40 +++++++++++------------
 net/ipv4/netfilter/iptable_raw.c               | 14 ++++----
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 18 +++++------
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  2 +-
 net/ipv4/netfilter/nf_nat_core.c               | 14 ++++----
 net/ipv4/netfilter/nf_nat_h323.c               |  8 ++---
 net/ipv4/netfilter/nf_nat_helper.c             |  4 +--
 net/ipv4/netfilter/nf_nat_pptp.c               |  4 +--
 net/ipv4/netfilter/nf_nat_rule.c               | 28 ++++++++--------
 net/ipv4/netfilter/nf_nat_sip.c                |  4 +--
 net/ipv4/netfilter/nf_nat_standalone.c         | 14 ++++----
 net/ipv4/raw.c                                 |  2 +-
 net/ipv4/xfrm4_input.c                         |  2 +-
 net/ipv4/xfrm4_output.c                        |  4 +--
 net/ipv4/xfrm4_state.c                         |  2 +-
 net/ipv6/ip6_input.c                           |  6 ++--
 net/ipv6/ip6_output.c                          | 14 ++++----
 net/ipv6/mcast.c                               |  6 ++--
 net/ipv6/ndisc.c                               |  6 ++--
 net/ipv6/netfilter.c                           |  6 ++--
 net/ipv6/netfilter/ip6_tables.c                | 26 +++++++--------
 net/ipv6/netfilter/ip6t_REJECT.c               |  6 ++--
 net/ipv6/netfilter/ip6t_eui64.c                |  4 +--
 net/ipv6/netfilter/ip6t_owner.c                |  3 +-
 net/ipv6/netfilter/ip6table_filter.c           | 22 +++++++------
 net/ipv6/netfilter/ip6table_mangle.c           | 40 +++++++++++------------
 net/ipv6/netfilter/ip6table_raw.c              | 14 ++++----
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 12 +++----
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/ipv6/raw.c                                 |  2 +-
 net/ipv6/xfrm6_input.c                         |  2 +-
 net/ipv6/xfrm6_output.c                        |  2 +-
 net/ipv6/xfrm6_state.c                         |  2 +-
 net/netfilter/nf_conntrack_netlink.c           |  8 ++---
 net/netfilter/nf_conntrack_proto_tcp.c         |  4 +--
 net/netfilter/nf_conntrack_proto_udp.c         |  4 +--
 net/netfilter/nf_conntrack_proto_udplite.c     |  3 +-
 net/netfilter/xt_CLASSIFY.c                    | 12 +++----
 net/netfilter/xt_TCPMSS.c                      | 12 +++----
 net/netfilter/xt_mac.c                         | 12 +++----
 net/netfilter/xt_physdev.c                     |  6 ++--
 net/netfilter/xt_policy.c                      |  5 ++-
 net/netfilter/xt_realm.c                       |  4 +--
 net/sched/sch_ingress.c                        |  4 +--
 61 files changed, 296 insertions(+), 286 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 9f78a69d6b8..f9ef3e58b4c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -511,7 +511,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 
-	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish_ipv6);
 
 	return NF_STOLEN;
@@ -584,7 +584,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 		return NF_DROP;
 	store_orig_dstaddr(skb);
 
-	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish);
 
 	return NF_STOLEN;
@@ -681,7 +681,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
 
-	NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
+	NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
 		br_nf_forward_finish);
 
 	return NF_STOLEN;
@@ -832,7 +832,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	if (nf_bridge->netoutdev)
 		realoutdev = nf_bridge->netoutdev;
 #endif
-	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
+	NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
 		br_nf_dev_queue_xmit);
 
 	return NF_STOLEN;
@@ -905,12 +905,12 @@ static struct nf_hook_ops br_nf_ops[] = {
 	{ .hook = ip_sabotage_in,
 	  .owner = THIS_MODULE,
 	  .pf = PF_INET,
-	  .hooknum = NF_IP_PRE_ROUTING,
+	  .hooknum = NF_INET_PRE_ROUTING,
 	  .priority = NF_IP_PRI_FIRST, },
 	{ .hook = ip_sabotage_in,
 	  .owner = THIS_MODULE,
 	  .pf = PF_INET6,
-	  .hooknum = NF_IP6_PRE_ROUTING,
+	  .hooknum = NF_INET_PRE_ROUTING,
 	  .priority = NF_IP6_PRI_FIRST, },
 };
 
diff --git a/net/compat.c b/net/compat.c
index 377e560ab5c..f4ef4c04865 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -325,8 +325,8 @@ struct compat_ipt_replace {
 	u32			valid_hooks;
 	u32			num_entries;
 	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
+	u32			hook_entry[NF_INET_NUMHOOKS];
+	u32			underflow[NF_INET_NUMHOOKS];
 	u32			num_counters;
 	compat_uptr_t		counters;	/* struct ipt_counters * */
 	struct ipt_entry	entries[0];
@@ -391,7 +391,7 @@ static int do_netfilter_replace(int fd, int level, int optname,
 			   origsize))
 		goto out;
 
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
 		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
 		    __get_user(tmp32, &urepl->underflow[i]) ||
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 877da3ed52e..0b3b328d82d 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -110,7 +110,7 @@ int ip_forward(struct sk_buff *skb)
 
 	skb->priority = rt_tos2priority(iph->tos);
 
-	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
+	return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev,
 		       ip_forward_finish);
 
 sr_failed:
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 168c871fcd7..5b8a7603e60 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb)
 			return 0;
 	}
 
-	return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
+	return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
 		       ip_local_deliver_finish);
 }
 
@@ -442,7 +442,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Remove any debris in the socket control block */
 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 
-	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
+	return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
 
 inhdr_error:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 03b9b060027..6dd1d9c5d52 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -97,7 +97,7 @@ int __ip_local_out(struct sk_buff *skb)
 
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
-	return nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev,
+	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
 		       dst_output);
 }
 
@@ -270,8 +270,8 @@ int ip_mc_output(struct sk_buff *skb)
 		) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 			if (newskb)
-				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
-					newskb->dev,
+				NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
+					NULL, newskb->dev,
 					ip_dev_loopback_xmit);
 		}
 
@@ -286,11 +286,11 @@ int ip_mc_output(struct sk_buff *skb)
 	if (rt->rt_flags&RTCF_BROADCAST) {
 		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 		if (newskb)
-			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
+			NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
 				newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
@@ -304,7 +304,7 @@ int ip_output(struct sk_buff *skb)
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ba6c23cdf47..8e5d47a6060 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1245,7 +1245,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	 * not mrouter) cannot join to more than one interface - it will
 	 * result in receiving multiple packets.
 	 */
-	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
+	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
 		ipmr_forward_finish);
 	return;
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 8fba20256f5..30e8f757152 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -481,7 +481,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 
 /*
- *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING
+ *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
  *      chain, and is used for VS/NAT.
  *      It detects packets for VS/NAT connections and sends the packets
  *      immediately. This can avoid that iptable_nat mangles the packets
@@ -679,7 +679,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
 }
 
 /*
- *	It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
+ *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
  *	Check if outgoing packet belongs to the established ip_vs_conn,
  *      rewrite addresses of the packet and send it on its way...
  */
@@ -814,7 +814,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	/* reassemble IP fragments */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ?
+		if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
 					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
 			return NF_STOLEN;
 	}
@@ -1003,12 +1003,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 
 
 /*
- *	It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
+ *	It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
  *      related packets destined for 0.0.0.0/0.
  *      When fwmark-based virtual service is used, such as transparent
  *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
  *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
- *      sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
+ *      sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
@@ -1032,7 +1032,7 @@ static struct nf_hook_ops ip_vs_in_ops = {
 	.hook		= ip_vs_in,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
-	.hooknum        = NF_IP_LOCAL_IN,
+	.hooknum        = NF_INET_LOCAL_IN,
 	.priority       = 100,
 };
 
@@ -1041,7 +1041,7 @@ static struct nf_hook_ops ip_vs_out_ops = {
 	.hook		= ip_vs_out,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
-	.hooknum        = NF_IP_FORWARD,
+	.hooknum        = NF_INET_FORWARD,
 	.priority       = 100,
 };
 
@@ -1051,7 +1051,7 @@ static struct nf_hook_ops ip_vs_forward_icmp_ops = {
 	.hook		= ip_vs_forward_icmp,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
-	.hooknum        = NF_IP_FORWARD,
+	.hooknum        = NF_INET_FORWARD,
 	.priority       = 99,
 };
 
@@ -1060,7 +1060,7 @@ static struct nf_hook_ops ip_vs_post_routing_ops = {
 	.hook		= ip_vs_post_routing,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
-	.hooknum        = NF_IP_POST_ROUTING,
+	.hooknum        = NF_INET_POST_ROUTING,
 	.priority       = NF_IP_PRI_NAT_SRC-1,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 66775ad9e32..1e96bf82a0b 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -129,7 +129,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
 do {							\
 	(skb)->ipvs_property = 1;			\
 	skb_forward_csum(skb);				\
-	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
+	NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL,	\
 		(rt)->u.dst.dev, dst_output);		\
 } while (0)
 
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 5539debf497..d9022467e08 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -23,7 +23,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		addr_type = type;
 
 	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
-	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
+	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
 	 */
 	if (addr_type == RTN_LOCAL) {
 		fl.nl_u.ip4_u.daddr = iph->daddr;
@@ -126,7 +126,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
 {
 	struct ip_rt_info *rt_info = nf_info_reroute(info);
 
-	if (info->hook == NF_IP_LOCAL_OUT) {
+	if (info->hook == NF_INET_LOCAL_OUT) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		rt_info->tos = iph->tos;
@@ -139,7 +139,7 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info)
 {
 	const struct ip_rt_info *rt_info = nf_info_reroute(info);
 
-	if (info->hook == NF_IP_LOCAL_OUT) {
+	if (info->hook == NF_INET_LOCAL_OUT) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->tos == rt_info->tos
@@ -158,7 +158,7 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
-		if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
 			break;
 		if ((protocol == 0 && !csum_fold(skb->csum)) ||
 		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b9b189c2620..ca23c63ced3 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -220,11 +220,11 @@ unconditional(const struct ipt_ip *ip)
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 static const char *hooknames[] = {
-	[NF_IP_PRE_ROUTING]		= "PREROUTING",
-	[NF_IP_LOCAL_IN]		= "INPUT",
-	[NF_IP_FORWARD]			= "FORWARD",
-	[NF_IP_LOCAL_OUT]		= "OUTPUT",
-	[NF_IP_POST_ROUTING]		= "POSTROUTING",
+	[NF_INET_PRE_ROUTING]		= "PREROUTING",
+	[NF_INET_LOCAL_IN]		= "INPUT",
+	[NF_INET_FORWARD]			= "FORWARD",
+	[NF_INET_LOCAL_OUT]		= "OUTPUT",
+	[NF_INET_POST_ROUTING]		= "POSTROUTING",
 };
 
 enum nf_ip_trace_comments {
@@ -465,7 +465,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 
 	/* No recursion; use packet counter to save back ptrs (reset
 	   to 0 as we leave), and comefrom to save source hook bitmask */
-	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
+	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ipt_entry *e
 			= (struct ipt_entry *)(entry0 + pos);
@@ -481,13 +481,13 @@ mark_source_chains(struct xt_table_info *newinfo,
 				= (void *)ipt_get_target(e);
 			int visited = e->comefrom & (1 << hook);
 
-			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
+			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
 				printk("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
 			e->comefrom
-				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
+				|= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
 			if ((e->target_offset == sizeof(struct ipt_entry)
@@ -507,10 +507,10 @@ mark_source_chains(struct xt_table_info *newinfo,
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
-					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
+					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
 #ifdef DEBUG_IP_FIREWALL_USER
 					if (e->comefrom
-					    & (1 << NF_IP_NUMHOOKS)) {
+					    & (1 << NF_INET_NUMHOOKS)) {
 						duprintf("Back unset "
 							 "on hook %u "
 							 "rule %u\n",
@@ -741,7 +741,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 	}
 
 	/* Check hooks & underflows */
-	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h])
@@ -795,7 +795,7 @@ translate_table(const char *name,
 	newinfo->number = number;
 
 	/* Init all hooks to impossible value. */
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		newinfo->hook_entry[i] = 0xFFFFFFFF;
 		newinfo->underflow[i] = 0xFFFFFFFF;
 	}
@@ -819,7 +819,7 @@ translate_table(const char *name,
 	}
 
 	/* Check hooks all assigned */
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
 		if (!(valid_hooks & (1 << i)))
 			continue;
@@ -1107,7 +1107,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
 	if (ret)
 		return ret;
 
-	for (i = 0; i< NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		if (info->hook_entry[i] && (e < (struct ipt_entry *)
 				(base + info->hook_entry[i])))
 			newinfo->hook_entry[i] -= off;
@@ -1130,7 +1130,7 @@ static int compat_table_info(struct xt_table_info *info,
 	memset(newinfo, 0, sizeof(struct xt_table_info));
 	newinfo->size = info->size;
 	newinfo->number = info->number;
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		newinfo->hook_entry[i] = info->hook_entry[i];
 		newinfo->underflow[i] = info->underflow[i];
 	}
@@ -1479,8 +1479,8 @@ struct compat_ipt_replace {
 	u32			valid_hooks;
 	u32			num_entries;
 	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
+	u32			hook_entry[NF_INET_NUMHOOKS];
+	u32			underflow[NF_INET_NUMHOOKS];
 	u32			num_counters;
 	compat_uptr_t		counters;	/* struct ipt_counters * */
 	struct compat_ipt_entry	entries[0];
@@ -1645,7 +1645,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 		goto out;
 
 	/* Check hooks & underflows */
-	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h])
@@ -1700,7 +1700,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	xt_compat_target_from_user(t, dstptr, size);
 
 	de->next_offset = e->next_offset - (origsize - *size);
-	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if ((unsigned char *)de - base < newinfo->hook_entry[h])
 			newinfo->hook_entry[h] -= origsize - *size;
 		if ((unsigned char *)de - base < newinfo->underflow[h])
@@ -1753,7 +1753,7 @@ translate_compat_table(const char *name,
 	info->number = number;
 
 	/* Init all hooks to impossible value. */
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		info->hook_entry[i] = 0xFFFFFFFF;
 		info->underflow[i] = 0xFFFFFFFF;
 	}
@@ -1778,7 +1778,7 @@ translate_compat_table(const char *name,
 	}
 
 	/* Check hooks all assigned */
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
 		if (!(valid_hooks & (1 << i)))
 			continue;
@@ -1800,7 +1800,7 @@ translate_compat_table(const char *name,
 		goto out_unlock;
 
 	newinfo->number = number;
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		newinfo->hook_entry[i] = info->hook_entry[i];
 		newinfo->underflow[i] = info->underflow[i];
 	}
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 44b516e7cb7..5a18997bb3d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -67,7 +67,7 @@ masquerade_target(struct sk_buff *skb,
 	const struct rtable *rt;
 	__be32 newsrc;
 
-	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
+	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 	nat = nfct_nat(ct);
@@ -172,7 +172,7 @@ static struct xt_target masquerade __read_mostly = {
 	.target		= masquerade_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_IP_POST_ROUTING,
+	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= masquerade_check,
 	.me		= THIS_MODULE,
 };
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f8699291e33..973bbee7ee1 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -56,14 +56,14 @@ target(struct sk_buff *skb,
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
-		     || hooknum == NF_IP_POST_ROUTING
-		     || hooknum == NF_IP_LOCAL_OUT);
+	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
+		     || hooknum == NF_INET_POST_ROUTING
+		     || hooknum == NF_INET_LOCAL_OUT);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
-	if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
+	if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT)
 		new_ip = ip_hdr(skb)->daddr & ~netmask;
 	else
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -84,8 +84,9 @@ static struct xt_target target_module __read_mostly = {
 	.target 	= target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
-			  (1 << NF_IP_LOCAL_OUT),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT),
 	.checkentry 	= check,
 	.me 		= THIS_MODULE
 };
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f7cf7d61a2d..4757af293ba 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -60,14 +60,14 @@ redirect_target(struct sk_buff *skb,
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
-		     || hooknum == NF_IP_LOCAL_OUT);
+	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
+		     || hooknum == NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	/* Local packets: make them go to loopback */
-	if (hooknum == NF_IP_LOCAL_OUT)
+	if (hooknum == NF_INET_LOCAL_OUT)
 		newdst = htonl(0x7F000001);
 	else {
 		struct in_device *indev;
@@ -101,7 +101,7 @@ static struct xt_target redirect_reg __read_mostly = {
 	.target		= redirect_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= redirect_check,
 	.me		= THIS_MODULE,
 };
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index ccb2a03dcd5..d55b262bf60 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -123,7 +123,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	niph->id = 0;
 
 	addr_type = RTN_UNSPEC;
-	if (hook != NF_IP_FORWARD
+	if (hook != NF_INET_FORWARD
 #ifdef CONFIG_BRIDGE_NETFILTER
 	    || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
 #endif
@@ -234,8 +234,8 @@ static struct xt_target ipt_reject_reg __read_mostly = {
 	.target		= reject,
 	.targetsize	= sizeof(struct ipt_reject_info),
 	.table		= "filter",
-	.hooks		= (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
-			  (1 << NF_IP_LOCAL_OUT),
+	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= check,
 	.me		= THIS_MODULE,
 };
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 8988571436b..f2f62b5ce9a 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -119,8 +119,8 @@ same_target(struct sk_buff *skb,
 	struct nf_nat_range newrange;
 	const struct nf_conntrack_tuple *t;
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
-			hooknum == NF_IP_POST_ROUTING);
+	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
+			hooknum == NF_INET_POST_ROUTING);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
@@ -158,7 +158,8 @@ static struct xt_target same_reg __read_mostly = {
 	.target		= same_target,
 	.targetsize	= sizeof(struct ipt_same_info),
 	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING),
 	.checkentry	= same_check,
 	.destroy	= same_destroy,
 	.me		= THIS_MODULE,
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index b14e77da7a3..6bc4bfea66d 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -73,7 +73,8 @@ static struct xt_match owner_match __read_mostly = {
 	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_owner_info),
-	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING),
+	.hooks		= (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_POST_ROUTING),
 	.checkentry	= checkentry,
 	.me		= THIS_MODULE,
 };
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index ba3262c6043..06ab64e30e8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -19,7 +19,9 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables filter table");
 
-#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -33,14 +35,14 @@ static struct
 		.num_entries = 4,
 		.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
 		.hook_entry = {
-			[NF_IP_LOCAL_IN] = 0,
-			[NF_IP_FORWARD] = sizeof(struct ipt_standard),
-			[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+			[NF_INET_LOCAL_IN] = 0,
+			[NF_INET_FORWARD] = sizeof(struct ipt_standard),
+			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
 		},
 		.underflow = {
-			[NF_IP_LOCAL_IN] = 0,
-			[NF_IP_FORWARD] = sizeof(struct ipt_standard),
-			[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+			[NF_INET_LOCAL_IN] = 0,
+			[NF_INET_FORWARD] = sizeof(struct ipt_standard),
+			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
 		},
 	},
 	.entries = {
@@ -94,21 +96,21 @@ static struct nf_hook_ops ipt_ops[] = {
 		.hook		= ipt_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
 		.hook		= ipt_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_FORWARD,
+		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
 		.hook		= ipt_local_out_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 };
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index b4360a69d5c..0335827d3e4 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -21,11 +21,11 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables mangle table");
 
-#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | \
-			    (1 << NF_IP_LOCAL_IN) | \
-			    (1 << NF_IP_FORWARD) | \
-			    (1 << NF_IP_LOCAL_OUT) | \
-			    (1 << NF_IP_POST_ROUTING))
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			    (1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT) | \
+			    (1 << NF_INET_POST_ROUTING))
 
 /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
 static struct
@@ -40,18 +40,18 @@ static struct
 		.num_entries = 6,
 		.size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
 		.hook_entry = {
-			[NF_IP_PRE_ROUTING] 	= 0,
-			[NF_IP_LOCAL_IN] 	= sizeof(struct ipt_standard),
-			[NF_IP_FORWARD] 	= sizeof(struct ipt_standard) * 2,
-			[NF_IP_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
-			[NF_IP_POST_ROUTING] 	= sizeof(struct ipt_standard) * 4,
+			[NF_INET_PRE_ROUTING] 	= 0,
+			[NF_INET_LOCAL_IN] 	= sizeof(struct ipt_standard),
+			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard) * 2,
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
+			[NF_INET_POST_ROUTING] 	= sizeof(struct ipt_standard) * 4,
 		},
 		.underflow = {
-			[NF_IP_PRE_ROUTING] 	= 0,
-			[NF_IP_LOCAL_IN] 	= sizeof(struct ipt_standard),
-			[NF_IP_FORWARD] 	= sizeof(struct ipt_standard) * 2,
-			[NF_IP_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
-			[NF_IP_POST_ROUTING]	= sizeof(struct ipt_standard) * 4,
+			[NF_INET_PRE_ROUTING] 	= 0,
+			[NF_INET_LOCAL_IN] 	= sizeof(struct ipt_standard),
+			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard) * 2,
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
+			[NF_INET_POST_ROUTING]	= sizeof(struct ipt_standard) * 4,
 		},
 	},
 	.entries = {
@@ -133,35 +133,35 @@ static struct nf_hook_ops ipt_ops[] = {
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_FORWARD,
+		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_local_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 };
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index f8678651250..66be2329559 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -7,7 +7,7 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <net/ip.h>
 
-#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -21,12 +21,12 @@ static struct
 		.num_entries = 3,
 		.size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
 		.hook_entry = {
-			[NF_IP_PRE_ROUTING] = 0,
-			[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard)
+			[NF_INET_PRE_ROUTING] = 0,
+			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
 		},
 		.underflow = {
-			[NF_IP_PRE_ROUTING] = 0,
-			[NF_IP_LOCAL_OUT]  = sizeof(struct ipt_standard)
+			[NF_INET_PRE_ROUTING] = 0,
+			[NF_INET_LOCAL_OUT]  = sizeof(struct ipt_standard)
 		},
 	},
 	.entries = {
@@ -78,14 +78,14 @@ static struct nf_hook_ops ipt_ops[] = {
 	{
 		.hook = ipt_hook,
 		.pf = PF_INET,
-		.hooknum = NF_IP_PRE_ROUTING,
+		.hooknum = NF_INET_PRE_ROUTING,
 		.priority = NF_IP_PRI_RAW,
 		.owner = THIS_MODULE,
 	},
 	{
 		.hook = ipt_local_hook,
 		.pf = PF_INET,
-		.hooknum = NF_IP_LOCAL_OUT,
+		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP_PRI_RAW,
 		.owner = THIS_MODULE,
 	},
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 910dae732a0..c91725a8578 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -150,7 +150,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 	/* Gather fragments. */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		if (nf_ct_ipv4_gather_frags(skb,
-					    hooknum == NF_IP_PRE_ROUTING ?
+					    hooknum == NF_INET_PRE_ROUTING ?
 					    IP_DEFRAG_CONNTRACK_IN :
 					    IP_DEFRAG_CONNTRACK_OUT))
 			return NF_STOLEN;
@@ -190,56 +190,56 @@ static struct nf_hook_ops ipv4_conntrack_ops[] = {
 		.hook		= ipv4_conntrack_defrag,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook		= ipv4_conntrack_in,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK,
 	},
 	{
 		.hook           = ipv4_conntrack_defrag,
 		.owner          = THIS_MODULE,
 		.pf             = PF_INET,
-		.hooknum        = NF_IP_LOCAL_OUT,
+		.hooknum        = NF_INET_LOCAL_OUT,
 		.priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook		= ipv4_conntrack_local,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_CONNTRACK,
 	},
 	{
 		.hook		= ipv4_conntrack_help,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
 	},
 	{
 		.hook		= ipv4_conntrack_help,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
 	},
 	{
 		.hook		= ipv4_confirm,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
 	},
 	{
 		.hook		= ipv4_confirm,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
 	},
 };
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index adcbaf6d429..0e2c448ea38 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -195,7 +195,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
 	}
 
 	/* See ip_conntrack_proto_tcp.c */
-	if (nf_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 86b465b176b..d237511cf46 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -213,9 +213,9 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
 	*var_ipp = htonl(minip + j % (maxip - minip + 1));
 }
 
-/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
- * we change the source to map into the range.  For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
+/* Manipulate the tuple into the range given.  For NF_INET_POST_ROUTING,
+ * we change the source to map into the range.  For NF_INET_PRE_ROUTING
+ * and NF_INET_LOCAL_OUT, we change the destination to map into the
  * range.  It might not be possible to get a unique tuple, but we try.
  * At worst (or if we race), we will end up with a final duplicate in
  * __ip_conntrack_confirm and drop the packet. */
@@ -293,10 +293,10 @@ nf_nat_setup_info(struct nf_conn *ct,
 		}
 	}
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
-		     hooknum == NF_IP_POST_ROUTING ||
-		     hooknum == NF_IP_LOCAL_IN ||
-		     hooknum == NF_IP_LOCAL_OUT);
+	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
+		     hooknum == NF_INET_POST_ROUTING ||
+		     hooknum == NF_INET_LOCAL_IN ||
+		     hooknum == NF_INET_LOCAL_OUT);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 93e18ef114f..0f226df76f5 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -391,7 +391,7 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
 
 	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(new, &range, NF_INET_POST_ROUTING);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
@@ -400,7 +400,7 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
 
 	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(new, &range, NF_INET_PRE_ROUTING);
 }
 
 /****************************************************************************/
@@ -481,7 +481,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
 
 	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(new, &range, NF_INET_POST_ROUTING);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
@@ -489,7 +489,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
 	range.min_ip = range.max_ip = this->saved_ip;
 
 	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(new, &range, NF_INET_PRE_ROUTING);
 }
 
 /****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 8718da00ef2..d00b8b2891f 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -431,7 +431,7 @@ void nf_nat_follow_master(struct nf_conn *ct,
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
 	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
@@ -439,6 +439,6 @@ void nf_nat_follow_master(struct nf_conn *ct,
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
 	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
 }
 EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 6817e7995f3..c540999f509 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -94,7 +94,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		range.min = range.max = exp->saved_proto;
 	}
 	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
@@ -105,7 +105,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		range.min = range.max = exp->saved_proto;
 	}
 	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
 }
 
 /* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 46b25ab5f78..ee39ed87bb0 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -24,7 +24,9 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_rule.h>
 
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			 (1 << NF_INET_POST_ROUTING) | \
+			 (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -38,14 +40,14 @@ static struct
 		.num_entries = 4,
 		.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
 		.hook_entry = {
-			[NF_IP_PRE_ROUTING] = 0,
-			[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-			[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
+			[NF_INET_PRE_ROUTING] = 0,
+			[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
+			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
 		},
 		.underflow = {
-			[NF_IP_PRE_ROUTING] = 0,
-			[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-			[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
+			[NF_INET_PRE_ROUTING] = 0,
+			[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
+			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
 		},
 	},
 	.entries = {
@@ -76,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
-	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
+	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
@@ -118,15 +120,15 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
-		     hooknum == NF_IP_LOCAL_OUT);
+	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
+		     hooknum == NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
-	if (hooknum == NF_IP_LOCAL_OUT &&
+	if (hooknum == NF_INET_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
 		warn_if_extra_mangle(ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
@@ -227,7 +229,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_IP_POST_ROUTING,
+	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
@@ -237,7 +239,7 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
 	.target		= ipt_dnat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= ipt_dnat_checkentry,
 	.family		= AF_INET,
 };
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 8996ccb757d..b8c0720cf42 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -229,14 +229,14 @@ static void ip_nat_sdp_expect(struct nf_conn *ct,
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
 	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip = exp->saved_ip;
 	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
 }
 
 /* So, this packet has hit the connection tracking matching code.
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 7db76ea9af9..84172e9dcb1 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
 			if (unlikely(nf_ct_is_confirmed(ct)))
 				/* NAT module was loaded late */
 				ret = alloc_null_binding_confirmed(ct, hooknum);
-			else if (hooknum == NF_IP_LOCAL_IN)
+			else if (hooknum == NF_INET_LOCAL_IN)
 				/* LOCAL_IN hook doesn't have a chain!  */
 				ret = alloc_null_binding(ct, hooknum);
 			else
@@ -279,7 +279,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 		.hook		= nf_nat_in,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_NAT_DST,
 	},
 	/* After packet filtering, change source */
@@ -287,7 +287,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 		.hook		= nf_nat_out,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_NAT_SRC,
 	},
 	/* After conntrack, adjust sequence number */
@@ -295,7 +295,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 		.hook		= nf_nat_adjust,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
 	},
 	/* Before packet filtering, change destination */
@@ -303,7 +303,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 		.hook		= nf_nat_local_fn,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_NAT_DST,
 	},
 	/* After packet filtering, change source */
@@ -311,7 +311,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 		.hook		= nf_nat_fn,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_NAT_SRC,
 	},
 	/* After conntrack, adjust sequence number */
@@ -319,7 +319,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 		.hook		= nf_nat_adjust,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
 	},
 };
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 761056ef493..b80987d2fc5 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -321,7 +321,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		icmp_out_count(((struct icmphdr *)
 			skb_transport_header(skb))->type);
 
-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+	err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		      dst_output);
 	if (err > 0)
 		err = inet->recverr ? net_xmit_errno(err) : 0;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index d5890c84a49..0c377a66b8b 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -55,7 +55,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
 
-	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		xfrm4_rcv_encap_finish);
 	return 0;
 #else
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 1900200d3c0..d5a58a81802 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -86,7 +86,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
 
 int xfrm4_output(struct sk_buff *skb)
 {
-	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
-			    xfrm4_output_finish,
+	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb,
+			    NULL, skb->dst->dev, xfrm4_output_finish,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d837784a219..29611359894 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -66,7 +66,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.proto			= IPPROTO_IPIP,
 	.eth_proto		= htons(ETH_P_IP),
-	.nf_post_routing	= NF_IP_POST_ROUTING,
+	.nf_post_routing	= NF_INET_POST_ROUTING,
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index fac6f7f9dd7..79610b4bad3 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -134,7 +134,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 
 	rcu_read_unlock();
 
-	return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
+	return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL,
+		       ip6_rcv_finish);
 err:
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 drop:
@@ -229,7 +230,8 @@ discard:
 
 int ip6_input(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish);
+	return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+		       ip6_input_finish);
 }
 
 int ip6_mc_input(struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bd121f9ae0a..d54da616e3a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -79,7 +79,7 @@ int __ip6_local_out(struct sk_buff *skb)
 		len = 0;
 	ipv6_hdr(skb)->payload_len = htons(len);
 
-	return nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
+	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
 		       dst_output);
 }
 
@@ -145,8 +145,8 @@ static int ip6_output2(struct sk_buff *skb)
 			   is not supported in any case.
 			 */
 			if (newskb)
-				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
-					newskb->dev,
+				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
+					NULL, newskb->dev,
 					ip6_dev_loopback_xmit);
 
 			if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -159,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb)
 		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
 	}
 
-	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
+	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
+		       ip6_output_finish);
 }
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
@@ -261,7 +262,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst),
 			      IPSTATS_MIB_OUTREQUESTS);
-		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
+		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 				dst_output);
 	}
 
@@ -525,7 +526,8 @@ int ip6_forward(struct sk_buff *skb)
 	hdr->hop_limit--;
 
 	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
+	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
+		       ip6_forward_finish);
 
 error:
 	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 17d7318ff7b..82b12940c2a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1448,7 +1448,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
 
 static inline int mld_dev_queue_xmit(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
+	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 		       mld_dev_queue_xmit2);
 }
 
@@ -1469,7 +1469,7 @@ static void mld_sendpack(struct sk_buff *skb)
 	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
 		IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
 					     mldlen, 0));
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
+	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		mld_dev_queue_xmit);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
@@ -1813,7 +1813,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	idev = in6_dev_get(skb->dev);
 
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
+	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		mld_dev_queue_xmit);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, type);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 85947eae5bf..b2531f80317 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -533,7 +533,8 @@ static void __ndisc_send(struct net_device *dev,
 	idev = in6_dev_get(dst->dev);
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
 
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
+		      dst_output);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, type);
 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
@@ -1538,7 +1539,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	buff->dst = dst;
 	idev = in6_dev_get(dst->dev);
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output);
+	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
+		      dst_output);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index b1326c2bf8a..175e19f8025 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -60,7 +60,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
 {
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
-	if (info->hook == NF_IP6_LOCAL_OUT) {
+	if (info->hook == NF_INET_LOCAL_OUT) {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		rt_info->daddr = iph->daddr;
@@ -72,7 +72,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
 {
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
-	if (info->hook == NF_IP6_LOCAL_OUT) {
+	if (info->hook == NF_INET_LOCAL_OUT) {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
@@ -89,7 +89,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
-		if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN)
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
 			break;
 		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
 				     skb->len - dataoff, protocol,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index acaba153793..e1e87eff468 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -258,11 +258,11 @@ unconditional(const struct ip6t_ip6 *ipv6)
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 /* This cries for unification! */
 static const char *hooknames[] = {
-	[NF_IP6_PRE_ROUTING]		= "PREROUTING",
-	[NF_IP6_LOCAL_IN]		= "INPUT",
-	[NF_IP6_FORWARD]		= "FORWARD",
-	[NF_IP6_LOCAL_OUT]		= "OUTPUT",
-	[NF_IP6_POST_ROUTING]		= "POSTROUTING",
+	[NF_INET_PRE_ROUTING]		= "PREROUTING",
+	[NF_INET_LOCAL_IN]		= "INPUT",
+	[NF_INET_FORWARD]		= "FORWARD",
+	[NF_INET_LOCAL_OUT]		= "OUTPUT",
+	[NF_INET_POST_ROUTING]		= "POSTROUTING",
 };
 
 enum nf_ip_trace_comments {
@@ -502,7 +502,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 
 	/* No recursion; use packet counter to save back ptrs (reset
 	   to 0 as we leave), and comefrom to save source hook bitmask */
-	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
+	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ip6t_entry *e
 			= (struct ip6t_entry *)(entry0 + pos);
@@ -518,13 +518,13 @@ mark_source_chains(struct xt_table_info *newinfo,
 			struct ip6t_standard_target *t
 				= (void *)ip6t_get_target(e);
 
-			if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
+			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
 				printk("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
 			e->comefrom
-				|= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
+				|= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
 			if ((e->target_offset == sizeof(struct ip6t_entry)
@@ -544,10 +544,10 @@ mark_source_chains(struct xt_table_info *newinfo,
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
-					e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
+					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
 #ifdef DEBUG_IP_FIREWALL_USER
 					if (e->comefrom
-					    & (1 << NF_IP6_NUMHOOKS)) {
+					    & (1 << NF_INET_NUMHOOKS)) {
 						duprintf("Back unset "
 							 "on hook %u "
 							 "rule %u\n",
@@ -746,7 +746,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 	}
 
 	/* Check hooks & underflows */
-	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h])
@@ -800,7 +800,7 @@ translate_table(const char *name,
 	newinfo->number = number;
 
 	/* Init all hooks to impossible value. */
-	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		newinfo->hook_entry[i] = 0xFFFFFFFF;
 		newinfo->underflow[i] = 0xFFFFFFFF;
 	}
@@ -824,7 +824,7 @@ translate_table(const char *name,
 	}
 
 	/* Check hooks all assigned */
-	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
 		if (!(valid_hooks & (1 << i)))
 			continue;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index c1c66348283..960ba1780a9 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -164,7 +164,7 @@ static void send_reset(struct sk_buff *oldskb)
 static inline void
 send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
 {
-	if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
+	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
 		skb_in->dev = init_net.loopback_dev;
 
 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
@@ -243,8 +243,8 @@ static struct xt_target ip6t_reject_reg __read_mostly = {
 	.target		= reject6_target,
 	.targetsize	= sizeof(struct ip6t_reject_info),
 	.table		= "filter",
-	.hooks		= (1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) |
-			  (1 << NF_IP6_LOCAL_OUT),
+	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= check,
 	.me		= THIS_MODULE
 };
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 41df9a578c7..ff71269579d 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -67,8 +67,8 @@ static struct xt_match eui64_match __read_mostly = {
 	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(int),
-	.hooks		= (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) |
-			  (1 << NF_IP6_FORWARD),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_FORWARD),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 6036613aef3..1e0dc4a972c 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -73,7 +73,8 @@ static struct xt_match owner_match __read_mostly = {
 	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(struct ip6t_owner_info),
-	.hooks		= (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING),
+	.hooks		= (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_POST_ROUTING),
 	.checkentry	= checkentry,
 	.me		= THIS_MODULE,
 };
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1d26b202bf3..0ae072dd692 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -17,7 +17,9 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables filter table");
 
-#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -31,14 +33,14 @@ static struct
 		.num_entries = 4,
 		.size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
 		.hook_entry = {
-			[NF_IP6_LOCAL_IN] = 0,
-			[NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
-			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
+			[NF_INET_LOCAL_IN] = 0,
+			[NF_INET_FORWARD] = sizeof(struct ip6t_standard),
+			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
 		},
 		.underflow = {
-			[NF_IP6_LOCAL_IN] = 0,
-			[NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
-			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
+			[NF_INET_LOCAL_IN] = 0,
+			[NF_INET_FORWARD] = sizeof(struct ip6t_standard),
+			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
 		},
 	},
 	.entries = {
@@ -93,21 +95,21 @@ static struct nf_hook_ops ip6t_ops[] = {
 		.hook		= ip6t_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_FILTER,
 	},
 	{
 		.hook		= ip6t_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_FORWARD,
+		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP6_PRI_FILTER,
 	},
 	{
 		.hook		= ip6t_local_out_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_FILTER,
 	},
 };
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a0b6381f1e8..8e62b231682 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -15,11 +15,11 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables mangle table");
 
-#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \
-			    (1 << NF_IP6_LOCAL_IN) | \
-			    (1 << NF_IP6_FORWARD) | \
-			    (1 << NF_IP6_LOCAL_OUT) | \
-			    (1 << NF_IP6_POST_ROUTING))
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			    (1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT) | \
+			    (1 << NF_INET_POST_ROUTING))
 
 static struct
 {
@@ -33,18 +33,18 @@ static struct
 		.num_entries = 6,
 		.size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
 		.hook_entry = {
-			[NF_IP6_PRE_ROUTING] 	= 0,
-			[NF_IP6_LOCAL_IN]	= sizeof(struct ip6t_standard),
-			[NF_IP6_FORWARD]	= sizeof(struct ip6t_standard) * 2,
-			[NF_IP6_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
-			[NF_IP6_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4,
+			[NF_INET_PRE_ROUTING] 	= 0,
+			[NF_INET_LOCAL_IN]	= sizeof(struct ip6t_standard),
+			[NF_INET_FORWARD]	= sizeof(struct ip6t_standard) * 2,
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
+			[NF_INET_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4,
 		},
 		.underflow = {
-			[NF_IP6_PRE_ROUTING] 	= 0,
-			[NF_IP6_LOCAL_IN]	= sizeof(struct ip6t_standard),
-			[NF_IP6_FORWARD]	= sizeof(struct ip6t_standard) * 2,
-			[NF_IP6_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
-			[NF_IP6_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4,
+			[NF_INET_PRE_ROUTING] 	= 0,
+			[NF_INET_LOCAL_IN]	= sizeof(struct ip6t_standard),
+			[NF_INET_FORWARD]	= sizeof(struct ip6t_standard) * 2,
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
+			[NF_INET_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4,
 		},
 	},
 	.entries = {
@@ -125,35 +125,35 @@ static struct nf_hook_ops ip6t_ops[] = {
 		.hook		= ip6t_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook		= ip6t_local_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook		= ip6t_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_FORWARD,
+		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook		= ip6t_local_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook		= ip6t_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 };
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 8f7109f991e..4fecd8de8cc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -6,7 +6,7 @@
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT))
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -20,12 +20,12 @@ static struct
 		.num_entries = 3,
 		.size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
 		.hook_entry = {
-			[NF_IP6_PRE_ROUTING] = 0,
-			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
+			[NF_INET_PRE_ROUTING] = 0,
+			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
 		},
 		.underflow = {
-			[NF_IP6_PRE_ROUTING] = 0,
-			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
+			[NF_INET_PRE_ROUTING] = 0,
+			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
 		},
 	},
 	.entries = {
@@ -58,14 +58,14 @@ static struct nf_hook_ops ip6t_ops[] = {
 	{
 	  .hook = ip6t_hook,
 	  .pf = PF_INET6,
-	  .hooknum = NF_IP6_PRE_ROUTING,
+	  .hooknum = NF_INET_PRE_ROUTING,
 	  .priority = NF_IP6_PRI_FIRST,
 	  .owner = THIS_MODULE,
 	},
 	{
 	  .hook = ip6t_hook,
 	  .pf = PF_INET6,
-	  .hooknum = NF_IP6_LOCAL_OUT,
+	  .hooknum = NF_INET_LOCAL_OUT,
 	  .priority = NF_IP6_PRI_FIRST,
 	  .owner = THIS_MODULE,
 	},
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ad74bab0504..50f46787fda 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -263,42 +263,42 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = {
 		.hook		= ipv6_defrag,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook		= ipv6_conntrack_in,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_CONNTRACK,
 	},
 	{
 		.hook		= ipv6_conntrack_local,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_CONNTRACK,
 	},
 	{
 		.hook		= ipv6_defrag,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook		= ipv6_confirm,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP6_PRI_LAST,
 	},
 	{
 		.hook		= ipv6_confirm,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_LAST-1,
 	},
 };
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index fd9123f3dc0..e99384f9764 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -192,7 +192,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
 		return -NF_ACCEPT;
 	}
 
-	if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ae314f3fea4..ad622cc11bd 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -619,7 +619,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error_fault;
 
 	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		      dst_output);
 	if (err > 0)
 		err = np->recverr ? net_xmit_errno(err) : 0;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index e317d085546..e2c3efd2579 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -37,7 +37,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
 
-	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		ip6_rcv_finish);
 	return -1;
 #else
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 318669a9cb4..b34c58c6565 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -89,6 +89,6 @@ static int xfrm6_output_finish(struct sk_buff *skb)
 
 int xfrm6_output(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
+	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev,
 		       xfrm6_output_finish);
 }
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index df7e98d914f..29e0d25b9e1 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -188,7 +188,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
 	.eth_proto		= htons(ETH_P_IPV6),
-	.nf_post_routing	= NF_IP6_POST_ROUTING,
+	.nf_post_routing	= NF_INET_POST_ROUTING,
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7d231243754..a15971e9923 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -829,18 +829,18 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 						&range) < 0)
 				return -EINVAL;
 			if (nf_nat_initialized(ct,
-					       HOOK2MANIP(NF_IP_PRE_ROUTING)))
+					       HOOK2MANIP(NF_INET_PRE_ROUTING)))
 				return -EEXIST;
-			nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+			nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
 		}
 		if (cda[CTA_NAT_SRC]) {
 			if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
 						&range) < 0)
 				return -EINVAL;
 			if (nf_nat_initialized(ct,
-					       HOOK2MANIP(NF_IP_POST_ROUTING)))
+					       HOOK2MANIP(NF_INET_POST_ROUTING)))
 				return -EEXIST;
-			nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+			nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
 		}
 #endif
 	}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 7a3f64c1aca..d96f18863fd 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -783,9 +783,7 @@ static int tcp_error(struct sk_buff *skb,
 	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum &&
-	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index b3e7ecb080e..570a2e10947 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -128,9 +128,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 	 * We skip checking packets on the outgoing path
 	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum &&
-	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
 		if (LOG_INVALID(IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index b8981dd922b..7e116d5766d 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -133,8 +133,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Checksum invalid? Ignore. */
 	if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) &&
-	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))) {
+	    hooknum == NF_INET_PRE_ROUTING) {
 		if (pf == PF_INET) {
 			struct iphdr *iph = ip_hdr(skb);
 
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 77eeae658d4..e4f7f86d7dd 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -47,9 +47,9 @@ static struct xt_target xt_classify_target[] __read_mostly = {
 		.target 	= target,
 		.targetsize	= sizeof(struct xt_classify_target_info),
 		.table		= "mangle",
-		.hooks		= (1 << NF_IP_LOCAL_OUT) |
-				  (1 << NF_IP_FORWARD) |
-				  (1 << NF_IP_POST_ROUTING),
+		.hooks		= (1 << NF_INET_LOCAL_OUT) |
+				  (1 << NF_INET_FORWARD) |
+				  (1 << NF_INET_POST_ROUTING),
 		.me 		= THIS_MODULE,
 	},
 	{
@@ -58,9 +58,9 @@ static struct xt_target xt_classify_target[] __read_mostly = {
 		.target 	= target,
 		.targetsize	= sizeof(struct xt_classify_target_info),
 		.table		= "mangle",
-		.hooks		= (1 << NF_IP6_LOCAL_OUT) |
-				  (1 << NF_IP6_FORWARD) |
-				  (1 << NF_IP6_POST_ROUTING),
+		.hooks		= (1 << NF_INET_LOCAL_OUT) |
+				  (1 << NF_INET_FORWARD) |
+				  (1 << NF_INET_POST_ROUTING),
 		.me 		= THIS_MODULE,
 	},
 };
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8e76d1f52fb..f183c8fa47a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -214,9 +214,9 @@ xt_tcpmss_checkentry4(const char *tablename,
 	const struct ipt_entry *e = entry;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_IP_FORWARD) |
-			   (1 << NF_IP_LOCAL_OUT) |
-			   (1 << NF_IP_POST_ROUTING))) != 0) {
+	    (hook_mask & ~((1 << NF_INET_FORWARD) |
+			   (1 << NF_INET_LOCAL_OUT) |
+			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
@@ -239,9 +239,9 @@ xt_tcpmss_checkentry6(const char *tablename,
 	const struct ip6t_entry *e = entry;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_IP6_FORWARD) |
-			   (1 << NF_IP6_LOCAL_OUT) |
-			   (1 << NF_IP6_POST_ROUTING))) != 0) {
+	    (hook_mask & ~((1 << NF_INET_FORWARD) |
+			   (1 << NF_INET_LOCAL_OUT) |
+			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 00490d777a0..6ff4479ca63 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -50,9 +50,9 @@ static struct xt_match xt_mac_match[] __read_mostly = {
 		.family		= AF_INET,
 		.match		= match,
 		.matchsize	= sizeof(struct xt_mac_info),
-		.hooks		= (1 << NF_IP_PRE_ROUTING) |
-				  (1 << NF_IP_LOCAL_IN) |
-				  (1 << NF_IP_FORWARD),
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN) |
+				  (1 << NF_INET_FORWARD),
 		.me		= THIS_MODULE,
 	},
 	{
@@ -60,9 +60,9 @@ static struct xt_match xt_mac_match[] __read_mostly = {
 		.family		= AF_INET6,
 		.match		= match,
 		.matchsize	= sizeof(struct xt_mac_info),
-		.hooks		= (1 << NF_IP6_PRE_ROUTING) |
-				  (1 << NF_IP6_LOCAL_IN) |
-				  (1 << NF_IP6_FORWARD),
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN) |
+				  (1 << NF_INET_FORWARD),
 		.me		= THIS_MODULE,
 	},
 };
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index a4bab043a6d..e91aee74de5 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -113,12 +113,12 @@ checkentry(const char *tablename,
 	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
-	    hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-			 (1 << NF_IP_POST_ROUTING))) {
+	    hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+			 (1 << NF_INET_POST_ROUTING))) {
 		printk(KERN_WARNING "physdev match: using --physdev-out in the "
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
 		       "traffic is not supported anymore.\n");
-		if (hook_mask & (1 << NF_IP_LOCAL_OUT))
+		if (hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 6d6d3b7fcbb..2eaa6fd089c 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -144,14 +144,13 @@ static bool checkentry(const char *tablename, const void *ip_void,
 				"outgoing policy selected\n");
 		return false;
 	}
-	/* hook values are equal for IPv4 and IPv6 */
-	if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
+	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN)
 	    && info->flags & XT_POLICY_MATCH_OUT) {
 		printk(KERN_ERR "xt_policy: output policy not valid in "
 				"PRE_ROUTING and INPUT\n");
 		return false;
 	}
-	if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
+	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT)
 	    && info->flags & XT_POLICY_MATCH_IN) {
 		printk(KERN_ERR "xt_policy: input policy not valid in "
 				"POST_ROUTING and OUTPUT\n");
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index cc3e76d77a9..91113dcbe0f 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -41,8 +41,8 @@ static struct xt_match realm_match __read_mostly = {
 	.name		= "realm",
 	.match		= match,
 	.matchsize	= sizeof(struct xt_realm_info),
-	.hooks		= (1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
-			  (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN),
+	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
 	.family		= AF_INET,
 	.me		= THIS_MODULE
 };
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3f8335e6ea2..d377deca4f2 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -235,7 +235,7 @@ static struct nf_hook_ops ing_ops = {
 	.hook           = ing_hook,
 	.owner		= THIS_MODULE,
 	.pf             = PF_INET,
-	.hooknum        = NF_IP_PRE_ROUTING,
+	.hooknum        = NF_INET_PRE_ROUTING,
 	.priority       = NF_IP_PRI_FILTER + 1,
 };
 
@@ -243,7 +243,7 @@ static struct nf_hook_ops ing6_ops = {
 	.hook           = ing_hook,
 	.owner		= THIS_MODULE,
 	.pf             = PF_INET6,
-	.hooknum        = NF_IP6_PRE_ROUTING,
+	.hooknum        = NF_INET_PRE_ROUTING,
 	.priority       = NF_IP6_PRI_FILTER + 1,
 };
 
-- 
cgit v1.2.3


From 294b4baf292197e13d1df1d253efa7ac84ffee3f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 14 Nov 2007 01:57:47 -0800
Subject: [IPSEC]: Kill afinfo->nf_post_routing

After changeset:

	[NETFILTER]: Introduce NF_INET_ hook values

It always evaluates to NF_INET_POST_ROUTING.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_state.c | 1 -
 net/ipv6/xfrm6_state.c | 1 -
 net/xfrm/xfrm_output.c | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 29611359894..fdeebe68a37 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -66,7 +66,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.proto			= IPPROTO_IPIP,
 	.eth_proto		= htons(ETH_P_IP),
-	.nf_post_routing	= NF_INET_POST_ROUTING,
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 29e0d25b9e1..a7a7e8fd6a3 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -188,7 +188,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
 	.eth_proto		= htons(ETH_P_IPV6),
-	.nf_post_routing	= NF_INET_POST_ROUTING,
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 048d240c3e1..3c277a4d0e7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -113,7 +113,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err)
 			return dst_output(skb);
 
 		err = nf_hook(x->inner_mode->afinfo->family,
-			      x->inner_mode->afinfo->nf_post_routing, skb,
+			      NF_INET_POST_ROUTING, skb,
 			      NULL, skb->dst->dev, xfrm_output2);
 		if (unlikely(err != 1))
 			goto out;
-- 
cgit v1.2.3


From c1ee656ccb3b03304d38f852debccdd1567702e6 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 14 Nov 2007 15:55:29 +0900
Subject: [IPV6] ADDRCONF: Rename ipv6_saddr_label() to ipv6_addr_label().

This patch renames ipv6_saddr_label() to ipv6_addr_label() because
address label is used for both of source address and destination
address.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c0720e4659b..733374b68d2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -875,7 +875,7 @@ static inline int ipv6_saddr_preferred(int type)
 }
 
 /* static matching label */
-static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
+static inline int ipv6_addr_label(const struct in6_addr *addr, int type)
 {
  /*
   * 	prefix (longest match)	label
@@ -910,7 +910,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 	struct inet6_ifaddr *ifa_result = NULL;
 	int daddr_type = __ipv6_addr_type(daddr);
 	int daddr_scope = __ipv6_addr_src_scope(daddr_type);
-	u32 daddr_label = ipv6_saddr_label(daddr, daddr_type);
+	u32 daddr_label = ipv6_addr_label(daddr, daddr_type);
 	struct net_device *dev;
 
 	memset(&hiscore, 0, sizeof(hiscore));
@@ -1083,11 +1083,13 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 
 			/* Rule 6: Prefer matching label */
 			if (hiscore.rule < 6) {
-				if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label)
+				if (ipv6_addr_label(&ifa_result->addr,
+						    hiscore.addr_type) == daddr_label)
 					hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
 				hiscore.rule++;
 			}
-			if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) {
+			if (ipv6_addr_label(&ifa->addr,
+					    score.addr_type) == daddr_label) {
 				score.attrs |= IPV6_SADDR_SCORE_LABEL;
 				if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
 					score.rule = 6;
-- 
cgit v1.2.3


From 303065a8545bf7524550bd9564afb48e8a685a2d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 14 Nov 2007 15:56:15 +0900
Subject: [IPV6] ADDRCONF: Allow address selection policy with ifindex.

This patch allows ifindex to be a key for address selection policy table.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 733374b68d2..e1e591bfbdc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -875,7 +875,8 @@ static inline int ipv6_saddr_preferred(int type)
 }
 
 /* static matching label */
-static inline int ipv6_addr_label(const struct in6_addr *addr, int type)
+static inline int ipv6_addr_label(const struct in6_addr *addr, int type,
+				  int ifindex)
 {
  /*
   * 	prefix (longest match)	label
@@ -910,7 +911,8 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 	struct inet6_ifaddr *ifa_result = NULL;
 	int daddr_type = __ipv6_addr_type(daddr);
 	int daddr_scope = __ipv6_addr_src_scope(daddr_type);
-	u32 daddr_label = ipv6_addr_label(daddr, daddr_type);
+	int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0;
+	u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex);
 	struct net_device *dev;
 
 	memset(&hiscore, 0, sizeof(hiscore));
@@ -1084,12 +1086,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 			/* Rule 6: Prefer matching label */
 			if (hiscore.rule < 6) {
 				if (ipv6_addr_label(&ifa_result->addr,
-						    hiscore.addr_type) == daddr_label)
+						    hiscore.addr_type,
+						    ifa_result->idev->dev->ifindex) == daddr_label)
 					hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
 				hiscore.rule++;
 			}
 			if (ipv6_addr_label(&ifa->addr,
-					    score.addr_type) == daddr_label) {
+					    score.addr_type,
+					    ifa->idev->dev->ifindex) == daddr_label) {
 				score.attrs |= IPV6_SADDR_SCORE_LABEL;
 				if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
 					score.rule = 6;
-- 
cgit v1.2.3


From 2a8cc6c89039e0530a3335954253b76ed0f9339a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 14 Nov 2007 15:56:23 +0900
Subject: [IPV6] ADDRCONF: Support RFC3484 configurable address selection
 policy table.

Policy table is implemented as an RCU linear list since we do not expect
large list nor frequent updates.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Makefile    |   1 +
 net/ipv6/addrconf.c  |  40 +---
 net/ipv6/addrlabel.c | 551 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 561 insertions(+), 31 deletions(-)
 create mode 100644 net/ipv6/addrlabel.c

(limited to 'net')

diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 87c23a73d28..5ffa9800305 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_IPV6) += ipv6.o
 
 ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
+		addrlabel.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
 		exthdrs.o sysctl_net_ipv6.o datagram.o \
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1e591bfbdc..a70cecf8fc8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -874,36 +874,6 @@ static inline int ipv6_saddr_preferred(int type)
 	return 0;
 }
 
-/* static matching label */
-static inline int ipv6_addr_label(const struct in6_addr *addr, int type,
-				  int ifindex)
-{
- /*
-  * 	prefix (longest match)	label
-  * 	-----------------------------
-  * 	::1/128			0
-  * 	::/0			1
-  * 	2002::/16		2
-  * 	::/96			3
-  * 	::ffff:0:0/96		4
-  *	fc00::/7		5
-  * 	2001::/32		6
-  */
-	if (type & IPV6_ADDR_LOOPBACK)
-		return 0;
-	else if (type & IPV6_ADDR_COMPATv4)
-		return 3;
-	else if (type & IPV6_ADDR_MAPPED)
-		return 4;
-	else if (addr->s6_addr32[0] == htonl(0x20010000))
-		return 6;
-	else if (addr->s6_addr16[0] == htons(0x2002))
-		return 2;
-	else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
-		return 5;
-	return 1;
-}
-
 int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 		       struct in6_addr *daddr, struct in6_addr *saddr)
 {
@@ -4189,7 +4159,13 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
 int __init addrconf_init(void)
 {
-	int err = 0;
+	int err;
+
+	if ((err = ipv6_addr_label_init()) < 0) {
+		printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n",
+			err);
+		return err;
+	}
 
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
@@ -4240,6 +4216,8 @@ int __init addrconf_init(void)
 	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
 	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
 
+	ipv6_addr_label_rtnl_register();
+
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl.sysctl_header =
 		register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
new file mode 100644
index 00000000000..204d4d66834
--- /dev/null
+++ b/net/ipv6/addrlabel.c
@@ -0,0 +1,551 @@
+/*
+ * IPv6 Address Label subsystem
+ * for the IPv6 "Default" Source Address Selection
+ *
+ * Copyright (C)2007 USAGI/WIDE Project
+ */
+/*
+ * Author:
+ * 	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/in6.h>
+#include <net/addrconf.h>
+#include <linux/if_addrlabel.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#if 0
+#define ADDRLABEL(x...) printk(x)
+#else
+#define ADDRLABEL(x...) do { ; } while(0)
+#endif
+
+/*
+ * Policy Table
+ */
+struct ip6addrlbl_entry
+{
+	struct in6_addr prefix;
+	int prefixlen;
+	int ifindex;
+	int addrtype;
+	u32 label;
+	struct hlist_node list;
+	atomic_t refcnt;
+	struct rcu_head rcu;
+};
+
+static struct ip6addrlbl_table
+{
+	struct hlist_head head;
+	spinlock_t lock;
+	u32 seq;
+} ip6addrlbl_table;
+
+/*
+ * Default policy table (RFC3484 + extensions)
+ *
+ * prefix		addr_type	label
+ * -------------------------------------------------------------------------
+ * ::1/128		LOOPBACK	0
+ * ::/0			N/A		1
+ * 2002::/16		N/A		2
+ * ::/96		COMPATv4	3
+ * ::ffff:0:0/96	V4MAPPED	4
+ * fc00::/7		N/A		5		ULA (RFC 4193)
+ * 2001::/32		N/A		6		Teredo (RFC 4380)
+ *
+ * Note: 0xffffffff is used if we do not have any policies.
+ */
+
+#define IPV6_ADDR_LABEL_DEFAULT	0xffffffffUL
+
+static const __initdata struct ip6addrlbl_init_table
+{
+	const struct in6_addr *prefix;
+	int prefixlen;
+	u32 label;
+} ip6addrlbl_init_table[] = {
+	{	/* ::/0 */
+		.prefix = &in6addr_any,
+		.label = 1,
+	},{	/* fc00::/7 */
+		.prefix = &(struct in6_addr){{{ 0xfc }}},
+		.prefixlen = 7,
+		.label = 5,
+	},{	/* 2002::/16 */
+		.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+		.prefixlen = 16,
+		.label = 2,
+	},{	/* 2001::/32 */
+		.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+		.prefixlen = 32,
+		.label = 6,
+	},{	/* ::ffff:0:0 */
+		.prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+		.prefixlen = 96,
+		.label = 4,
+	},{	/* ::/96 */
+		.prefix = &in6addr_any,
+		.prefixlen = 96,
+		.label = 3,
+	},{	/* ::1/128 */
+		.prefix = &in6addr_loopback,
+		.prefixlen = 128,
+		.label = 0,
+	}
+};
+
+/* Object management */
+static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
+{
+	kfree(p);
+}
+
+static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
+{
+	return atomic_inc_not_zero(&p->refcnt);
+}
+
+static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
+{
+	if (atomic_dec_and_test(&p->refcnt))
+		ip6addrlbl_free(p);
+}
+
+static void ip6addrlbl_free_rcu(struct rcu_head *h)
+{
+	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
+}
+
+/* Find label */
+static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
+			      const struct in6_addr *addr,
+			      int addrtype, int ifindex)
+{
+	if (p->ifindex && p->ifindex != ifindex)
+		return 0;
+	if (p->addrtype && p->addrtype != addrtype)
+		return 0;
+	if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
+		return 0;
+	return 1;
+}
+
+static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
+						  int type, int ifindex)
+{
+	struct hlist_node *pos;
+	struct ip6addrlbl_entry *p;
+	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+		if (__ip6addrlbl_match(p, addr, type, ifindex))
+			return p;
+	}
+	return NULL;
+}
+
+u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
+{
+	u32 label;
+	struct ip6addrlbl_entry *p;
+
+	type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
+
+	rcu_read_lock();
+	p = __ipv6_addr_label(addr, type, ifindex);
+	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
+	rcu_read_unlock();
+
+	ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
+			__FUNCTION__,
+			NIP6(*addr), type, ifindex,
+			label);
+
+	return label;
+}
+
+/* allocate one entry */
+struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
+					  int prefixlen, int ifindex,
+					  u32 label)
+{
+	struct ip6addrlbl_entry *newp;
+	int addrtype;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
+			__FUNCTION__,
+			NIP6(*prefix), prefixlen,
+			ifindex,
+			(unsigned int)label);
+
+	addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
+
+	switch (addrtype) {
+	case IPV6_ADDR_MAPPED:
+		if (prefixlen > 96)
+			return ERR_PTR(-EINVAL);
+		if (prefixlen < 96)
+			addrtype = 0;
+		break;
+	case IPV6_ADDR_COMPATv4:
+		if (prefixlen != 96)
+			addrtype = 0;
+		break;
+	case IPV6_ADDR_LOOPBACK:
+		if (prefixlen != 128)
+			addrtype = 0;
+		break;
+	}
+
+	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
+	if (!newp)
+		return ERR_PTR(-ENOMEM);
+
+	ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
+	newp->prefixlen = prefixlen;
+	newp->ifindex = ifindex;
+	newp->addrtype = addrtype;
+	newp->label = label;
+	INIT_HLIST_NODE(&newp->list);
+	atomic_set(&newp->refcnt, 1);
+	return newp;
+}
+
+/* add a label */
+int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+{
+	int ret = 0;
+
+	ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
+			__FUNCTION__,
+			newp, replace);
+
+	if (hlist_empty(&ip6addrlbl_table.head)) {
+		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+	} else {
+		struct hlist_node *pos, *n;
+		struct ip6addrlbl_entry *p = NULL;
+		hlist_for_each_entry_safe(p, pos, n,
+					  &ip6addrlbl_table.head, list) {
+			if (p->prefixlen == newp->prefixlen &&
+			    p->ifindex == newp->ifindex &&
+			    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+				if (!replace) {
+					ret = -EEXIST;
+					goto out;
+				}
+				hlist_replace_rcu(&p->list, &newp->list);
+				ip6addrlbl_put(p);
+				call_rcu(&p->rcu, ip6addrlbl_free_rcu);
+				goto out;
+			} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+				   (p->prefixlen < newp->prefixlen)) {
+				hlist_add_before_rcu(&newp->list, &p->list);
+				goto out;
+			}
+		}
+		hlist_add_after_rcu(&p->list, &newp->list);
+	}
+out:
+	if (!ret)
+		ip6addrlbl_table.seq++;
+	return ret;
+}
+
+/* add a label */
+int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
+		       int ifindex, u32 label, int replace)
+{
+	struct ip6addrlbl_entry *newp;
+	int ret = 0;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
+			__FUNCTION__,
+			NIP6(*prefix), prefixlen,
+			ifindex,
+			(unsigned int)label,
+			replace);
+
+	newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
+	if (IS_ERR(newp))
+		return PTR_ERR(newp);
+	spin_lock(&ip6addrlbl_table.lock);
+	ret = __ip6addrlbl_add(newp, replace);
+	spin_unlock(&ip6addrlbl_table.lock);
+	if (ret)
+		ip6addrlbl_free(newp);
+	return ret;
+}
+
+/* remove a label */
+int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+			  int ifindex)
+{
+	struct ip6addrlbl_entry *p = NULL;
+	struct hlist_node *pos, *n;
+	int ret = -ESRCH;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
+			__FUNCTION__,
+			NIP6(*prefix), prefixlen,
+			ifindex);
+
+	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+		if (p->prefixlen == prefixlen &&
+		    p->ifindex == ifindex &&
+		    ipv6_addr_equal(&p->prefix, prefix)) {
+			hlist_del_rcu(&p->list);
+			ip6addrlbl_put(p);
+			call_rcu(&p->rcu, ip6addrlbl_free_rcu);
+			ret = 0;
+			break;
+		}
+	}
+	return ret;
+}
+
+int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+		       int ifindex)
+{
+	struct in6_addr prefix_buf;
+	int ret;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
+			__FUNCTION__,
+			NIP6(*prefix), prefixlen,
+			ifindex);
+
+	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
+	spin_lock(&ip6addrlbl_table.lock);
+	ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
+	spin_unlock(&ip6addrlbl_table.lock);
+	return ret;
+}
+
+/* add default label */
+static __init int ip6addrlbl_init(void)
+{
+	int err = 0;
+	int i;
+
+	ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__);
+
+	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
+		int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
+					 ip6addrlbl_init_table[i].prefixlen,
+					 0,
+					 ip6addrlbl_init_table[i].label, 0);
+		/* XXX: should we free all rules when we catch an error? */
+		if (ret && (!err || err != -ENOMEM))
+			err = ret;
+	}
+	return err;
+}
+
+int __init ipv6_addr_label_init(void)
+{
+	spin_lock_init(&ip6addrlbl_table.lock);
+
+	return ip6addrlbl_init();
+}
+
+static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
+	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
+	[IFAL_LABEL]		= { .len = sizeof(u32), },
+};
+
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     void *arg)
+{
+	struct ifaddrlblmsg *ifal;
+	struct nlattr *tb[IFAL_MAX+1];
+	struct in6_addr *pfx;
+	u32 label;
+	int err = 0;
+
+	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+	if (err < 0)
+		return err;
+
+	ifal = nlmsg_data(nlh);
+
+	if (ifal->ifal_family != AF_INET6 ||
+	    ifal->ifal_prefixlen > 128)
+		return -EINVAL;
+
+	if (ifal->ifal_index &&
+	    !__dev_get_by_index(&init_net, ifal->ifal_index))
+		return -EINVAL;
+
+	if (!tb[IFAL_ADDRESS])
+		return -EINVAL;
+
+	pfx = nla_data(tb[IFAL_ADDRESS]);
+	if (!pfx)
+		return -EINVAL;
+
+	if (!tb[IFAL_LABEL])
+		return -EINVAL;
+	label = nla_get_u32(tb[IFAL_LABEL]);
+	if (label == IPV6_ADDR_LABEL_DEFAULT)
+		return -EINVAL;
+
+	switch(nlh->nlmsg_type) {
+	case RTM_NEWADDRLABEL:
+		err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
+				     ifal->ifal_index, label,
+				     nlh->nlmsg_flags & NLM_F_REPLACE);
+		break;
+	case RTM_DELADDRLABEL:
+		err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
+				     ifal->ifal_index);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
+				     int prefixlen, int ifindex, u32 lseq)
+{
+	struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
+	ifal->ifal_family = AF_INET6;
+	ifal->ifal_prefixlen = prefixlen;
+	ifal->ifal_flags = 0;
+	ifal->ifal_index = ifindex;
+	ifal->ifal_seq = lseq;
+};
+
+static int ip6addrlbl_fill(struct sk_buff *skb,
+			   struct ip6addrlbl_entry *p,
+			   u32 lseq,
+			   u32 pid, u32 seq, int event,
+			   unsigned int flags)
+{
+	struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
+					 sizeof(struct ifaddrlblmsg), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
+
+	if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
+	    nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
+
+	return nlmsg_end(skb, nlh);
+}
+
+static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ip6addrlbl_entry *p;
+	struct hlist_node *pos;
+	int idx = 0, s_idx = cb->args[0];
+	int err;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+		if (idx >= s_idx) {
+			if ((err = ip6addrlbl_fill(skb, p,
+						   ip6addrlbl_table.seq,
+						   NETLINK_CB(cb->skb).pid,
+						   cb->nlh->nlmsg_seq,
+						   RTM_NEWADDRLABEL,
+						   NLM_F_MULTI)) <= 0)
+				break;
+		}
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static inline int ip6addrlbl_msgsize(void)
+{
+	return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
+		+ nla_total_size(16)	/* IFAL_ADDRESS */
+		+ nla_total_size(4)	/* IFAL_LABEL */
+	);
+}
+
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+			  void *arg)
+{
+	struct ifaddrlblmsg *ifal;
+	struct nlattr *tb[IFAL_MAX+1];
+	struct in6_addr *addr;
+	u32 lseq;
+	int err = 0;
+	struct ip6addrlbl_entry *p;
+	struct sk_buff *skb;
+
+	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+	if (err < 0)
+		return err;
+
+	ifal = nlmsg_data(nlh);
+
+	if (ifal->ifal_family != AF_INET6 ||
+	    ifal->ifal_prefixlen != 128)
+		return -EINVAL;
+
+	if (ifal->ifal_index &&
+	    !__dev_get_by_index(&init_net, ifal->ifal_index))
+		return -EINVAL;
+
+	if (!tb[IFAL_ADDRESS])
+		return -EINVAL;
+
+	addr = nla_data(tb[IFAL_ADDRESS]);
+	if (!addr)
+		return -EINVAL;
+
+	rcu_read_lock();
+	p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
+	if (p && ip6addrlbl_hold(p))
+		p = NULL;
+	lseq = ip6addrlbl_table.seq;
+	rcu_read_unlock();
+
+	if (!p) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+		ip6addrlbl_put(p);
+		return -ENOBUFS;
+	}
+
+	err = ip6addrlbl_fill(skb, p, lseq,
+			      NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+			      RTM_NEWADDRLABEL, 0);
+
+	ip6addrlbl_put(p);
+
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto out;
+	}
+
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+out:
+	return err;
+}
+
+void __init ipv6_addr_label_rtnl_register(void)
+{
+	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
+}
+
-- 
cgit v1.2.3


From 20fea08b5fb639c4c175b5c74a2bb346c5c5bc2e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 14 Nov 2007 01:44:41 -0800
Subject: [NET]: Move Qdisc_class_ops and Qdisc_ops in appropriate sections.

Qdisc_class_ops are const, and Qdisc_ops are mostly read.

Using "const" and "__read_mostly" qualifiers helps to reduce false
sharing.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/wme.c        |  4 ++--
 net/sched/cls_api.c       |  4 ++--
 net/sched/sch_api.c       | 12 ++++++------
 net/sched/sch_atm.c       |  4 ++--
 net/sched/sch_blackhole.c |  2 +-
 net/sched/sch_cbq.c       |  4 ++--
 net/sched/sch_dsmark.c    |  4 ++--
 net/sched/sch_fifo.c      |  4 ++--
 net/sched/sch_generic.c   | 10 +++++-----
 net/sched/sch_gred.c      |  2 +-
 net/sched/sch_hfsc.c      |  4 ++--
 net/sched/sch_htb.c       |  4 ++--
 net/sched/sch_ingress.c   |  4 ++--
 net/sched/sch_netem.c     |  6 +++---
 net/sched/sch_prio.c      |  6 +++---
 net/sched/sch_red.c       |  4 ++--
 net/sched/sch_sfq.c       |  2 +-
 net/sched/sch_tbf.c       |  4 ++--
 18 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 5b8a157975a..8dbdededdec 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -527,7 +527,7 @@ static struct tcf_proto ** wme_classop_find_tcf(struct Qdisc *qd,
 
 /* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached)
  * - these are the operations on the classes */
-static struct Qdisc_class_ops class_ops =
+static const struct Qdisc_class_ops class_ops =
 {
 	.graft = wme_classop_graft,
 	.leaf = wme_classop_leaf,
@@ -547,7 +547,7 @@ static struct Qdisc_class_ops class_ops =
 
 
 /* queueing discipline operations */
-static struct Qdisc_ops wme_qdisc_ops =
+static struct Qdisc_ops wme_qdisc_ops __read_mostly =
 {
 	.next = NULL,
 	.cl_ops = &class_ops,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 03657976fd5..bb98045d550 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -130,7 +130,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct tcf_proto **back, **chain;
 	struct tcf_proto *tp;
 	struct tcf_proto_ops *tp_ops;
-	struct Qdisc_class_ops *cops;
+	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
 	unsigned long fh;
 	int err;
@@ -382,7 +382,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcf_proto *tp, **chain;
 	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
 	unsigned long cl = 0;
-	struct Qdisc_class_ops *cops;
+	const struct Qdisc_class_ops *cops;
 	struct tcf_dump_args arg;
 
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 8ae137e3522..259321be1ad 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -195,7 +195,7 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
 	struct Qdisc *leaf;
-	struct Qdisc_class_ops *cops = p->ops->cl_ops;
+	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
 
 	if (cops == NULL)
 		return NULL;
@@ -373,7 +373,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 
 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 {
-	struct Qdisc_class_ops *cops;
+	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
 	u32 parentid;
 
@@ -417,7 +417,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 			*old = dev_graft_qdisc(dev, new);
 		}
 	} else {
-		struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
 
 		err = -EINVAL;
 
@@ -581,7 +581,7 @@ static int
 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
 {
 	struct Qdisc *leaf;
-	struct Qdisc_class_ops *cops = q->ops->cl_ops;
+	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
 
 	leaf = cops->leaf(q, cl);
@@ -924,7 +924,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct rtattr **tca = arg;
 	struct net_device *dev;
 	struct Qdisc *q = NULL;
-	struct Qdisc_class_ops *cops;
+	const struct Qdisc_class_ops *cops;
 	unsigned long cl = 0;
 	unsigned long new_cl;
 	u32 pid = tcm->tcm_parent;
@@ -1039,7 +1039,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	struct nlmsghdr  *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
-	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ddc4f2c5437..d870a4115d9 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -668,7 +668,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return 0;
 }
 
-static struct Qdisc_class_ops atm_class_ops = {
+static const struct Qdisc_class_ops atm_class_ops = {
 	.graft		= atm_tc_graft,
 	.leaf		= atm_tc_leaf,
 	.get		= atm_tc_get,
@@ -683,7 +683,7 @@ static struct Qdisc_class_ops atm_class_ops = {
 	.dump_stats	= atm_tc_dump_class_stats,
 };
 
-static struct Qdisc_ops atm_qdisc_ops = {
+static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
 	.cl_ops		= &atm_class_ops,
 	.id		= "atm",
 	.priv_size	= sizeof(struct atm_qdisc_data),
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index f914fc43a12..507fb488bc9 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -28,7 +28,7 @@ static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
 	return NULL;
 }
 
-static struct Qdisc_ops blackhole_qdisc_ops = {
+static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
 	.id		= "blackhole",
 	.priv_size	= 0,
 	.enqueue	= blackhole_enqueue,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4de3744e65c..bea123fc24a 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -2045,7 +2045,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct Qdisc_class_ops cbq_class_ops = {
+static const struct Qdisc_class_ops cbq_class_ops = {
 	.graft		=	cbq_graft,
 	.leaf		=	cbq_leaf,
 	.qlen_notify	=	cbq_qlen_notify,
@@ -2061,7 +2061,7 @@ static struct Qdisc_class_ops cbq_class_ops = {
 	.dump_stats	=	cbq_dump_class_stats,
 };
 
-static struct Qdisc_ops cbq_qdisc_ops = {
+static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&cbq_class_ops,
 	.id		=	"cbq",
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 60f89199e3d..b9fe6975fbe 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -461,7 +461,7 @@ rtattr_failure:
 	return RTA_NEST_CANCEL(skb, opts);
 }
 
-static struct Qdisc_class_ops dsmark_class_ops = {
+static const struct Qdisc_class_ops dsmark_class_ops = {
 	.graft		=	dsmark_graft,
 	.leaf		=	dsmark_leaf,
 	.get		=	dsmark_get,
@@ -475,7 +475,7 @@ static struct Qdisc_class_ops dsmark_class_ops = {
 	.dump		=	dsmark_dump_class,
 };
 
-static struct Qdisc_ops dsmark_qdisc_ops = {
+static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&dsmark_class_ops,
 	.id		=	"dsmark",
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index c264308f17c..d71dbfc790c 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -78,7 +78,7 @@ rtattr_failure:
 	return -1;
 }
 
-struct Qdisc_ops pfifo_qdisc_ops = {
+struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.id		=	"pfifo",
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	pfifo_enqueue,
@@ -92,7 +92,7 @@ struct Qdisc_ops pfifo_qdisc_ops = {
 	.owner		=	THIS_MODULE,
 };
 
-struct Qdisc_ops bfifo_qdisc_ops = {
+struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.id		=	"bfifo",
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	bfifo_enqueue,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 84c048a5479..9be2f152455 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -287,7 +287,7 @@ static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
 	return NET_XMIT_CN;
 }
 
-struct Qdisc_ops noop_qdisc_ops = {
+struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 	.id		=	"noop",
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
@@ -304,7 +304,7 @@ struct Qdisc noop_qdisc = {
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
 };
 
-static struct Qdisc_ops noqueue_qdisc_ops = {
+static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.id		=	"noqueue",
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
@@ -406,7 +406,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
 	return 0;
 }
 
-static struct Qdisc_ops pfifo_fast_ops = {
+static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.id		=	"pfifo_fast",
 	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
 	.enqueue	=	pfifo_fast_enqueue,
@@ -472,7 +472,7 @@ errout:
 
 void qdisc_reset(struct Qdisc *qdisc)
 {
-	struct Qdisc_ops *ops = qdisc->ops;
+	const struct Qdisc_ops *ops = qdisc->ops;
 
 	if (ops->reset)
 		ops->reset(qdisc);
@@ -491,7 +491,7 @@ static void __qdisc_destroy(struct rcu_head *head)
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
-	struct Qdisc_ops  *ops = qdisc->ops;
+	const struct Qdisc_ops  *ops = qdisc->ops;
 
 	if (qdisc->flags & TCQ_F_BUILTIN ||
 	    !atomic_dec_and_test(&qdisc->refcnt))
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3cc6dda02e2..e2bcd6682c7 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -577,7 +577,7 @@ static void gred_destroy(struct Qdisc *sch)
 	}
 }
 
-static struct Qdisc_ops gred_qdisc_ops = {
+static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
 	.id		=	"gred",
 	.priv_size	=	sizeof(struct gred_sched),
 	.enqueue	=	gred_enqueue,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index a6ad491e434..69dc3bccf02 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1698,7 +1698,7 @@ hfsc_drop(struct Qdisc *sch)
 	return 0;
 }
 
-static struct Qdisc_class_ops hfsc_class_ops = {
+static const struct Qdisc_class_ops hfsc_class_ops = {
 	.change		= hfsc_change_class,
 	.delete		= hfsc_delete_class,
 	.graft		= hfsc_graft_class,
@@ -1714,7 +1714,7 @@ static struct Qdisc_class_ops hfsc_class_ops = {
 	.walk		= hfsc_walk
 };
 
-static struct Qdisc_ops hfsc_qdisc_ops = {
+static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
 	.id		= "hfsc",
 	.init		= hfsc_init_qdisc,
 	.change		= hfsc_change_qdisc,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5e608a64935..72beb66cec9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1529,7 +1529,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct Qdisc_class_ops htb_class_ops = {
+static const struct Qdisc_class_ops htb_class_ops = {
 	.graft		=	htb_graft,
 	.leaf		=	htb_leaf,
 	.qlen_notify	=	htb_qlen_notify,
@@ -1545,7 +1545,7 @@ static struct Qdisc_class_ops htb_class_ops = {
 	.dump_stats	=	htb_dump_class_stats,
 };
 
-static struct Qdisc_ops htb_qdisc_ops = {
+static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&htb_class_ops,
 	.id		=	"htb",
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d377deca4f2..902d82ea764 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -338,7 +338,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct Qdisc_class_ops ingress_class_ops = {
+static const struct Qdisc_class_ops ingress_class_ops = {
 	.graft		=	ingress_graft,
 	.leaf		=	ingress_leaf,
 	.get		=	ingress_get,
@@ -352,7 +352,7 @@ static struct Qdisc_class_ops ingress_class_ops = {
 	.dump		=	NULL,
 };
 
-static struct Qdisc_ops ingress_qdisc_ops = {
+static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&ingress_class_ops,
 	.id		=	"ingress",
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9e5e87e81f0..6c344ade33c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -544,7 +544,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct Qdisc_ops tfifo_qdisc_ops = {
+static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
 	.id		=	"tfifo",
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	tfifo_enqueue,
@@ -705,7 +705,7 @@ static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
 	return NULL;
 }
 
-static struct Qdisc_class_ops netem_class_ops = {
+static const struct Qdisc_class_ops netem_class_ops = {
 	.graft		=	netem_graft,
 	.leaf		=	netem_leaf,
 	.get		=	netem_get,
@@ -717,7 +717,7 @@ static struct Qdisc_class_ops netem_class_ops = {
 	.dump		=	netem_dump_class,
 };
 
-static struct Qdisc_ops netem_qdisc_ops = {
+static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.id		=	"netem",
 	.cl_ops		=	&netem_class_ops,
 	.priv_size	=	sizeof(struct netem_sched_data),
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de894096e44..2243aaa8d85 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -468,7 +468,7 @@ static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
 	return &q->filter_list;
 }
 
-static struct Qdisc_class_ops prio_class_ops = {
+static const struct Qdisc_class_ops prio_class_ops = {
 	.graft		=	prio_graft,
 	.leaf		=	prio_leaf,
 	.get		=	prio_get,
@@ -483,7 +483,7 @@ static struct Qdisc_class_ops prio_class_ops = {
 	.dump_stats	=	prio_dump_class_stats,
 };
 
-static struct Qdisc_ops prio_qdisc_ops = {
+static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&prio_class_ops,
 	.id		=	"prio",
@@ -500,7 +500,7 @@ static struct Qdisc_ops prio_qdisc_ops = {
 	.owner		=	THIS_MODULE,
 };
 
-static struct Qdisc_ops rr_qdisc_ops = {
+static struct Qdisc_ops rr_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&prio_class_ops,
 	.id		=	"rr",
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9b95fefb70f..f1e9647f7db 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -359,7 +359,7 @@ static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
 	return NULL;
 }
 
-static struct Qdisc_class_ops red_class_ops = {
+static const struct Qdisc_class_ops red_class_ops = {
 	.graft		=	red_graft,
 	.leaf		=	red_leaf,
 	.get		=	red_get,
@@ -371,7 +371,7 @@ static struct Qdisc_class_ops red_class_ops = {
 	.dump		=	red_dump_class,
 };
 
-static struct Qdisc_ops red_qdisc_ops = {
+static struct Qdisc_ops red_qdisc_ops __read_mostly = {
 	.id		=	"red",
 	.priv_size	=	sizeof(struct red_sched_data),
 	.cl_ops		=	&red_class_ops,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 65293876cd6..c58fa6efc7a 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -480,7 +480,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct Qdisc_ops sfq_qdisc_ops = {
+static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	NULL,
 	.id		=	"sfq",
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b0d81098b0e..d88fea9d6b6 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -469,7 +469,7 @@ static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
 	return NULL;
 }
 
-static struct Qdisc_class_ops tbf_class_ops =
+static const struct Qdisc_class_ops tbf_class_ops =
 {
 	.graft		=	tbf_graft,
 	.leaf		=	tbf_leaf,
@@ -482,7 +482,7 @@ static struct Qdisc_class_ops tbf_class_ops =
 	.dump		=	tbf_dump_class,
 };
 
-static struct Qdisc_ops tbf_qdisc_ops = {
+static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
 	.next		=	NULL,
 	.cl_ops		=	&tbf_class_ops,
 	.id		=	"tbf",
-- 
cgit v1.2.3


From 62013dbb8418eb7231c1577d238cf2e76b7696b0 Mon Sep 17 00:00:00 2001
From: Rainer Jochem <rainer.jochem@mpi-sb.mpg.de>
Date: Wed, 14 Nov 2007 02:18:39 -0800
Subject: [IPV4] ipconfig: Implement DHCP Class-identifier

From : Rainer Jochem <rainer.jochem@mpi-sb.mpg.de>

Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b8f7763b226..6422422b8e3 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -140,6 +140,8 @@ __be32 ic_servaddr = NONE;	/* Boot server IP address */
 __be32 root_server_addr = NONE;	/* Address of NFS server */
 u8 root_server_path[256] = { 0, };	/* Path to mount as root */
 
+static char vendor_class_identifier[253]; /* vendor class identifier */
+
 /* Persistent data: */
 
 static int ic_proto_used;			/* Protocol used, if any */
@@ -588,6 +590,7 @@ ic_dhcp_init_options(u8 *options)
 	u8 mt = ((ic_servaddr == NONE)
 		 ? DHCPDISCOVER : DHCPREQUEST);
 	u8 *e = options;
+	int len;
 
 #ifdef IPCONFIG_DEBUG
 	printk("DHCP: Sending message type %d\n", mt);
@@ -628,6 +631,16 @@ ic_dhcp_init_options(u8 *options)
 		*e++ = sizeof(ic_req_params);
 		memcpy(e, ic_req_params, sizeof(ic_req_params));
 		e += sizeof(ic_req_params);
+
+		if (*vendor_class_identifier) {
+			printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
+			       vendor_class_identifier);
+			*e++ = 60;	/* Class-identifier */
+			len = strlen(vendor_class_identifier);
+			*e++ = len;
+			memcpy(e, vendor_class_identifier, len);
+			e += len;
+		}
 	}
 
 	*e++ = 255;	/* End of the list */
@@ -1513,5 +1526,16 @@ static int __init nfsaddrs_config_setup(char *addrs)
 	return ip_auto_config_setup(addrs);
 }
 
+static int __init vendor_class_identifier_setup(char *addrs)
+{
+	if (strlcpy(vendor_class_identifier, addrs,
+		    sizeof(vendor_class_identifier))
+	    >= sizeof(vendor_class_identifier))
+		printk(KERN_WARNING "DHCP: vendorclass too long, truncated to \"%s\"",
+		       vendor_class_identifier);
+	return 1;
+}
+
 __setup("ip=", ip_auto_config_setup);
 __setup("nfsaddrs=", nfsaddrs_config_setup);
+__setup("dhcpclass=", vendor_class_identifier_setup);
-- 
cgit v1.2.3


From de0fa95c14bc4d4b545fae26439371ebfdcb8534 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 14 Nov 2007 16:01:43 -0800
Subject: [NET]: Use sockfd_lookup_light in the rest of the net/socket.c

Some time ago a sockfd_lookup_light was introduced and
most of the socket.c file was patched to use it. However
two routines were left - sys_sendto and sys_recvfrom.

Patch them as well, since this helper does exactly what
these two need.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 92fab9e1c60..aeeab388cc3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1594,16 +1594,11 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
 	struct msghdr msg;
 	struct iovec iov;
 	int fput_needed;
-	struct file *sock_file;
 
-	sock_file = fget_light(fd, &fput_needed);
-	err = -EBADF;
-	if (!sock_file)
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (!sock)
 		goto out;
 
-	sock = sock_from_file(sock_file, &err);
-	if (!sock)
-		goto out_put;
 	iov.iov_base = buff;
 	iov.iov_len = len;
 	msg.msg_name = NULL;
@@ -1625,7 +1620,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
 	err = sock_sendmsg(sock, &msg, len);
 
 out_put:
-	fput_light(sock_file, fput_needed);
+	fput_light(sock->file, fput_needed);
 out:
 	return err;
 }
@@ -1654,17 +1649,11 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
 	struct msghdr msg;
 	char address[MAX_SOCK_ADDR];
 	int err, err2;
-	struct file *sock_file;
 	int fput_needed;
 
-	sock_file = fget_light(fd, &fput_needed);
-	err = -EBADF;
-	if (!sock_file)
-		goto out;
-
-	sock = sock_from_file(sock_file, &err);
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
-		goto out_put;
+		goto out;
 
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
@@ -1683,8 +1672,8 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
 		if (err2 < 0)
 			err = err2;
 	}
-out_put:
-	fput_light(sock_file, fput_needed);
+
+	fput_light(sock->file, fput_needed);
 out:
 	return err;
 }
-- 
cgit v1.2.3


From c0ef877b2c9f543e9fb7953bfe1a0cd3a4eae362 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 15 Nov 2007 03:03:19 -0800
Subject: [NET]: Move sock_valbool_flag to socket.c

The sock_valbool_flag() helper is used in setsockopt to
set or reset some flag on the sock. This helper is required
in the net/socket.c only, so move it there.

Besides, patch two places in sys_setsockopt() that repeat
this helper functionality manually.

Since this is not a bugfix, but a trivial cleanup, I
prepared this patch against net-2.6.25, but it also
applies (with a single offset) to the latest net-2.6.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 2029d095b4c..98b243a5b32 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -419,6 +419,14 @@ out:
 	return ret;
 }
 
+static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+{
+	if (valbool)
+		sock_set_flag(sk, bit);
+	else
+		sock_reset_flag(sk, bit);
+}
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -463,11 +471,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	case SO_DEBUG:
 		if (val && !capable(CAP_NET_ADMIN)) {
 			ret = -EACCES;
-		}
-		else if (valbool)
-			sock_set_flag(sk, SOCK_DBG);
-		else
-			sock_reset_flag(sk, SOCK_DBG);
+		} else
+			sock_valbool_flag(sk, SOCK_DBG, valbool);
 		break;
 	case SO_REUSEADDR:
 		sk->sk_reuse = valbool;
@@ -477,10 +482,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		ret = -ENOPROTOOPT;
 		break;
 	case SO_DONTROUTE:
-		if (valbool)
-			sock_set_flag(sk, SOCK_LOCALROUTE);
-		else
-			sock_reset_flag(sk, SOCK_LOCALROUTE);
+		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
 		break;
 	case SO_BROADCAST:
 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
-- 
cgit v1.2.3


From b9d86585dc6c9265aa373c7036458fe8aa7627c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:33:31 -0800
Subject: [TCP]: Move !in_sack test earlier in sacktag & reorganize if()s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All intermediate conditions include it already, make them
simpler as well.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b39f0d86e44..901240f4068 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1406,28 +1406,25 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (unlikely(in_sack < 0))
 				break;
 
+			if (!in_sack) {
+				fack_count += tcp_skb_pcount(skb);
+				continue;
+			}
+
 			sacked = TCP_SKB_CB(skb)->sacked;
 
 			/* Account D-SACK for retransmitted packet. */
-			if ((dup_sack && in_sack) &&
-			    (sacked & TCPCB_RETRANS) &&
-			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
-				tp->undo_retrans--;
-
-			/* The frame is ACKed. */
-			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
-				if (sacked&TCPCB_RETRANS) {
-					if ((dup_sack && in_sack) &&
-					    (sacked&TCPCB_SACKED_ACKED))
-						reord = min(fack_count, reord);
-				}
-
-				/* Nothing to do; acked frame is about to be dropped. */
-				fack_count += tcp_skb_pcount(skb);
-				continue;
+			if (dup_sack && (sacked & TCPCB_RETRANS)) {
+				if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+					tp->undo_retrans--;
+				if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) &&
+				    (sacked & TCPCB_SACKED_ACKED))
+					reord = min(fack_count, reord);
 			}
 
-			if (!in_sack) {
+
+			/* Nothing to do; acked frame is about to be dropped (was ACKed). */
+			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
 				fack_count += tcp_skb_pcount(skb);
 				continue;
 			}
-- 
cgit v1.2.3


From f577111302677e6d1448475821cc19ba8835f60e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:35:11 -0800
Subject: [TCP]: Extend reordering detection to cover CA_Loss partially
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This implements more accurately what is stated in sacktag's
overall comment:

  "Both of these heuristics are not used in Loss state, when
   we cannot account for retransmits accurately."

When CA_Loss state is entered, the state changer ensures that
undo_marker is only set if no TCPCB_RETRANS skbs were found,
thus having non-zero undo_marker in CA_Loss basically tells
that the R-bits still accurately reflect the current state
of TCP.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 901240f4068..26713e5d89d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1511,7 +1511,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 	tcp_verify_left_out(tp);
 
-	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+	if ((reord < tp->fackets_out) &&
+	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
 	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
 		tcp_update_reordering(sk, tp->fackets_out - reord, 0);
 
-- 
cgit v1.2.3


From 85cc391c0e4584db594bfc4005c63c07c76c5077 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:39:31 -0800
Subject: [TCP]: non-FACK SACK follows conservative SACK loss recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Many assumptions that are true when no reordering or other
strange events happen are not a part of the RFC3517. FACK
implementation is based on such assumptions. Previously (before
the rewrite) the non-FACK SACK was basically doing fast rexmit
and then it times out all skbs when first cumulative ACK arrives,
which cannot really be called SACK based recovery :-).

RFC3517 SACK disables these things:
- Per SKB timeouts & head timeout entry to recovery
- Marking at least one skb while in recovery (RFC3517 does this
  only for the fast retransmission but not for the other skbs
  when cumulative ACKs arrive in the recovery)
- Sacktag's loss detection flavors B and C (see comment before
  tcp_sacktag_write_queue)

This does not implement the "last resort" rule 3 of NextSeg, which
allows retransmissions also when not enough SACK blocks have yet
arrived above a segment for IsLost to return true [RFC3517].

The implementation differs from RFC3517 in these points:
- Rate-halving is used instead of FlightSize / 2
- Instead of using dupACKs to trigger the recovery, the number
  of SACK blocks is used as FACK does with SACK blocks+holes
  (which provides more accurate number). It seems that the
  difference can affect negatively only if the receiver does not
  generate SACK blocks at all even though it claimed to be
  SACK-capable.
- Dupthresh is not a constant one. Dynamical adjustments include
  both holes and sacked segments (equal to what FACK has) due to
  complexity involved in determining the number sacked blocks
  between highest_sack and the reordered segment. Thus it's will
  be an over-estimate.

Implementation note:

tcp_clean_rtx_queue doesn't need a lost_cnt tweak because head
skb at that point cannot be SACKED_ACKED (nor would such
situation last for long enough to cause problems).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 80 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 62 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26713e5d89d..c0e8f2b1fa7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -863,6 +863,9 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
  */
 static void tcp_disable_fack(struct tcp_sock *tp)
 {
+	/* RFC3517 uses different metric in lost marker => reset on change */
+	if (tcp_is_fack(tp))
+		tp->lost_skb_hint = NULL;
 	tp->rx_opt.sack_ok &= ~2;
 }
 
@@ -1470,6 +1473,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				tp->sacked_out += tcp_skb_pcount(skb);
 
 				fack_count += tcp_skb_pcount(skb);
+
+				/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
+				if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+				    before(TCP_SKB_CB(skb)->seq,
+					   TCP_SKB_CB(tp->lost_skb_hint)->seq))
+					tp->lost_cnt_hint += tcp_skb_pcount(skb);
+
 				if (fack_count > tp->fackets_out)
 					tp->fackets_out = fack_count;
 
@@ -1504,7 +1514,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			flag &= ~FLAG_ONLY_ORIG_SACKED;
 	}
 
-	if (tp->retrans_out &&
+	if (tcp_is_fack(tp) && tp->retrans_out &&
 	    after(highest_sack_end_seq, tp->lost_retrans_low) &&
 	    icsk->icsk_ca_state == TCP_CA_Recovery)
 		flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
@@ -1858,6 +1868,26 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
 	return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
 }
 
+/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
+ * counter when SACK is enabled (without SACK, sacked_out is used for
+ * that purpose).
+ *
+ * Instead, with FACK TCP uses fackets_out that includes both SACKed
+ * segments up to the highest received SACK block so far and holes in
+ * between them.
+ *
+ * With reordering, holes may still be in flight, so RFC3517 recovery
+ * uses pure sacked_out (total number of SACKed segments) even though
+ * it violates the RFC that uses duplicate ACKs, often these are equal
+ * but when e.g. out-of-window ACKs or packet duplication occurs,
+ * they differ. Since neither occurs due to loss, TCP should really
+ * ignore them.
+ */
+static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
+{
+	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+}
+
 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 {
 	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
@@ -1978,13 +2008,13 @@ static int tcp_time_to_recover(struct sock *sk)
 		return 1;
 
 	/* Not-A-Trick#2 : Classic rule... */
-	if (tcp_fackets_out(tp) > tp->reordering)
+	if (tcp_dupack_heurestics(tp) > tp->reordering)
 		return 1;
 
 	/* Trick#3 : when we use RFC2988 timer restart, fast
 	 * retransmit can be triggered by timeout of queue head.
 	 */
-	if (tcp_head_timedout(sk))
+	if (tcp_is_fack(tp) && tcp_head_timedout(sk))
 		return 1;
 
 	/* Trick#4: It is still not OK... But will it be useful to delay
@@ -2017,8 +2047,10 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
 		tp->retransmit_skb_hint = NULL;
 }
 
-/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
+ * is against sacked "cnt", otherwise it's against facked "cnt"
+ */
+static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -2040,8 +2072,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
-		cnt += tcp_skb_pcount(skb);
-		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+
+		if (tcp_is_fack(tp) ||
+		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+			cnt += tcp_skb_pcount(skb);
+
+		if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
+		     after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2054,17 +2091,22 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 
 /* Account newly detected lost packet(s) */
 
-static void tcp_update_scoreboard(struct sock *sk)
+static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_is_fack(tp)) {
+	if (tcp_is_reno(tp)) {
+		tcp_mark_head_lost(sk, 1, fast_rexmit);
+	} else if (tcp_is_fack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost);
+		tcp_mark_head_lost(sk, lost, fast_rexmit);
 	} else {
-		tcp_mark_head_lost(sk, 1);
+		int sacked_upto = tp->sacked_out - tp->reordering;
+		if (sacked_upto < 0)
+			sacked_upto = 0;
+		tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -2072,7 +2114,7 @@ static void tcp_update_scoreboard(struct sock *sk)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) {
+	if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -2245,7 +2287,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Partial ACK arrived. Force Hoe's retransmit. */
-	int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering;
+	int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
 
 	if (tcp_may_undo(tp)) {
 		/* Plain luck! Hole if filled with delayed
@@ -2379,7 +2421,8 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
 	int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
-				    (tp->fackets_out > tp->reordering));
+				    (tcp_fackets_out(tp) > tp->reordering));
+	int fast_rexmit = 0;
 
 	/* Some technical things:
 	 * 1. Reno does not count dupacks (sacked_out) automatically. */
@@ -2399,11 +2442,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		return;
 
 	/* C. Process data loss notification, provided it is valid. */
-	if ((flag&FLAG_DATA_LOST) &&
+	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, 0);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -2522,10 +2565,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
+		fast_rexmit = 1;
 	}
 
-	if (do_lost || tcp_head_timedout(sk))
-		tcp_update_scoreboard(sk);
+	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+		tcp_update_scoreboard(sk, fast_rexmit);
 	tcp_cwnd_down(sk, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
-- 
cgit v1.2.3


From a47e5a988a575e64c8c9bae65a1dfe3dca7cba32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:41:46 -0800
Subject: [TCP]: Convert highest_sack to sk_buff to allow direct access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is going to replace the sack fastpath hint quite soon... :-)

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 12 ++++++------
 net/ipv4/tcp_output.c | 19 ++++++++++---------
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c0e8f2b1fa7..31294b52ad4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1245,7 +1245,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
-	u32 highest_sack_end_seq = tp->lost_retrans_low;
+	u32 highest_sack_end_seq;
 	int flag = 0;
 	int found_dup_sack = 0;
 	int cached_fack_count;
@@ -1256,7 +1256,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
 			tp->fackets_out = 0;
-		tp->highest_sack = tp->snd_una;
+		tp->highest_sack = tcp_write_queue_head(sk);
 	}
 	prior_fackets = tp->fackets_out;
 
@@ -1483,10 +1483,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				if (fack_count > tp->fackets_out)
 					tp->fackets_out = fack_count;
 
-				if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
-					tp->highest_sack = TCP_SKB_CB(skb)->seq;
-					highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
-				}
+				if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+					tp->highest_sack = skb;
+
 			} else {
 				if (dup_sack && (sacked&TCPCB_RETRANS))
 					reord = min(fack_count, reord);
@@ -1514,6 +1513,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			flag &= ~FLAG_ONLY_ORIG_SACKED;
 	}
 
+	highest_sack_end_seq = TCP_SKB_CB(tp->highest_sack)->end_seq;
 	if (tcp_is_fack(tp) && tp->retrans_out &&
 	    after(highest_sack_end_seq, tp->lost_retrans_low) &&
 	    icsk->icsk_ca_state == TCP_CA_Recovery)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f4c1eef89af..ce506af5ce0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -657,13 +657,15 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
  * tweak SACK fastpath hint too as it would overwrite all changes unless
  * hint is also changed.
  */
-static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
+static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 				   int decr)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!tp->sacked_out || tcp_is_reno(tp))
 		return;
 
-	if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq))
+	if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
 		tp->fackets_out -= decr;
 
 	/* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
@@ -712,9 +714,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
 
-	if (tcp_is_sack(tp) && tp->sacked_out &&
-	    (TCP_SKB_CB(skb)->seq == tp->highest_sack))
-		tp->highest_sack = TCP_SKB_CB(buff)->seq;
+	if (tcp_is_sack(tp) && tp->sacked_out && (skb == tp->highest_sack))
+		tp->highest_sack = buff;
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
@@ -772,7 +773,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 			tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
 			tcp_verify_left_out(tp);
 		}
-		tcp_adjust_fackets_out(tp, skb, diff);
+		tcp_adjust_fackets_out(sk, skb, diff);
 	}
 
 	/* Link BUFF into the send queue. */
@@ -1712,7 +1713,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		       tcp_skb_pcount(next_skb) != 1);
 
 		if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out &&
-		    (TCP_SKB_CB(next_skb)->seq == tp->highest_sack)))
+		    (next_skb == tp->highest_sack)))
 			return;
 
 		/* Ok.	We will be able to collapse the packet. */
@@ -1747,7 +1748,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		if (tcp_is_reno(tp) && tp->sacked_out)
 			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
 
-		tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb));
+		tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
 		tp->packets_out -= tcp_skb_pcount(next_skb);
 
 		/* changed transmit queue under us so clear hints */
@@ -2028,7 +2029,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 		tp->forward_skb_hint = skb;
 
-		if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack))
+		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
 			break;
 
 		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-- 
cgit v1.2.3


From 9f58f3b721f52a4d3f497ea57f830ccd307f1d76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:42:54 -0800
Subject: [TCP]: Make lost retrans detection more self-contained
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Highest_sack_end_seq is no longer calculated in the loop,
thus it can be pushed to the worker function altogether
making that function independent of the sacktag.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 31294b52ad4..7a2bfd85fff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1115,16 +1115,23 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
  * less than what is now known to be received by the other end (derived from
- * SACK blocks by the caller). Also calculate the lowest snd_nxt among the
- * remaining retransmitted skbs to avoid some costly processing per ACKs.
+ * highest SACK block). Also calculate the lowest snd_nxt among the remaining
+ * retransmitted skbs to avoid some costly processing per ACKs.
  */
-static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
+static int tcp_mark_lost_retrans(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int flag = 0;
 	int cnt = 0;
 	u32 new_low_seq = tp->snd_nxt;
+	u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq;
+
+	if (!tcp_is_fack(tp) || !tp->retrans_out ||
+	    !after(received_upto, tp->lost_retrans_low) ||
+	    icsk->icsk_ca_state != TCP_CA_Recovery)
+		return flag;
 
 	tcp_for_write_queue(skb, sk) {
 		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1245,7 +1252,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
-	u32 highest_sack_end_seq;
 	int flag = 0;
 	int found_dup_sack = 0;
 	int cached_fack_count;
@@ -1513,11 +1519,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			flag &= ~FLAG_ONLY_ORIG_SACKED;
 	}
 
-	highest_sack_end_seq = TCP_SKB_CB(tp->highest_sack)->end_seq;
-	if (tcp_is_fack(tp) && tp->retrans_out &&
-	    after(highest_sack_end_seq, tp->lost_retrans_low) &&
-	    icsk->icsk_ca_state == TCP_CA_Recovery)
-		flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
+	flag |= tcp_mark_lost_retrans(sk);
 
 	tcp_verify_left_out(tp);
 
-- 
cgit v1.2.3


From b7d4815f35ab1d0f1eef2521a94a7d4c789290a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:43:56 -0800
Subject: [TCP]: Prior_fackets can be replaced by highest_sack seq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7a2bfd85fff..5e01ac2c003 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1251,7 +1251,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
-	int prior_fackets;
 	int flag = 0;
 	int found_dup_sack = 0;
 	int cached_fack_count;
@@ -1264,7 +1263,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			tp->fackets_out = 0;
 		tp->highest_sack = tcp_write_queue_head(sk);
 	}
-	prior_fackets = tp->fackets_out;
 
 	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp,
 					 num_sacks, prior_snd_una);
@@ -1457,7 +1455,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						/* New sack for not retransmitted frame,
 						 * which was in hole. It is reordering.
 						 */
-						if (fack_count < prior_fackets)
+						if (before(TCP_SKB_CB(skb)->seq,
+							   tcp_highest_sack_seq(tp)))
 							reord = min(fack_count, reord);
 
 						/* SACK enhanced F-RTO (RFC4138; Appendix B) */
-- 
cgit v1.2.3


From 9e10c47cb9fe3154416787523f7a0df02133063f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:44:56 -0800
Subject: [TCP]: Create tcp_sacktag_one().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Worker function that implements the main logic of
the inner-most loop of tcp_sacktag_write_queue().

Idea was originally presented by David S. Miller.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 192 +++++++++++++++++++++++++--------------------------
 1 file changed, 96 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5e01ac2c003..a62e0f90f56 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1240,6 +1240,99 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
+static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
+			   int *reord, int dup_sack, int fack_count)
+{
+	u8 sacked = TCP_SKB_CB(skb)->sacked;
+	int flag = 0;
+
+	/* Account D-SACK for retransmitted packet. */
+	if (dup_sack && (sacked & TCPCB_RETRANS)) {
+		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+			tp->undo_retrans--;
+		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) &&
+		    (sacked & TCPCB_SACKED_ACKED))
+			*reord = min(fack_count, *reord);
+	}
+
+	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
+	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+		return flag;
+
+	if (!(sacked & TCPCB_SACKED_ACKED)) {
+		if (sacked & TCPCB_SACKED_RETRANS) {
+			/* If the segment is not tagged as lost,
+			 * we do not clear RETRANS, believing
+			 * that retransmission is still in flight.
+			 */
+			if (sacked & TCPCB_LOST) {
+				TCP_SKB_CB(skb)->sacked &=
+					~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+				tp->lost_out -= tcp_skb_pcount(skb);
+				tp->retrans_out -= tcp_skb_pcount(skb);
+
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
+			}
+		} else {
+			if (!(sacked & TCPCB_RETRANS)) {
+				/* New sack for not retransmitted frame,
+				 * which was in hole. It is reordering.
+				 */
+				if (before(TCP_SKB_CB(skb)->seq,
+					   tcp_highest_sack_seq(tp)))
+					*reord = min(fack_count, *reord);
+
+				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
+				if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+					flag |= FLAG_ONLY_ORIG_SACKED;
+			}
+
+			if (sacked & TCPCB_LOST) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+				tp->lost_out -= tcp_skb_pcount(skb);
+
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
+			}
+		}
+
+		TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
+		flag |= FLAG_DATA_SACKED;
+		tp->sacked_out += tcp_skb_pcount(skb);
+
+		fack_count += tcp_skb_pcount(skb);
+
+		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
+		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+		    before(TCP_SKB_CB(skb)->seq,
+			   TCP_SKB_CB(tp->lost_skb_hint)->seq))
+			tp->lost_cnt_hint += tcp_skb_pcount(skb);
+
+		if (fack_count > tp->fackets_out)
+			tp->fackets_out = fack_count;
+
+		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+			tp->highest_sack = skb;
+
+	} else {
+		if (dup_sack && (sacked & TCPCB_RETRANS))
+			*reord = min(fack_count, *reord);
+	}
+
+	/* D-SACK. We can detect redundant retransmission in S|R and plain R
+	 * frames and clear it. undo_retrans is decreased above, L|R frames
+	 * are accounted above as well.
+	 */
+	if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+		tp->retrans_out -= tcp_skb_pcount(skb);
+		tp->retransmit_skb_hint = NULL;
+	}
+
+	return flag;
+}
+
 static int
 tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 {
@@ -1375,7 +1468,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 		tcp_for_write_queue_from(skb, sk) {
 			int in_sack = 0;
-			u8 sacked;
 
 			if (skb == tcp_send_head(sk))
 				break;
@@ -1413,102 +1505,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (unlikely(in_sack < 0))
 				break;
 
-			if (!in_sack) {
-				fack_count += tcp_skb_pcount(skb);
-				continue;
-			}
-
-			sacked = TCP_SKB_CB(skb)->sacked;
-
-			/* Account D-SACK for retransmitted packet. */
-			if (dup_sack && (sacked & TCPCB_RETRANS)) {
-				if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
-					tp->undo_retrans--;
-				if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) &&
-				    (sacked & TCPCB_SACKED_ACKED))
-					reord = min(fack_count, reord);
-			}
-
-
-			/* Nothing to do; acked frame is about to be dropped (was ACKed). */
-			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
-				fack_count += tcp_skb_pcount(skb);
-				continue;
-			}
-
-			if (!(sacked&TCPCB_SACKED_ACKED)) {
-				if (sacked & TCPCB_SACKED_RETRANS) {
-					/* If the segment is not tagged as lost,
-					 * we do not clear RETRANS, believing
-					 * that retransmission is still in flight.
-					 */
-					if (sacked & TCPCB_LOST) {
-						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
-						tp->lost_out -= tcp_skb_pcount(skb);
-						tp->retrans_out -= tcp_skb_pcount(skb);
-
-						/* clear lost hint */
-						tp->retransmit_skb_hint = NULL;
-					}
-				} else {
-					if (!(sacked & TCPCB_RETRANS)) {
-						/* New sack for not retransmitted frame,
-						 * which was in hole. It is reordering.
-						 */
-						if (before(TCP_SKB_CB(skb)->seq,
-							   tcp_highest_sack_seq(tp)))
-							reord = min(fack_count, reord);
-
-						/* SACK enhanced F-RTO (RFC4138; Appendix B) */
-						if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
-							flag |= FLAG_ONLY_ORIG_SACKED;
-					}
-
-					if (sacked & TCPCB_LOST) {
-						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-						tp->lost_out -= tcp_skb_pcount(skb);
-
-						/* clear lost hint */
-						tp->retransmit_skb_hint = NULL;
-					}
-				}
-
-				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
-				flag |= FLAG_DATA_SACKED;
-				tp->sacked_out += tcp_skb_pcount(skb);
-
-				fack_count += tcp_skb_pcount(skb);
-
-				/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-				if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
-				    before(TCP_SKB_CB(skb)->seq,
-					   TCP_SKB_CB(tp->lost_skb_hint)->seq))
-					tp->lost_cnt_hint += tcp_skb_pcount(skb);
-
-				if (fack_count > tp->fackets_out)
-					tp->fackets_out = fack_count;
+			if (in_sack)
+				flag |= tcp_sacktag_one(skb, tp, &reord, dup_sack, fack_count);
 
-				if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-					tp->highest_sack = skb;
-
-			} else {
-				if (dup_sack && (sacked&TCPCB_RETRANS))
-					reord = min(fack_count, reord);
-
-				fack_count += tcp_skb_pcount(skb);
-			}
-
-			/* D-SACK. We can detect redundant retransmission
-			 * in S|R and plain R frames and clear it.
-			 * undo_retrans is decreased above, L|R frames
-			 * are accounted above as well.
-			 */
-			if (dup_sack &&
-			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
-				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-				tp->retrans_out -= tcp_skb_pcount(skb);
-				tp->retransmit_skb_hint = NULL;
-			}
+			fack_count += tcp_skb_pcount(skb);
 		}
 
 		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
-- 
cgit v1.2.3


From fd6dad616d4fe2f08d690f25ca76b0102158fb3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:49:47 -0800
Subject: [TCP]: Earlier SACK block verification & simplify access to them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 85 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a62e0f90f56..a287747e9dd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1340,9 +1340,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned char *ptr = (skb_transport_header(ack_skb) +
 			      TCP_SKB_CB(ack_skb)->sacked);
-	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
+	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
+	struct tcp_sack_block sp[4];
 	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+	int used_sacks;
 	int reord = tp->packets_out;
 	int flag = 0;
 	int found_dup_sack = 0;
@@ -1357,7 +1359,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		tp->highest_sack = tcp_write_queue_head(sk);
 	}
 
-	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp,
+	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
 	if (found_dup_sack)
 		flag |= FLAG_DSACKING_ACK;
@@ -1372,14 +1374,49 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (!tp->packets_out)
 		goto out;
 
+	used_sacks = 0;
+	first_sack_index = 0;
+	for (i = 0; i < num_sacks; i++) {
+		int dup_sack = !i && found_dup_sack;
+
+		sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq));
+		sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq));
+
+		if (!tcp_is_sackblock_valid(tp, dup_sack,
+					    sp[used_sacks].start_seq,
+					    sp[used_sacks].end_seq)) {
+			if (dup_sack) {
+				if (!tp->undo_marker)
+					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
+				else
+					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
+			} else {
+				/* Don't count olds caused by ACK reordering */
+				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
+				    !after(sp[used_sacks].end_seq, tp->snd_una))
+					continue;
+				NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
+			}
+			if (i == 0)
+				first_sack_index = -1;
+			continue;
+		}
+
+		/* Ignore very old stuff early */
+		if (!after(sp[used_sacks].end_seq, prior_snd_una))
+			continue;
+
+		used_sacks++;
+	}
+
 	/* SACK fastpath:
 	 * if the only SACK change is the increase of the end_seq of
 	 * the first block then only apply that SACK block
 	 * and use retrans queue hinting otherwise slowpath */
 	force_one_sack = 1;
-	for (i = 0; i < num_sacks; i++) {
-		__be32 start_seq = sp[i].start_seq;
-		__be32 end_seq = sp[i].end_seq;
+	for (i = 0; i < used_sacks; i++) {
+		u32 start_seq = sp[i].start_seq;
+		u32 end_seq = sp[i].end_seq;
 
 		if (i == 0) {
 			if (tp->recv_sack_cache[i].start_seq != start_seq)
@@ -1398,19 +1435,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		tp->recv_sack_cache[i].end_seq = 0;
 	}
 
-	first_sack_index = 0;
 	if (force_one_sack)
-		num_sacks = 1;
+		used_sacks = 1;
 	else {
 		int j;
 		tp->fastpath_skb_hint = NULL;
 
 		/* order SACK blocks to allow in order walk of the retrans queue */
-		for (i = num_sacks-1; i > 0; i--) {
+		for (i = used_sacks - 1; i > 0; i--) {
 			for (j = 0; j < i; j++){
-				if (after(ntohl(sp[j].start_seq),
-					  ntohl(sp[j+1].start_seq))){
-					struct tcp_sack_block_wire tmp;
+				if (after(sp[j].start_seq, sp[j+1].start_seq)) {
+					struct tcp_sack_block tmp;
 
 					tmp = sp[j];
 					sp[j] = sp[j+1];
@@ -1433,32 +1468,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		cached_fack_count = 0;
 	}
 
-	for (i = 0; i < num_sacks; i++) {
+	for (i = 0; i < used_sacks; i++) {
 		struct sk_buff *skb;
-		__u32 start_seq = ntohl(sp->start_seq);
-		__u32 end_seq = ntohl(sp->end_seq);
+		u32 start_seq = sp[i].start_seq;
+		u32 end_seq = sp[i].end_seq;
 		int fack_count;
 		int dup_sack = (found_dup_sack && (i == first_sack_index));
 		int next_dup = (found_dup_sack && (i+1 == first_sack_index));
 
-		sp++;
-
-		if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
-			if (dup_sack) {
-				if (!tp->undo_marker)
-					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
-				else
-					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
-			} else {
-				/* Don't count olds caused by ACK reordering */
-				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
-				    !after(end_seq, tp->snd_una))
-					continue;
-				NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
-			}
-			continue;
-		}
-
 		skb = cached_skb;
 		fack_count = cached_fack_count;
 
@@ -1489,8 +1506,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 			/* Due to sorting DSACK may reside within this SACK block! */
 			if (next_dup) {
-				u32 dup_start = ntohl(sp->start_seq);
-				u32 dup_end = ntohl(sp->end_seq);
+				u32 dup_start = sp[i+1].start_seq;
+				u32 dup_end = sp[i+1].end_seq;
 
 				if (before(TCP_SKB_CB(skb)->seq, dup_end)) {
 					in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end);
-- 
cgit v1.2.3


From 68f8353b480e5f2e136c38a511abdbb88eaa8ce2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 15 Nov 2007 19:50:37 -0800
Subject: [TCP]: Rewrite SACK block processing & sack_recv_cache use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Key points of this patch are:

  - In case new SACK information is advance only type, no skb
    processing below previously discovered highest point is done
  - Optimize cases below highest point too since there's no need
    to always go up to highest point (which is very likely still
    present in that SACK), this is not entirely true though
    because I'm dropping the fastpath_skb_hint which could
    previously optimize those cases even better. Whether that's
    significant, I'm not too sure.

Currently it will provide skipping by walking. Combined with
RB-tree, all skipping would become fast too regardless of window
size (can be done incrementally later).

Previously a number of cases in TCP SACK processing fails to
take advantage of costly stored information in sack_recv_cache,
most importantly, expected events such as cumulative ACK and new
hole ACKs. Processing on such ACKs result in rather long walks
building up latencies (which easily gets nasty when window is
huge). Those latencies are often completely unnecessary
compared with the amount of _new_ information received, usually
for cumulative ACK there's no new information at all, yet TCP
walks whole queue unnecessary potentially taking a number of
costly cache misses on the way, etc.!

Since the inclusion of highest_sack, there's a lot information
that is very likely redundant (SACK fastpath hint stuff,
fackets_out, highest_sack), though there's no ultimate guarantee
that they'll remain the same whole the time (in all unearthly
scenarios). Take advantage of this knowledge here and drop
fastpath hint and use direct access to highest SACKed skb as
a replacement.

Effectively "special cased" fastpath is dropped. This change
adds some complexity to introduce better coveraged "fastpath",
though the added complexity should make TCP behave more cache
friendly.

The current ACK's SACK blocks are compared against each cached
block individially and only ranges that are new are then scanned
by the high constant walk. For other parts of write queue, even
when in previously known part of the SACK blocks, a faster skip
function is used (if necessary at all). In addition, whenever
possible, TCP fast-forwards to highest_sack skb that was made
available by an earlier patch. In typical case, no other things
but this fast-forward and mandatory markings after that occur
making the access pattern quite similar to the former fastpath
"special case".

DSACKs are special case that must always be walked.

The local to recv_sack_cache copying could be more intelligent
w.r.t DSACKs which are likely to be there only once but that
is left to a separate patch.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 271 +++++++++++++++++++++++++++++++-------------------
 net/ipv4/tcp_output.c |  14 +--
 2 files changed, 172 insertions(+), 113 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a287747e9dd..3ad6a19ad30 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1333,6 +1333,88 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
 	return flag;
 }
 
+static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
+					struct tcp_sack_block *next_dup,
+					u32 start_seq, u32 end_seq,
+					int dup_sack_in, int *fack_count,
+					int *reord, int *flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_for_write_queue_from(skb, sk) {
+		int in_sack = 0;
+		int dup_sack = dup_sack_in;
+
+		if (skb == tcp_send_head(sk))
+			break;
+
+		/* queue is in-order => we can short-circuit the walk early */
+		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
+			break;
+
+		if ((next_dup != NULL) &&
+		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
+			in_sack = tcp_match_skb_to_sack(sk, skb,
+							next_dup->start_seq,
+							next_dup->end_seq);
+			if (in_sack > 0)
+				dup_sack = 1;
+		}
+
+		if (in_sack <= 0)
+			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
+		if (unlikely(in_sack < 0))
+			break;
+
+		if (in_sack)
+			*flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
+
+		*fack_count += tcp_skb_pcount(skb);
+	}
+	return skb;
+}
+
+/* Avoid all extra work that is being done by sacktag while walking in
+ * a normal way
+ */
+static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
+					u32 skip_to_seq)
+{
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+
+		if (before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
+			break;
+	}
+	return skb;
+}
+
+static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
+						struct sock *sk,
+						struct tcp_sack_block *next_dup,
+						u32 skip_to_seq,
+						int *fack_count, int *reord,
+						int *flag)
+{
+	if (next_dup == NULL)
+		return skb;
+
+	if (before(next_dup->start_seq, skip_to_seq)) {
+		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+		tcp_sacktag_walk(skb, sk, NULL,
+				 next_dup->start_seq, next_dup->end_seq,
+				 1, fack_count, reord, flag);
+	}
+
+	return skb;
+}
+
+static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+{
+	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
+}
+
 static int
 tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 {
@@ -1342,16 +1424,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			      TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[4];
-	struct sk_buff *cached_skb;
+	struct tcp_sack_block *cache;
+	struct sk_buff *skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int used_sacks;
 	int reord = tp->packets_out;
 	int flag = 0;
 	int found_dup_sack = 0;
-	int cached_fack_count;
-	int i;
+	int fack_count;
+	int i, j;
 	int first_sack_index;
-	int force_one_sack;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1409,132 +1491,123 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		used_sacks++;
 	}
 
-	/* SACK fastpath:
-	 * if the only SACK change is the increase of the end_seq of
-	 * the first block then only apply that SACK block
-	 * and use retrans queue hinting otherwise slowpath */
-	force_one_sack = 1;
-	for (i = 0; i < used_sacks; i++) {
-		u32 start_seq = sp[i].start_seq;
-		u32 end_seq = sp[i].end_seq;
-
-		if (i == 0) {
-			if (tp->recv_sack_cache[i].start_seq != start_seq)
-				force_one_sack = 0;
-		} else {
-			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
-			    (tp->recv_sack_cache[i].end_seq != end_seq))
-				force_one_sack = 0;
-		}
-		tp->recv_sack_cache[i].start_seq = start_seq;
-		tp->recv_sack_cache[i].end_seq = end_seq;
-	}
-	/* Clear the rest of the cache sack blocks so they won't match mistakenly. */
-	for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
-		tp->recv_sack_cache[i].start_seq = 0;
-		tp->recv_sack_cache[i].end_seq = 0;
-	}
+	/* order SACK blocks to allow in order walk of the retrans queue */
+	for (i = used_sacks - 1; i > 0; i--) {
+		for (j = 0; j < i; j++){
+			if (after(sp[j].start_seq, sp[j+1].start_seq)) {
+				struct tcp_sack_block tmp;
 
-	if (force_one_sack)
-		used_sacks = 1;
-	else {
-		int j;
-		tp->fastpath_skb_hint = NULL;
-
-		/* order SACK blocks to allow in order walk of the retrans queue */
-		for (i = used_sacks - 1; i > 0; i--) {
-			for (j = 0; j < i; j++){
-				if (after(sp[j].start_seq, sp[j+1].start_seq)) {
-					struct tcp_sack_block tmp;
-
-					tmp = sp[j];
-					sp[j] = sp[j+1];
-					sp[j+1] = tmp;
-
-					/* Track where the first SACK block goes to */
-					if (j == first_sack_index)
-						first_sack_index = j+1;
-				}
+				tmp = sp[j];
+				sp[j] = sp[j+1];
+				sp[j+1] = tmp;
 
+				/* Track where the first SACK block goes to */
+				if (j == first_sack_index)
+					first_sack_index = j+1;
 			}
 		}
 	}
 
-	/* Use SACK fastpath hint if valid */
-	cached_skb = tp->fastpath_skb_hint;
-	cached_fack_count = tp->fastpath_cnt_hint;
-	if (!cached_skb) {
-		cached_skb = tcp_write_queue_head(sk);
-		cached_fack_count = 0;
+	skb = tcp_write_queue_head(sk);
+	fack_count = 0;
+	i = 0;
+
+	if (!tp->sacked_out) {
+		/* It's already past, so skip checking against it */
+		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
+	} else {
+		cache = tp->recv_sack_cache;
+		/* Skip empty blocks in at head of the cache */
+		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
+		       !cache->end_seq)
+			cache++;
 	}
 
-	for (i = 0; i < used_sacks; i++) {
-		struct sk_buff *skb;
+	while (i < used_sacks) {
 		u32 start_seq = sp[i].start_seq;
 		u32 end_seq = sp[i].end_seq;
-		int fack_count;
 		int dup_sack = (found_dup_sack && (i == first_sack_index));
-		int next_dup = (found_dup_sack && (i+1 == first_sack_index));
+		struct tcp_sack_block *next_dup = NULL;
 
-		skb = cached_skb;
-		fack_count = cached_fack_count;
+		if (found_dup_sack && ((i + 1) == first_sack_index))
+			next_dup = &sp[i + 1];
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		tcp_for_write_queue_from(skb, sk) {
-			int in_sack = 0;
-
-			if (skb == tcp_send_head(sk))
-				break;
-
-			cached_skb = skb;
-			cached_fack_count = fack_count;
-			if (i == first_sack_index) {
-				tp->fastpath_skb_hint = skb;
-				tp->fastpath_cnt_hint = fack_count;
+		/* Skip too early cached blocks */
+		while (tcp_sack_cache_ok(tp, cache) &&
+		       !before(start_seq, cache->end_seq))
+			cache++;
+
+		/* Can skip some work by looking recv_sack_cache? */
+		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
+		    after(end_seq, cache->start_seq)) {
+
+			/* Head todo? */
+			if (before(start_seq, cache->start_seq)) {
+				skb = tcp_sacktag_skip(skb, sk, start_seq);
+				skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq,
+						       cache->start_seq, dup_sack,
+						       &fack_count, &reord, &flag);
 			}
 
-			/* The retransmission queue is always in order, so
-			 * we can short-circuit the walk early.
-			 */
-			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
-				break;
-
-			dup_sack = (found_dup_sack && (i == first_sack_index));
+			/* Rest of the block already fully processed? */
+			if (!after(end_seq, cache->end_seq)) {
+				skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
+							       &fack_count, &reord, &flag);
+				goto advance_sp;
+			}
 
-			/* Due to sorting DSACK may reside within this SACK block! */
-			if (next_dup) {
-				u32 dup_start = sp[i+1].start_seq;
-				u32 dup_end = sp[i+1].end_seq;
+			/* ...tail remains todo... */
+			if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) {
+				/* ...but better entrypoint exists! Check that DSACKs are
+				 * properly accounted while skipping here
+				 */
+				tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
+							 &fack_count, &reord, &flag);
 
-				if (before(TCP_SKB_CB(skb)->seq, dup_end)) {
-					in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end);
-					if (in_sack > 0)
-						dup_sack = 1;
-				}
+				skb = tcp_write_queue_next(sk, tp->highest_sack);
+				fack_count = tp->fackets_out;
+				cache++;
+				goto walk;
 			}
 
-			/* DSACK info lost if out-of-mem, try SACK still */
-			if (in_sack <= 0)
-				in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
-			if (unlikely(in_sack < 0))
-				break;
-
-			if (in_sack)
-				flag |= tcp_sacktag_one(skb, tp, &reord, dup_sack, fack_count);
+			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+			/* Check overlap against next cached too (past this one already) */
+			cache++;
+			continue;
+		}
 
-			fack_count += tcp_skb_pcount(skb);
+		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+			skb = tcp_write_queue_next(sk, tp->highest_sack);
+			fack_count = tp->fackets_out;
 		}
+		skb = tcp_sacktag_skip(skb, sk, start_seq);
+
+walk:
+		skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
+				       dup_sack, &fack_count, &reord, &flag);
 
+advance_sp:
 		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
 		 * due to in-order walk
 		 */
 		if (after(end_seq, tp->frto_highmark))
 			flag &= ~FLAG_ONLY_ORIG_SACKED;
+
+		i++;
 	}
 
+	/* Clear the head of the cache sack blocks so we can skip it next time */
+	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
+		tp->recv_sack_cache[i].start_seq = 0;
+		tp->recv_sack_cache[i].end_seq = 0;
+	}
+	for (j = 0; j < used_sacks; j++)
+		tp->recv_sack_cache[i++] = sp[j];
+
 	flag |= tcp_mark_lost_retrans(sk);
 
 	tcp_verify_left_out(tp);
@@ -2821,9 +2894,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 		}
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-		/* hint's skb might be NULL but we don't need to care */
-		tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
-					       tp->fastpath_cnt_hint);
+
 		if (ca_ops->pkts_acked) {
 			s32 rtt_us = -1;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ce506af5ce0..030fc69ea21 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -653,9 +653,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
 }
 
 /* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not. Another important thing is to
- * tweak SACK fastpath hint too as it would overwrite all changes unless
- * hint is also changed.
+ * skb is counted to fackets_out or not.
  */
 static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 				   int decr)
@@ -667,11 +665,6 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 
 	if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
 		tp->fackets_out -= decr;
-
-	/* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
-	if (tp->fastpath_skb_hint != NULL &&
-	    after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
-		tp->fastpath_cnt_hint -= decr;
 }
 
 /* Function to create two new TCP segments.  Shrinks the given segment
@@ -1753,11 +1746,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 
 		/* changed transmit queue under us so clear hints */
 		tcp_clear_retrans_hints_partial(tp);
-		/* manually tune sacktag skb hint */
-		if (tp->fastpath_skb_hint == next_skb) {
-			tp->fastpath_skb_hint = skb;
-			tp->fastpath_cnt_hint -= tcp_skb_pcount(skb);
-		}
 
 		sk_stream_free_skb(sk, next_skb);
 	}
-- 
cgit v1.2.3


From 8dbde28d9711475adfe0e9c88505e38743cdc2a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 16 Nov 2007 03:32:10 -0800
Subject: [NET]: NET_CLS_ROUTE : convert ip_rt_acct to per_cpu variables

ip_rt_acct needs 4096 bytes per cpu to perform some accounting.
It is actually allocated as a single huge array [4096*NR_CPUS]
(rounded up to a power of two)

Converting it to a per cpu variable is wanted to :
 - Save space on machines were num_possible_cpus() < NR_CPUS
 - Better NUMA placement (each cpu gets memory on its node)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c |  2 +-
 net/ipv4/route.c    | 15 +++------------
 2 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 5b8a7603e60..4068e178d74 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -347,7 +347,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (unlikely(skb->dst->tclassid)) {
-		struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
+		struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
 		u32 idx = skb->dst->tclassid;
 		st[idx&0xFF].o_packets++;
 		st[idx&0xFF].o_bytes+=skb->len;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94ef788a2ac..a21021bf140 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2858,12 +2858,10 @@ ctl_table ipv4_route_table[] = {
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
-struct ip_rt_acct *ip_rt_acct;
-
-/* This code sucks.  But you should have seen it before! --RR */
+struct ip_rt_acct *ip_rt_acct __read_mostly;
 
 /* IP route accounting ptr for this logical cpu number. */
-#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
+#define IP_RT_ACCT_CPU(cpu) (per_cpu_ptr(ip_rt_acct, cpu))
 
 #ifdef CONFIG_PROC_FS
 static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
@@ -2923,16 +2921,9 @@ int __init ip_rt_init(void)
 			     (jiffies ^ (jiffies >> 7)));
 
 #ifdef CONFIG_NET_CLS_ROUTE
-	{
-	int order;
-	for (order = 0;
-	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
-		/* NOTHING */;
-	ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
+	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
-	memset(ip_rt_acct, 0, PAGE_SIZE << order);
-	}
 #endif
 
 	ipv4_dst_ops.kmem_cachep =
-- 
cgit v1.2.3


From cd05acfe65ed2cf2db683fa9a6adb8d35635263b Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Date: Sun, 16 Dec 2007 15:59:24 -0800
Subject: [CAN]: Allocate protocol numbers for PF_CAN

This patch adds a protocol/address family number, ARP hardware type,
ethernet packet type, and a line discipline number for the SocketCAN
implementation.

Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 98b243a5b32..c9305a86176 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
-  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
+  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
 };
@@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
-  "slock-27"       , "slock-28"          , "slock-29"          ,
+  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_MAX"
 };
-- 
cgit v1.2.3


From 0d66548a10cbbe0ef256852d63d30603f0f73f9b Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Date: Fri, 16 Nov 2007 15:52:17 -0800
Subject: [CAN]: Add PF_CAN core module

This patch adds the CAN core functionality but no protocols or drivers.
No protocol implementations are included here.  They come as separate
patches.  Protocol numbers are already in include/linux/can.h.

Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig      |   1 +
 net/Makefile     |   1 +
 net/can/Kconfig  |  17 ++
 net/can/Makefile |   6 +
 net/can/af_can.c | 861 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/can/af_can.h | 122 ++++++++
 net/can/proc.c   | 533 ++++++++++++++++++++++++++++++++++
 7 files changed, 1541 insertions(+)
 create mode 100644 net/can/Kconfig
 create mode 100644 net/can/Makefile
 create mode 100644 net/can/af_can.c
 create mode 100644 net/can/af_can.h
 create mode 100644 net/can/proc.c

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index ab4e6da5012..58ed2f4199d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -218,6 +218,7 @@ endmenu
 endmenu
 
 source "net/ax25/Kconfig"
+source "net/can/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bbe7d2a4148..b7a13643b54 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_LAPB)		+= lapb/
 obj-$(CONFIG_NETROM)		+= netrom/
 obj-$(CONFIG_ROSE)		+= rose/
 obj-$(CONFIG_AX25)		+= ax25/
+obj-$(CONFIG_CAN)		+= can/
 obj-$(CONFIG_IRDA)		+= irda/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
diff --git a/net/can/Kconfig b/net/can/Kconfig
new file mode 100644
index 00000000000..8b92790747e
--- /dev/null
+++ b/net/can/Kconfig
@@ -0,0 +1,17 @@
+#
+# Controller Area Network (CAN) network layer core configuration
+#
+
+menuconfig CAN
+	depends on NET
+	tristate "CAN bus subsystem support"
+	---help---
+	  Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
+	  communications protocol which was developed by Bosch in
+	  1991, mainly for automotive, but now widely used in marine
+	  (NMEA2000), industrial, and medical applications.
+	  More information on the CAN network protocol family PF_CAN
+	  is contained in <Documentation/networking/can.txt>.
+
+	  If you want CAN support you should say Y here and also to the
+	  specific driver for your controller(s) below.
diff --git a/net/can/Makefile b/net/can/Makefile
new file mode 100644
index 00000000000..4c7563c2ccb
--- /dev/null
+++ b/net/can/Makefile
@@ -0,0 +1,6 @@
+#
+#  Makefile for the Linux Controller Area Network core.
+#
+
+obj-$(CONFIG_CAN)	+= can.o
+can-objs		:= af_can.o proc.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
new file mode 100644
index 00000000000..5158e886630
--- /dev/null
+++ b/net/can/af_can.c
@@ -0,0 +1,861 @@
+/*
+ * af_can.c - Protocol family CAN core module
+ *            (used by different CAN protocol modules)
+ *
+ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/uaccess.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "af_can.h"
+
+static __initdata const char banner[] = KERN_INFO
+	"can: controller area network core (" CAN_VERSION_STRING ")\n";
+
+MODULE_DESCRIPTION("Controller Area Network PF_CAN core");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
+	      "Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
+
+MODULE_ALIAS_NETPROTO(PF_CAN);
+
+static int stats_timer __read_mostly = 1;
+module_param(stats_timer, int, S_IRUGO);
+MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
+
+HLIST_HEAD(can_rx_dev_list);
+static struct dev_rcv_lists can_rx_alldev_list;
+static DEFINE_SPINLOCK(can_rcvlists_lock);
+
+static struct kmem_cache *rcv_cache __read_mostly;
+
+/* table of registered CAN protocols */
+static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
+static DEFINE_SPINLOCK(proto_tab_lock);
+
+struct timer_list can_stattimer;   /* timer for statistics update */
+struct s_stats    can_stats;       /* packet statistics */
+struct s_pstats   can_pstats;      /* receive list statistics */
+
+/*
+ * af_can socket functions
+ */
+
+static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+
+	switch (cmd) {
+
+	case SIOCGSTAMP:
+		return sock_get_timestamp(sk, (struct timeval __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+static void can_sock_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+}
+
+static int can_create(struct net *net, struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct can_proto *cp;
+	char module_name[sizeof("can-proto-000")];
+	int err = 0;
+
+	sock->state = SS_UNCONNECTED;
+
+	if (protocol < 0 || protocol >= CAN_NPROTO)
+		return -EINVAL;
+
+	if (net != &init_net)
+		return -EAFNOSUPPORT;
+
+	/* try to load protocol module, when CONFIG_KMOD is defined */
+	if (!proto_tab[protocol]) {
+		sprintf(module_name, "can-proto-%d", protocol);
+		err = request_module(module_name);
+
+		/*
+		 * In case of error we only print a message but don't
+		 * return the error code immediately.  Below we will
+		 * return -EPROTONOSUPPORT
+		 */
+		if (err == -ENOSYS) {
+			if (printk_ratelimit())
+				printk(KERN_INFO "can: request_module(%s)"
+				       " not implemented.\n", module_name);
+		} else if (err) {
+			if (printk_ratelimit())
+				printk(KERN_ERR "can: request_module(%s)"
+				       " failed.\n", module_name);
+		}
+	}
+
+	spin_lock(&proto_tab_lock);
+	cp = proto_tab[protocol];
+	if (cp && !try_module_get(cp->prot->owner))
+		cp = NULL;
+	spin_unlock(&proto_tab_lock);
+
+	/* check for available protocol and correct usage */
+
+	if (!cp)
+		return -EPROTONOSUPPORT;
+
+	if (cp->type != sock->type) {
+		err = -EPROTONOSUPPORT;
+		goto errout;
+	}
+
+	if (cp->capability >= 0 && !capable(cp->capability)) {
+		err = -EPERM;
+		goto errout;
+	}
+
+	sock->ops = cp->ops;
+
+	sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot);
+	if (!sk) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	sock_init_data(sock, sk);
+	sk->sk_destruct = can_sock_destruct;
+
+	if (sk->sk_prot->init)
+		err = sk->sk_prot->init(sk);
+
+	if (err) {
+		/* release sk on errors */
+		sock_orphan(sk);
+		sock_put(sk);
+	}
+
+ errout:
+	module_put(cp->prot->owner);
+	return err;
+}
+
+/*
+ * af_can tx path
+ */
+
+/**
+ * can_send - transmit a CAN frame (optional with local loopback)
+ * @skb: pointer to socket buffer with CAN frame in data section
+ * @loop: loopback for listeners on local CAN sockets (recommended default!)
+ *
+ * Return:
+ *  0 on success
+ *  -ENETDOWN when the selected interface is down
+ *  -ENOBUFS on full driver queue (see net_xmit_errno())
+ *  -ENOMEM when local loopback failed at calling skb_clone()
+ *  -EPERM when trying to send on a non-CAN interface
+ */
+int can_send(struct sk_buff *skb, int loop)
+{
+	int err;
+
+	if (skb->dev->type != ARPHRD_CAN) {
+		kfree_skb(skb);
+		return -EPERM;
+	}
+
+	if (!(skb->dev->flags & IFF_UP)) {
+		kfree_skb(skb);
+		return -ENETDOWN;
+	}
+
+	skb->protocol = htons(ETH_P_CAN);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
+	if (loop) {
+		/* local loopback of sent CAN frames */
+
+		/* indication for the CAN driver: do loopback */
+		skb->pkt_type = PACKET_LOOPBACK;
+
+		/*
+		 * The reference to the originating sock may be required
+		 * by the receiving socket to check whether the frame is
+		 * its own. Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS
+		 * Therefore we have to ensure that skb->sk remains the
+		 * reference to the originating sock by restoring skb->sk
+		 * after each skb_clone() or skb_orphan() usage.
+		 */
+
+		if (!(skb->dev->flags & IFF_ECHO)) {
+			/*
+			 * If the interface is not capable to do loopback
+			 * itself, we do it here.
+			 */
+			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+
+			if (!newskb) {
+				kfree_skb(skb);
+				return -ENOMEM;
+			}
+
+			newskb->sk = skb->sk;
+			newskb->ip_summed = CHECKSUM_UNNECESSARY;
+			newskb->pkt_type = PACKET_BROADCAST;
+			netif_rx(newskb);
+		}
+	} else {
+		/* indication for the CAN driver: no loopback required */
+		skb->pkt_type = PACKET_HOST;
+	}
+
+	/* send to netdevice */
+	err = dev_queue_xmit(skb);
+	if (err > 0)
+		err = net_xmit_errno(err);
+
+	/* update statistics */
+	can_stats.tx_frames++;
+	can_stats.tx_frames_delta++;
+
+	return err;
+}
+EXPORT_SYMBOL(can_send);
+
+/*
+ * af_can rx path
+ */
+
+static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
+{
+	struct dev_rcv_lists *d = NULL;
+	struct hlist_node *n;
+
+	/*
+	 * find receive list for this device
+	 *
+	 * The hlist_for_each_entry*() macros curse through the list
+	 * using the pointer variable n and set d to the containing
+	 * struct in each list iteration.  Therefore, after list
+	 * iteration, d is unmodified when the list is empty, and it
+	 * points to last list element, when the list is non-empty
+	 * but no match in the loop body is found.  I.e. d is *not*
+	 * NULL when no match is found.  We can, however, use the
+	 * cursor variable n to decide if a match was found.
+	 */
+
+	hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
+		if (d->dev == dev)
+			break;
+	}
+
+	return n ? d : NULL;
+}
+
+static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
+					struct dev_rcv_lists *d)
+{
+	canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
+
+	/* filter error frames */
+	if (*mask & CAN_ERR_FLAG) {
+		/* clear CAN_ERR_FLAG in list entry */
+		*mask &= CAN_ERR_MASK;
+		return &d->rx[RX_ERR];
+	}
+
+	/* ensure valid values in can_mask */
+	if (*mask & CAN_EFF_FLAG)
+		*mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG);
+	else
+		*mask &= (CAN_SFF_MASK | CAN_RTR_FLAG);
+
+	/* reduce condition testing at receive time */
+	*can_id &= *mask;
+
+	/* inverse can_id/can_mask filter */
+	if (inv)
+		return &d->rx[RX_INV];
+
+	/* mask == 0 => no condition testing at receive time */
+	if (!(*mask))
+		return &d->rx[RX_ALL];
+
+	/* use extra filterset for the subscription of exactly *ONE* can_id */
+	if (*can_id & CAN_EFF_FLAG) {
+		if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) {
+			/* RFC: a use-case for hash-tables in the future? */
+			return &d->rx[RX_EFF];
+		}
+	} else {
+		if (*mask == CAN_SFF_MASK)
+			return &d->rx_sff[*can_id];
+	}
+
+	/* default: filter via can_id/can_mask */
+	return &d->rx[RX_FIL];
+}
+
+/**
+ * can_rx_register - subscribe CAN frames from a specific interface
+ * @dev: pointer to netdevice (NULL => subcribe from 'all' CAN devices list)
+ * @can_id: CAN identifier (see description)
+ * @mask: CAN mask (see description)
+ * @func: callback function on filter match
+ * @data: returned parameter for callback function
+ * @ident: string for calling module indentification
+ *
+ * Description:
+ *  Invokes the callback function with the received sk_buff and the given
+ *  parameter 'data' on a matching receive filter. A filter matches, when
+ *
+ *          <received_can_id> & mask == can_id & mask
+ *
+ *  The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can
+ *  filter for error frames (CAN_ERR_FLAG bit set in mask).
+ *
+ * Return:
+ *  0 on success
+ *  -ENOMEM on missing cache mem to create subscription entry
+ *  -ENODEV unknown device
+ */
+int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
+		    void (*func)(struct sk_buff *, void *), void *data,
+		    char *ident)
+{
+	struct receiver *r;
+	struct hlist_head *rl;
+	struct dev_rcv_lists *d;
+	int err = 0;
+
+	/* insert new receiver  (dev,canid,mask) -> (func,data) */
+
+	r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+
+	spin_lock(&can_rcvlists_lock);
+
+	d = find_dev_rcv_lists(dev);
+	if (d) {
+		rl = find_rcv_list(&can_id, &mask, d);
+
+		r->can_id  = can_id;
+		r->mask    = mask;
+		r->matches = 0;
+		r->func    = func;
+		r->data    = data;
+		r->ident   = ident;
+
+		hlist_add_head_rcu(&r->list, rl);
+		d->entries++;
+
+		can_pstats.rcv_entries++;
+		if (can_pstats.rcv_entries_max < can_pstats.rcv_entries)
+			can_pstats.rcv_entries_max = can_pstats.rcv_entries;
+	} else {
+		kmem_cache_free(rcv_cache, r);
+		err = -ENODEV;
+	}
+
+	spin_unlock(&can_rcvlists_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(can_rx_register);
+
+/*
+ * can_rx_delete_device - rcu callback for dev_rcv_lists structure removal
+ */
+static void can_rx_delete_device(struct rcu_head *rp)
+{
+	struct dev_rcv_lists *d = container_of(rp, struct dev_rcv_lists, rcu);
+
+	kfree(d);
+}
+
+/*
+ * can_rx_delete_receiver - rcu callback for single receiver entry removal
+ */
+static void can_rx_delete_receiver(struct rcu_head *rp)
+{
+	struct receiver *r = container_of(rp, struct receiver, rcu);
+
+	kmem_cache_free(rcv_cache, r);
+}
+
+/**
+ * can_rx_unregister - unsubscribe CAN frames from a specific interface
+ * @dev: pointer to netdevice (NULL => unsubcribe from 'all' CAN devices list)
+ * @can_id: CAN identifier
+ * @mask: CAN mask
+ * @func: callback function on filter match
+ * @data: returned parameter for callback function
+ *
+ * Description:
+ *  Removes subscription entry depending on given (subscription) values.
+ */
+void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
+		       void (*func)(struct sk_buff *, void *), void *data)
+{
+	struct receiver *r = NULL;
+	struct hlist_head *rl;
+	struct hlist_node *next;
+	struct dev_rcv_lists *d;
+
+	spin_lock(&can_rcvlists_lock);
+
+	d = find_dev_rcv_lists(dev);
+	if (!d) {
+		printk(KERN_ERR "BUG: receive list not found for "
+		       "dev %s, id %03X, mask %03X\n",
+		       DNAME(dev), can_id, mask);
+		goto out;
+	}
+
+	rl = find_rcv_list(&can_id, &mask, d);
+
+	/*
+	 * Search the receiver list for the item to delete.  This should
+	 * exist, since no receiver may be unregistered that hasn't
+	 * been registered before.
+	 */
+
+	hlist_for_each_entry_rcu(r, next, rl, list) {
+		if (r->can_id == can_id && r->mask == mask
+		    && r->func == func && r->data == data)
+			break;
+	}
+
+	/*
+	 * Check for bugs in CAN protocol implementations:
+	 * If no matching list item was found, the list cursor variable next
+	 * will be NULL, while r will point to the last item of the list.
+	 */
+
+	if (!next) {
+		printk(KERN_ERR "BUG: receive list entry not found for "
+		       "dev %s, id %03X, mask %03X\n",
+		       DNAME(dev), can_id, mask);
+		r = NULL;
+		d = NULL;
+		goto out;
+	}
+
+	hlist_del_rcu(&r->list);
+	d->entries--;
+
+	if (can_pstats.rcv_entries > 0)
+		can_pstats.rcv_entries--;
+
+	/* remove device structure requested by NETDEV_UNREGISTER */
+	if (d->remove_on_zero_entries && !d->entries)
+		hlist_del_rcu(&d->list);
+	else
+		d = NULL;
+
+ out:
+	spin_unlock(&can_rcvlists_lock);
+
+	/* schedule the receiver item for deletion */
+	if (r)
+		call_rcu(&r->rcu, can_rx_delete_receiver);
+
+	/* schedule the device structure for deletion */
+	if (d)
+		call_rcu(&d->rcu, can_rx_delete_device);
+}
+EXPORT_SYMBOL(can_rx_unregister);
+
+static inline void deliver(struct sk_buff *skb, struct receiver *r)
+{
+	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
+	if (clone) {
+		clone->sk = skb->sk;
+		r->func(clone, r->data);
+		r->matches++;
+	}
+}
+
+static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
+{
+	struct receiver *r;
+	struct hlist_node *n;
+	int matches = 0;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	canid_t can_id = cf->can_id;
+
+	if (d->entries == 0)
+		return 0;
+
+	if (can_id & CAN_ERR_FLAG) {
+		/* check for error frame entries only */
+		hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) {
+			if (can_id & r->mask) {
+				deliver(skb, r);
+				matches++;
+			}
+		}
+		return matches;
+	}
+
+	/* check for unfiltered entries */
+	hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) {
+		deliver(skb, r);
+		matches++;
+	}
+
+	/* check for can_id/mask entries */
+	hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) {
+		if ((can_id & r->mask) == r->can_id) {
+			deliver(skb, r);
+			matches++;
+		}
+	}
+
+	/* check for inverted can_id/mask entries */
+	hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) {
+		if ((can_id & r->mask) != r->can_id) {
+			deliver(skb, r);
+			matches++;
+		}
+	}
+
+	/* check CAN_ID specific entries */
+	if (can_id & CAN_EFF_FLAG) {
+		hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) {
+			if (r->can_id == can_id) {
+				deliver(skb, r);
+				matches++;
+			}
+		}
+	} else {
+		can_id &= CAN_SFF_MASK;
+		hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) {
+			deliver(skb, r);
+			matches++;
+		}
+	}
+
+	return matches;
+}
+
+static int can_rcv(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dev_rcv_lists *d;
+	int matches;
+
+	if (dev->type != ARPHRD_CAN || dev->nd_net != &init_net) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/* update statistics */
+	can_stats.rx_frames++;
+	can_stats.rx_frames_delta++;
+
+	rcu_read_lock();
+
+	/* deliver the packet to sockets listening on all devices */
+	matches = can_rcv_filter(&can_rx_alldev_list, skb);
+
+	/* find receive list for this device */
+	d = find_dev_rcv_lists(dev);
+	if (d)
+		matches += can_rcv_filter(d, skb);
+
+	rcu_read_unlock();
+
+	/* free the skbuff allocated by the netdevice driver */
+	kfree_skb(skb);
+
+	if (matches > 0) {
+		can_stats.matches++;
+		can_stats.matches_delta++;
+	}
+
+	return 0;
+}
+
+/*
+ * af_can protocol functions
+ */
+
+/**
+ * can_proto_register - register CAN transport protocol
+ * @cp: pointer to CAN protocol structure
+ *
+ * Return:
+ *  0 on success
+ *  -EINVAL invalid (out of range) protocol number
+ *  -EBUSY  protocol already in use
+ *  -ENOBUF if proto_register() fails
+ */
+int can_proto_register(struct can_proto *cp)
+{
+	int proto = cp->protocol;
+	int err = 0;
+
+	if (proto < 0 || proto >= CAN_NPROTO) {
+		printk(KERN_ERR "can: protocol number %d out of range\n",
+		       proto);
+		return -EINVAL;
+	}
+
+	spin_lock(&proto_tab_lock);
+	if (proto_tab[proto]) {
+		printk(KERN_ERR "can: protocol %d already registered\n",
+		       proto);
+		err = -EBUSY;
+		goto errout;
+	}
+
+	err = proto_register(cp->prot, 0);
+	if (err < 0)
+		goto errout;
+
+	proto_tab[proto] = cp;
+
+	/* use generic ioctl function if the module doesn't bring its own */
+	if (!cp->ops->ioctl)
+		cp->ops->ioctl = can_ioctl;
+
+ errout:
+	spin_unlock(&proto_tab_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(can_proto_register);
+
+/**
+ * can_proto_unregister - unregister CAN transport protocol
+ * @cp: pointer to CAN protocol structure
+ */
+void can_proto_unregister(struct can_proto *cp)
+{
+	int proto = cp->protocol;
+
+	spin_lock(&proto_tab_lock);
+	if (!proto_tab[proto]) {
+		printk(KERN_ERR "BUG: can: protocol %d is not registered\n",
+		       proto);
+	}
+	proto_unregister(cp->prot);
+	proto_tab[proto] = NULL;
+	spin_unlock(&proto_tab_lock);
+}
+EXPORT_SYMBOL(can_proto_unregister);
+
+/*
+ * af_can notifier to create/remove CAN netdevice specific structs
+ */
+static int can_notifier(struct notifier_block *nb, unsigned long msg,
+			void *data)
+{
+	struct net_device *dev = (struct net_device *)data;
+	struct dev_rcv_lists *d;
+
+	if (dev->nd_net != &init_net)
+		return NOTIFY_DONE;
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+
+	switch (msg) {
+
+	case NETDEV_REGISTER:
+
+		/*
+		 * create new dev_rcv_lists for this device
+		 *
+		 * N.B. zeroing the struct is the correct initialization
+		 * for the embedded hlist_head structs.
+		 * Another list type, e.g. list_head, would require
+		 * explicit initialization.
+		 */
+
+		d = kzalloc(sizeof(*d), GFP_KERNEL);
+		if (!d) {
+			printk(KERN_ERR
+			       "can: allocation of receive list failed\n");
+			return NOTIFY_DONE;
+		}
+		d->dev = dev;
+
+		spin_lock(&can_rcvlists_lock);
+		hlist_add_head_rcu(&d->list, &can_rx_dev_list);
+		spin_unlock(&can_rcvlists_lock);
+
+		break;
+
+	case NETDEV_UNREGISTER:
+		spin_lock(&can_rcvlists_lock);
+
+		d = find_dev_rcv_lists(dev);
+		if (d) {
+			if (d->entries) {
+				d->remove_on_zero_entries = 1;
+				d = NULL;
+			} else
+				hlist_del_rcu(&d->list);
+		} else
+			printk(KERN_ERR "can: notifier: receive list not "
+			       "found for dev %s\n", dev->name);
+
+		spin_unlock(&can_rcvlists_lock);
+
+		if (d)
+			call_rcu(&d->rcu, can_rx_delete_device);
+
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * af_can module init/exit functions
+ */
+
+static struct packet_type can_packet __read_mostly = {
+	.type = __constant_htons(ETH_P_CAN),
+	.dev  = NULL,
+	.func = can_rcv,
+};
+
+static struct net_proto_family can_family_ops __read_mostly = {
+	.family = PF_CAN,
+	.create = can_create,
+	.owner  = THIS_MODULE,
+};
+
+/* notifier block for netdevice event */
+static struct notifier_block can_netdev_notifier __read_mostly = {
+	.notifier_call = can_notifier,
+};
+
+static __init int can_init(void)
+{
+	printk(banner);
+
+	rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
+				      0, 0, NULL);
+	if (!rcv_cache)
+		return -ENOMEM;
+
+	/*
+	 * Insert can_rx_alldev_list for reception on all devices.
+	 * This struct is zero initialized which is correct for the
+	 * embedded hlist heads, the dev pointer, and the entries counter.
+	 */
+
+	spin_lock(&can_rcvlists_lock);
+	hlist_add_head_rcu(&can_rx_alldev_list.list, &can_rx_dev_list);
+	spin_unlock(&can_rcvlists_lock);
+
+	if (stats_timer) {
+		/* the statistics are updated every second (timer triggered) */
+		setup_timer(&can_stattimer, can_stat_update, 0);
+		mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
+	} else
+		can_stattimer.function = NULL;
+
+	can_init_proc();
+
+	/* protocol register */
+	sock_register(&can_family_ops);
+	register_netdevice_notifier(&can_netdev_notifier);
+	dev_add_pack(&can_packet);
+
+	return 0;
+}
+
+static __exit void can_exit(void)
+{
+	struct dev_rcv_lists *d;
+	struct hlist_node *n, *next;
+
+	if (stats_timer)
+		del_timer(&can_stattimer);
+
+	can_remove_proc();
+
+	/* protocol unregister */
+	dev_remove_pack(&can_packet);
+	unregister_netdevice_notifier(&can_netdev_notifier);
+	sock_unregister(PF_CAN);
+
+	/* remove can_rx_dev_list */
+	spin_lock(&can_rcvlists_lock);
+	hlist_del(&can_rx_alldev_list.list);
+	hlist_for_each_entry_safe(d, n, next, &can_rx_dev_list, list) {
+		hlist_del(&d->list);
+		kfree(d);
+	}
+	spin_unlock(&can_rcvlists_lock);
+
+	kmem_cache_destroy(rcv_cache);
+}
+
+module_init(can_init);
+module_exit(can_exit);
diff --git a/net/can/af_can.h b/net/can/af_can.h
new file mode 100644
index 00000000000..18f91e37cc3
--- /dev/null
+++ b/net/can/af_can.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#ifndef AF_CAN_H
+#define AF_CAN_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/can.h>
+
+/* af_can rx dispatcher structures */
+
+struct receiver {
+	struct hlist_node list;
+	struct rcu_head rcu;
+	canid_t can_id;
+	canid_t mask;
+	unsigned long matches;
+	void (*func)(struct sk_buff *, void *);
+	void *data;
+	char *ident;
+};
+
+enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX };
+
+struct dev_rcv_lists {
+	struct hlist_node list;
+	struct rcu_head rcu;
+	struct net_device *dev;
+	struct hlist_head rx[RX_MAX];
+	struct hlist_head rx_sff[0x800];
+	int remove_on_zero_entries;
+	int entries;
+};
+
+/* statistic structures */
+
+/* can be reset e.g. by can_init_stats() */
+struct s_stats {
+	unsigned long jiffies_init;
+
+	unsigned long rx_frames;
+	unsigned long tx_frames;
+	unsigned long matches;
+
+	unsigned long total_rx_rate;
+	unsigned long total_tx_rate;
+	unsigned long total_rx_match_ratio;
+
+	unsigned long current_rx_rate;
+	unsigned long current_tx_rate;
+	unsigned long current_rx_match_ratio;
+
+	unsigned long max_rx_rate;
+	unsigned long max_tx_rate;
+	unsigned long max_rx_match_ratio;
+
+	unsigned long rx_frames_delta;
+	unsigned long tx_frames_delta;
+	unsigned long matches_delta;
+};
+
+/* persistent statistics */
+struct s_pstats {
+	unsigned long stats_reset;
+	unsigned long user_reset;
+	unsigned long rcv_entries;
+	unsigned long rcv_entries_max;
+};
+
+/* function prototypes for the CAN networklayer procfs (proc.c) */
+extern void can_init_proc(void);
+extern void can_remove_proc(void);
+extern void can_stat_update(unsigned long data);
+
+/* structures and variables from af_can.c needed in proc.c for reading */
+extern struct timer_list can_stattimer;    /* timer for statistics update */
+extern struct s_stats    can_stats;        /* packet statistics */
+extern struct s_pstats   can_pstats;       /* receive list statistics */
+extern struct hlist_head can_rx_dev_list;  /* rx dispatcher structures */
+
+#endif /* AF_CAN_H */
diff --git a/net/can/proc.c b/net/can/proc.c
new file mode 100644
index 00000000000..520fef5e539
--- /dev/null
+++ b/net/can/proc.c
@@ -0,0 +1,533 @@
+/*
+ * proc.c - procfs support for Protocol family CAN core module
+ *
+ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/can/core.h>
+
+#include "af_can.h"
+
+/*
+ * proc filenames for the PF_CAN core
+ */
+
+#define CAN_PROC_VERSION     "version"
+#define CAN_PROC_STATS       "stats"
+#define CAN_PROC_RESET_STATS "reset_stats"
+#define CAN_PROC_RCVLIST_ALL "rcvlist_all"
+#define CAN_PROC_RCVLIST_FIL "rcvlist_fil"
+#define CAN_PROC_RCVLIST_INV "rcvlist_inv"
+#define CAN_PROC_RCVLIST_SFF "rcvlist_sff"
+#define CAN_PROC_RCVLIST_EFF "rcvlist_eff"
+#define CAN_PROC_RCVLIST_ERR "rcvlist_err"
+
+static struct proc_dir_entry *can_dir;
+static struct proc_dir_entry *pde_version;
+static struct proc_dir_entry *pde_stats;
+static struct proc_dir_entry *pde_reset_stats;
+static struct proc_dir_entry *pde_rcvlist_all;
+static struct proc_dir_entry *pde_rcvlist_fil;
+static struct proc_dir_entry *pde_rcvlist_inv;
+static struct proc_dir_entry *pde_rcvlist_sff;
+static struct proc_dir_entry *pde_rcvlist_eff;
+static struct proc_dir_entry *pde_rcvlist_err;
+
+static int user_reset;
+
+static const char rx_list_name[][8] = {
+	[RX_ERR] = "rx_err",
+	[RX_ALL] = "rx_all",
+	[RX_FIL] = "rx_fil",
+	[RX_INV] = "rx_inv",
+	[RX_EFF] = "rx_eff",
+};
+
+/*
+ * af_can statistics stuff
+ */
+
+static void can_init_stats(void)
+{
+	/*
+	 * This memset function is called from a timer context (when
+	 * can_stattimer is active which is the default) OR in a process
+	 * context (reading the proc_fs when can_stattimer is disabled).
+	 */
+	memset(&can_stats, 0, sizeof(can_stats));
+	can_stats.jiffies_init = jiffies;
+
+	can_pstats.stats_reset++;
+
+	if (user_reset) {
+		user_reset = 0;
+		can_pstats.user_reset++;
+	}
+}
+
+static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
+			       unsigned long count)
+{
+	unsigned long rate;
+
+	if (oldjif == newjif)
+		return 0;
+
+	/* see can_stat_update() - this should NEVER happen! */
+	if (count > (ULONG_MAX / HZ)) {
+		printk(KERN_ERR "can: calc_rate: count exceeded! %ld\n",
+		       count);
+		return 99999999;
+	}
+
+	rate = (count * HZ) / (newjif - oldjif);
+
+	return rate;
+}
+
+void can_stat_update(unsigned long data)
+{
+	unsigned long j = jiffies; /* snapshot */
+
+	/* restart counting in timer context on user request */
+	if (user_reset)
+		can_init_stats();
+
+	/* restart counting on jiffies overflow */
+	if (j < can_stats.jiffies_init)
+		can_init_stats();
+
+	/* prevent overflow in calc_rate() */
+	if (can_stats.rx_frames > (ULONG_MAX / HZ))
+		can_init_stats();
+
+	/* prevent overflow in calc_rate() */
+	if (can_stats.tx_frames > (ULONG_MAX / HZ))
+		can_init_stats();
+
+	/* matches overflow - very improbable */
+	if (can_stats.matches > (ULONG_MAX / 100))
+		can_init_stats();
+
+	/* calc total values */
+	if (can_stats.rx_frames)
+		can_stats.total_rx_match_ratio = (can_stats.matches * 100) /
+			can_stats.rx_frames;
+
+	can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j,
+					    can_stats.tx_frames);
+	can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j,
+					    can_stats.rx_frames);
+
+	/* calc current values */
+	if (can_stats.rx_frames_delta)
+		can_stats.current_rx_match_ratio =
+			(can_stats.matches_delta * 100) /
+			can_stats.rx_frames_delta;
+
+	can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta);
+	can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta);
+
+	/* check / update maximum values */
+	if (can_stats.max_tx_rate < can_stats.current_tx_rate)
+		can_stats.max_tx_rate = can_stats.current_tx_rate;
+
+	if (can_stats.max_rx_rate < can_stats.current_rx_rate)
+		can_stats.max_rx_rate = can_stats.current_rx_rate;
+
+	if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio)
+		can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio;
+
+	/* clear values for 'current rate' calculation */
+	can_stats.tx_frames_delta = 0;
+	can_stats.rx_frames_delta = 0;
+	can_stats.matches_delta   = 0;
+
+	/* restart timer (one second) */
+	mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
+}
+
+/*
+ * proc read functions
+ *
+ * From known use-cases we expect about 10 entries in a receive list to be
+ * printed in the proc_fs. So PAGE_SIZE is definitely enough space here.
+ *
+ */
+
+static int can_print_rcvlist(char *page, int len, struct hlist_head *rx_list,
+			     struct net_device *dev)
+{
+	struct receiver *r;
+	struct hlist_node *n;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(r, n, rx_list, list) {
+		char *fmt = (r->can_id & CAN_EFF_FLAG)?
+			"   %-5s  %08X  %08x  %08x  %08x  %8ld  %s\n" :
+			"   %-5s     %03X    %08x  %08lx  %08lx  %8ld  %s\n";
+
+		len += snprintf(page + len, PAGE_SIZE - len, fmt,
+				DNAME(dev), r->can_id, r->mask,
+				(unsigned long)r->func, (unsigned long)r->data,
+				r->matches, r->ident);
+
+		/* does a typical line fit into the current buffer? */
+
+		/* 100 Bytes before end of buffer */
+		if (len > PAGE_SIZE - 100) {
+			/* mark output cut off */
+			len += snprintf(page + len, PAGE_SIZE - len,
+					"   (..)\n");
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return len;
+}
+
+static int can_print_recv_banner(char *page, int len)
+{
+	/*
+	 *                  can1.  00000000  00000000  00000000
+	 *                 .......          0  tp20
+	 */
+	len += snprintf(page + len, PAGE_SIZE - len,
+			"  device   can_id   can_mask  function"
+			"  userdata   matches  ident\n");
+
+	return len;
+}
+
+static int can_proc_read_stats(char *page, char **start, off_t off,
+			       int count, int *eof, void *data)
+{
+	int len = 0;
+
+	len += snprintf(page + len, PAGE_SIZE - len, "\n");
+	len += snprintf(page + len, PAGE_SIZE - len,
+			" %8ld transmitted frames (TXF)\n",
+			can_stats.tx_frames);
+	len += snprintf(page + len, PAGE_SIZE - len,
+			" %8ld received frames (RXF)\n", can_stats.rx_frames);
+	len += snprintf(page + len, PAGE_SIZE - len,
+			" %8ld matched frames (RXMF)\n", can_stats.matches);
+
+	len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+	if (can_stattimer.function == can_stat_update) {
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld %% total match ratio (RXMR)\n",
+				can_stats.total_rx_match_ratio);
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld frames/s total tx rate (TXR)\n",
+				can_stats.total_tx_rate);
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld frames/s total rx rate (RXR)\n",
+				can_stats.total_rx_rate);
+
+		len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld %% current match ratio (CRXMR)\n",
+				can_stats.current_rx_match_ratio);
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld frames/s current tx rate (CTXR)\n",
+				can_stats.current_tx_rate);
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld frames/s current rx rate (CRXR)\n",
+				can_stats.current_rx_rate);
+
+		len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld %% max match ratio (MRXMR)\n",
+				can_stats.max_rx_match_ratio);
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld frames/s max tx rate (MTXR)\n",
+				can_stats.max_tx_rate);
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld frames/s max rx rate (MRXR)\n",
+				can_stats.max_rx_rate);
+
+		len += snprintf(page + len, PAGE_SIZE - len, "\n");
+	}
+
+	len += snprintf(page + len, PAGE_SIZE - len,
+			" %8ld current receive list entries (CRCV)\n",
+			can_pstats.rcv_entries);
+	len += snprintf(page + len, PAGE_SIZE - len,
+			" %8ld maximum receive list entries (MRCV)\n",
+			can_pstats.rcv_entries_max);
+
+	if (can_pstats.stats_reset)
+		len += snprintf(page + len, PAGE_SIZE - len,
+				"\n %8ld statistic resets (STR)\n",
+				can_pstats.stats_reset);
+
+	if (can_pstats.user_reset)
+		len += snprintf(page + len, PAGE_SIZE - len,
+				" %8ld user statistic resets (USTR)\n",
+				can_pstats.user_reset);
+
+	len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+	*eof = 1;
+	return len;
+}
+
+static int can_proc_read_reset_stats(char *page, char **start, off_t off,
+				     int count, int *eof, void *data)
+{
+	int len = 0;
+
+	user_reset = 1;
+
+	if (can_stattimer.function == can_stat_update) {
+		len += snprintf(page + len, PAGE_SIZE - len,
+				"Scheduled statistic reset #%ld.\n",
+				can_pstats.stats_reset + 1);
+
+	} else {
+		if (can_stats.jiffies_init != jiffies)
+			can_init_stats();
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				"Performed statistic reset #%ld.\n",
+				can_pstats.stats_reset);
+	}
+
+	*eof = 1;
+	return len;
+}
+
+static int can_proc_read_version(char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	int len = 0;
+
+	len += snprintf(page + len, PAGE_SIZE - len, "%s\n",
+			CAN_VERSION_STRING);
+	*eof = 1;
+	return len;
+}
+
+static int can_proc_read_rcvlist(char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	/* double cast to prevent GCC warning */
+	int idx = (int)(long)data;
+	int len = 0;
+	struct dev_rcv_lists *d;
+	struct hlist_node *n;
+
+	len += snprintf(page + len, PAGE_SIZE - len,
+			"\nreceive list '%s':\n", rx_list_name[idx]);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
+
+		if (!hlist_empty(&d->rx[idx])) {
+			len = can_print_recv_banner(page, len);
+			len = can_print_rcvlist(page, len, &d->rx[idx], d->dev);
+		} else
+			len += snprintf(page + len, PAGE_SIZE - len,
+					"  (%s: no entry)\n", DNAME(d->dev));
+
+		/* exit on end of buffer? */
+		if (len > PAGE_SIZE - 100)
+			break;
+	}
+	rcu_read_unlock();
+
+	len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+	*eof = 1;
+	return len;
+}
+
+static int can_proc_read_rcvlist_sff(char *page, char **start, off_t off,
+				     int count, int *eof, void *data)
+{
+	int len = 0;
+	struct dev_rcv_lists *d;
+	struct hlist_node *n;
+
+	/* RX_SFF */
+	len += snprintf(page + len, PAGE_SIZE - len,
+			"\nreceive list 'rx_sff':\n");
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
+		int i, all_empty = 1;
+		/* check wether at least one list is non-empty */
+		for (i = 0; i < 0x800; i++)
+			if (!hlist_empty(&d->rx_sff[i])) {
+				all_empty = 0;
+				break;
+			}
+
+		if (!all_empty) {
+			len = can_print_recv_banner(page, len);
+			for (i = 0; i < 0x800; i++) {
+				if (!hlist_empty(&d->rx_sff[i]) &&
+				    len < PAGE_SIZE - 100)
+					len = can_print_rcvlist(page, len,
+								&d->rx_sff[i],
+								d->dev);
+			}
+		} else
+			len += snprintf(page + len, PAGE_SIZE - len,
+					"  (%s: no entry)\n", DNAME(d->dev));
+
+		/* exit on end of buffer? */
+		if (len > PAGE_SIZE - 100)
+			break;
+	}
+	rcu_read_unlock();
+
+	len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+	*eof = 1;
+	return len;
+}
+
+/*
+ * proc utility functions
+ */
+
+static struct proc_dir_entry *can_create_proc_readentry(const char *name,
+							mode_t mode,
+							read_proc_t *read_proc,
+							void *data)
+{
+	if (can_dir)
+		return create_proc_read_entry(name, mode, can_dir, read_proc,
+					      data);
+	else
+		return NULL;
+}
+
+static void can_remove_proc_readentry(const char *name)
+{
+	if (can_dir)
+		remove_proc_entry(name, can_dir);
+}
+
+/*
+ * can_init_proc - create main CAN proc directory and procfs entries
+ */
+void can_init_proc(void)
+{
+	/* create /proc/net/can directory */
+	can_dir = proc_mkdir("can", init_net.proc_net);
+
+	if (!can_dir) {
+		printk(KERN_INFO "can: failed to create /proc/net/can . "
+		       "CONFIG_PROC_FS missing?\n");
+		return;
+	}
+
+	can_dir->owner = THIS_MODULE;
+
+	/* own procfs entries from the AF_CAN core */
+	pde_version     = can_create_proc_readentry(CAN_PROC_VERSION, 0644,
+					can_proc_read_version, NULL);
+	pde_stats       = can_create_proc_readentry(CAN_PROC_STATS, 0644,
+					can_proc_read_stats, NULL);
+	pde_reset_stats = can_create_proc_readentry(CAN_PROC_RESET_STATS, 0644,
+					can_proc_read_reset_stats, NULL);
+	pde_rcvlist_err = can_create_proc_readentry(CAN_PROC_RCVLIST_ERR, 0644,
+					can_proc_read_rcvlist, (void *)RX_ERR);
+	pde_rcvlist_all = can_create_proc_readentry(CAN_PROC_RCVLIST_ALL, 0644,
+					can_proc_read_rcvlist, (void *)RX_ALL);
+	pde_rcvlist_fil = can_create_proc_readentry(CAN_PROC_RCVLIST_FIL, 0644,
+					can_proc_read_rcvlist, (void *)RX_FIL);
+	pde_rcvlist_inv = can_create_proc_readentry(CAN_PROC_RCVLIST_INV, 0644,
+					can_proc_read_rcvlist, (void *)RX_INV);
+	pde_rcvlist_eff = can_create_proc_readentry(CAN_PROC_RCVLIST_EFF, 0644,
+					can_proc_read_rcvlist, (void *)RX_EFF);
+	pde_rcvlist_sff = can_create_proc_readentry(CAN_PROC_RCVLIST_SFF, 0644,
+					can_proc_read_rcvlist_sff, NULL);
+}
+
+/*
+ * can_remove_proc - remove procfs entries and main CAN proc directory
+ */
+void can_remove_proc(void)
+{
+	if (pde_version)
+		can_remove_proc_readentry(CAN_PROC_VERSION);
+
+	if (pde_stats)
+		can_remove_proc_readentry(CAN_PROC_STATS);
+
+	if (pde_reset_stats)
+		can_remove_proc_readentry(CAN_PROC_RESET_STATS);
+
+	if (pde_rcvlist_err)
+		can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR);
+
+	if (pde_rcvlist_all)
+		can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL);
+
+	if (pde_rcvlist_fil)
+		can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL);
+
+	if (pde_rcvlist_inv)
+		can_remove_proc_readentry(CAN_PROC_RCVLIST_INV);
+
+	if (pde_rcvlist_eff)
+		can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF);
+
+	if (pde_rcvlist_sff)
+		can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF);
+
+	if (can_dir)
+		proc_net_remove(&init_net, "can");
+}
-- 
cgit v1.2.3


From c18ce101f2e47d97ace125033e2896895a6db3dd Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Date: Fri, 16 Nov 2007 15:53:09 -0800
Subject: [CAN]: Add raw protocol

This patch adds the CAN raw protocol.

Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/Kconfig  |  11 +
 net/can/Makefile |   3 +
 net/can/raw.c    | 763 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 777 insertions(+)
 create mode 100644 net/can/raw.c

(limited to 'net')

diff --git a/net/can/Kconfig b/net/can/Kconfig
index 8b92790747e..4718d1f50ab 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -15,3 +15,14 @@ menuconfig CAN
 
 	  If you want CAN support you should say Y here and also to the
 	  specific driver for your controller(s) below.
+
+config CAN_RAW
+	tristate "Raw CAN Protocol (raw access with CAN-ID filtering)"
+	depends on CAN
+	default N
+	---help---
+	  The raw CAN protocol option offers access to the CAN bus via
+	  the BSD socket API. You probably want to use the raw socket in
+	  most cases where no higher level protocol is being used. The raw
+	  socket has several filter options e.g. ID masking / error frames.
+	  To receive/send raw CAN messages, use AF_CAN with protocol CAN_RAW.
diff --git a/net/can/Makefile b/net/can/Makefile
index 4c7563c2ccb..86f1cf21fce 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -4,3 +4,6 @@
 
 obj-$(CONFIG_CAN)	+= can.o
 can-objs		:= af_can.o proc.o
+
+obj-$(CONFIG_CAN_RAW)	+= can-raw.o
+can-raw-objs		:= raw.o
diff --git a/net/can/raw.c b/net/can/raw.c
new file mode 100644
index 00000000000..aeefd1419d0
--- /dev/null
+++ b/net/can/raw.c
@@ -0,0 +1,763 @@
+/*
+ * raw.c - Raw sockets for protocol family CAN
+ *
+ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <linux/can/raw.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+
+#define CAN_RAW_VERSION CAN_VERSION
+static __initdata const char banner[] =
+	KERN_INFO "can: raw protocol (rev " CAN_RAW_VERSION ")\n";
+
+MODULE_DESCRIPTION("PF_CAN raw protocol");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>");
+
+#define MASK_ALL 0
+
+/*
+ * A raw socket has a list of can_filters attached to it, each receiving
+ * the CAN frames matching that filter.  If the filter list is empty,
+ * no CAN frames will be received by the socket.  The default after
+ * opening the socket, is to have one filter which receives all frames.
+ * The filter list is allocated dynamically with the exception of the
+ * list containing only one item.  This common case is optimized by
+ * storing the single filter in dfilter, to avoid using dynamic memory.
+ */
+
+struct raw_sock {
+	struct sock sk;
+	int bound;
+	int ifindex;
+	struct notifier_block notifier;
+	int loopback;
+	int recv_own_msgs;
+	int count;                 /* number of active filters */
+	struct can_filter dfilter; /* default/single filter */
+	struct can_filter *filter; /* pointer to filter(s) */
+	can_err_mask_t err_mask;
+};
+
+static inline struct raw_sock *raw_sk(const struct sock *sk)
+{
+	return (struct raw_sock *)sk;
+}
+
+static void raw_rcv(struct sk_buff *skb, void *data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct raw_sock *ro = raw_sk(sk);
+	struct sockaddr_can *addr;
+	int error;
+
+	if (!ro->recv_own_msgs) {
+		/* check the received tx sock reference */
+		if (skb->sk == sk) {
+			kfree_skb(skb);
+			return;
+		}
+	}
+
+	/*
+	 *  Put the datagram to the queue so that raw_recvmsg() can
+	 *  get it from there.  We need to pass the interface index to
+	 *  raw_recvmsg().  We pass a whole struct sockaddr_can in skb->cb
+	 *  containing the interface index.
+	 */
+
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+	addr = (struct sockaddr_can *)skb->cb;
+	memset(addr, 0, sizeof(*addr));
+	addr->can_family  = AF_CAN;
+	addr->can_ifindex = skb->dev->ifindex;
+
+	error = sock_queue_rcv_skb(sk, skb);
+	if (error < 0)
+		kfree_skb(skb);
+}
+
+static int raw_enable_filters(struct net_device *dev, struct sock *sk,
+			      struct can_filter *filter,
+			      int count)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		err = can_rx_register(dev, filter[i].can_id,
+				      filter[i].can_mask,
+				      raw_rcv, sk, "raw");
+		if (err) {
+			/* clean up successfully registered filters */
+			while (--i >= 0)
+				can_rx_unregister(dev, filter[i].can_id,
+						  filter[i].can_mask,
+						  raw_rcv, sk);
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
+				can_err_mask_t err_mask)
+{
+	int err = 0;
+
+	if (err_mask)
+		err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
+				      raw_rcv, sk, "raw");
+
+	return err;
+}
+
+static void raw_disable_filters(struct net_device *dev, struct sock *sk,
+			      struct can_filter *filter,
+			      int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask,
+				  raw_rcv, sk);
+}
+
+static inline void raw_disable_errfilter(struct net_device *dev,
+					 struct sock *sk,
+					 can_err_mask_t err_mask)
+
+{
+	if (err_mask)
+		can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG,
+				  raw_rcv, sk);
+}
+
+static inline void raw_disable_allfilters(struct net_device *dev,
+					  struct sock *sk)
+{
+	struct raw_sock *ro = raw_sk(sk);
+
+	raw_disable_filters(dev, sk, ro->filter, ro->count);
+	raw_disable_errfilter(dev, sk, ro->err_mask);
+}
+
+static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
+{
+	struct raw_sock *ro = raw_sk(sk);
+	int err;
+
+	err = raw_enable_filters(dev, sk, ro->filter, ro->count);
+	if (!err) {
+		err = raw_enable_errfilter(dev, sk, ro->err_mask);
+		if (err)
+			raw_disable_filters(dev, sk, ro->filter, ro->count);
+	}
+
+	return err;
+}
+
+static int raw_notifier(struct notifier_block *nb,
+			unsigned long msg, void *data)
+{
+	struct net_device *dev = (struct net_device *)data;
+	struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
+	struct sock *sk = &ro->sk;
+
+	if (dev->nd_net != &init_net)
+		return NOTIFY_DONE;
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+
+	if (ro->ifindex != dev->ifindex)
+		return NOTIFY_DONE;
+
+	switch (msg) {
+
+	case NETDEV_UNREGISTER:
+		lock_sock(sk);
+		/* remove current filters & unregister */
+		if (ro->bound)
+			raw_disable_allfilters(dev, sk);
+
+		if (ro->count > 1)
+			kfree(ro->filter);
+
+		ro->ifindex = 0;
+		ro->bound   = 0;
+		ro->count   = 0;
+		release_sock(sk);
+
+		sk->sk_err = ENODEV;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+		break;
+
+	case NETDEV_DOWN:
+		sk->sk_err = ENETDOWN;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int raw_init(struct sock *sk)
+{
+	struct raw_sock *ro = raw_sk(sk);
+
+	ro->bound            = 0;
+	ro->ifindex          = 0;
+
+	/* set default filter to single entry dfilter */
+	ro->dfilter.can_id   = 0;
+	ro->dfilter.can_mask = MASK_ALL;
+	ro->filter           = &ro->dfilter;
+	ro->count            = 1;
+
+	/* set default loopback behaviour */
+	ro->loopback         = 1;
+	ro->recv_own_msgs    = 0;
+
+	/* set notifier */
+	ro->notifier.notifier_call = raw_notifier;
+
+	register_netdevice_notifier(&ro->notifier);
+
+	return 0;
+}
+
+static int raw_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct raw_sock *ro = raw_sk(sk);
+
+	unregister_netdevice_notifier(&ro->notifier);
+
+	lock_sock(sk);
+
+	/* remove current filters & unregister */
+	if (ro->bound) {
+		if (ro->ifindex) {
+			struct net_device *dev;
+
+			dev = dev_get_by_index(&init_net, ro->ifindex);
+			if (dev) {
+				raw_disable_allfilters(dev, sk);
+				dev_put(dev);
+			}
+		} else
+			raw_disable_allfilters(NULL, sk);
+	}
+
+	if (ro->count > 1)
+		kfree(ro->filter);
+
+	ro->ifindex = 0;
+	ro->bound   = 0;
+	ro->count   = 0;
+
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct raw_sock *ro = raw_sk(sk);
+	int ifindex;
+	int err = 0;
+	int notify_enetdown = 0;
+
+	if (len < sizeof(*addr))
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (ro->bound && addr->can_ifindex == ro->ifindex)
+		goto out;
+
+	if (addr->can_ifindex) {
+		struct net_device *dev;
+
+		dev = dev_get_by_index(&init_net, addr->can_ifindex);
+		if (!dev) {
+			err = -ENODEV;
+			goto out;
+		}
+		if (dev->type != ARPHRD_CAN) {
+			dev_put(dev);
+			err = -ENODEV;
+			goto out;
+		}
+		if (!(dev->flags & IFF_UP))
+			notify_enetdown = 1;
+
+		ifindex = dev->ifindex;
+
+		/* filters set by default/setsockopt */
+		err = raw_enable_allfilters(dev, sk);
+		dev_put(dev);
+
+	} else {
+		ifindex = 0;
+
+		/* filters set by default/setsockopt */
+		err = raw_enable_allfilters(NULL, sk);
+	}
+
+	if (!err) {
+		if (ro->bound) {
+			/* unregister old filters */
+			if (ro->ifindex) {
+				struct net_device *dev;
+
+				dev = dev_get_by_index(&init_net, ro->ifindex);
+				if (dev) {
+					raw_disable_allfilters(dev, sk);
+					dev_put(dev);
+				}
+			} else
+				raw_disable_allfilters(NULL, sk);
+		}
+		ro->ifindex = ifindex;
+		ro->bound = 1;
+	}
+
+ out:
+	release_sock(sk);
+
+	if (notify_enetdown) {
+		sk->sk_err = ENETDOWN;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+	}
+
+	return err;
+}
+
+static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
+		       int *len, int peer)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct raw_sock *ro = raw_sk(sk);
+
+	if (peer)
+		return -EOPNOTSUPP;
+
+	addr->can_family  = AF_CAN;
+	addr->can_ifindex = ro->ifindex;
+
+	*len = sizeof(*addr);
+
+	return 0;
+}
+
+static int raw_setsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct raw_sock *ro = raw_sk(sk);
+	struct can_filter *filter = NULL;  /* dyn. alloc'ed filters */
+	struct can_filter sfilter;         /* single filter */
+	struct net_device *dev = NULL;
+	can_err_mask_t err_mask = 0;
+	int count = 0;
+	int err = 0;
+
+	if (level != SOL_CAN_RAW)
+		return -EINVAL;
+	if (optlen < 0)
+		return -EINVAL;
+
+	switch (optname) {
+
+	case CAN_RAW_FILTER:
+		if (optlen % sizeof(struct can_filter) != 0)
+			return -EINVAL;
+
+		count = optlen / sizeof(struct can_filter);
+
+		if (count > 1) {
+			/* filter does not fit into dfilter => alloc space */
+			filter = kmalloc(optlen, GFP_KERNEL);
+			if (!filter)
+				return -ENOMEM;
+
+			err = copy_from_user(filter, optval, optlen);
+			if (err) {
+				kfree(filter);
+				return err;
+			}
+		} else if (count == 1) {
+			err = copy_from_user(&sfilter, optval, optlen);
+			if (err)
+				return err;
+		}
+
+		lock_sock(sk);
+
+		if (ro->bound && ro->ifindex)
+			dev = dev_get_by_index(&init_net, ro->ifindex);
+
+		if (ro->bound) {
+			/* (try to) register the new filters */
+			if (count == 1)
+				err = raw_enable_filters(dev, sk, &sfilter, 1);
+			else
+				err = raw_enable_filters(dev, sk, filter,
+							 count);
+			if (err) {
+				if (count > 1)
+					kfree(filter);
+
+				goto out_fil;
+			}
+
+			/* remove old filter registrations */
+			raw_disable_filters(dev, sk, ro->filter, ro->count);
+		}
+
+		/* remove old filter space */
+		if (ro->count > 1)
+			kfree(ro->filter);
+
+		/* link new filters to the socket */
+		if (count == 1) {
+			/* copy filter data for single filter */
+			ro->dfilter = sfilter;
+			filter = &ro->dfilter;
+		}
+		ro->filter = filter;
+		ro->count  = count;
+
+ out_fil:
+		if (dev)
+			dev_put(dev);
+
+		release_sock(sk);
+
+		break;
+
+	case CAN_RAW_ERR_FILTER:
+		if (optlen != sizeof(err_mask))
+			return -EINVAL;
+
+		err = copy_from_user(&err_mask, optval, optlen);
+		if (err)
+			return err;
+
+		err_mask &= CAN_ERR_MASK;
+
+		lock_sock(sk);
+
+		if (ro->bound && ro->ifindex)
+			dev = dev_get_by_index(&init_net, ro->ifindex);
+
+		/* remove current error mask */
+		if (ro->bound) {
+			/* (try to) register the new err_mask */
+			err = raw_enable_errfilter(dev, sk, err_mask);
+
+			if (err)
+				goto out_err;
+
+			/* remove old err_mask registration */
+			raw_disable_errfilter(dev, sk, ro->err_mask);
+		}
+
+		/* link new err_mask to the socket */
+		ro->err_mask = err_mask;
+
+ out_err:
+		if (dev)
+			dev_put(dev);
+
+		release_sock(sk);
+
+		break;
+
+	case CAN_RAW_LOOPBACK:
+		if (optlen != sizeof(ro->loopback))
+			return -EINVAL;
+
+		err = copy_from_user(&ro->loopback, optval, optlen);
+
+		break;
+
+	case CAN_RAW_RECV_OWN_MSGS:
+		if (optlen != sizeof(ro->recv_own_msgs))
+			return -EINVAL;
+
+		err = copy_from_user(&ro->recv_own_msgs, optval, optlen);
+
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+	return err;
+}
+
+static int raw_getsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct raw_sock *ro = raw_sk(sk);
+	int len;
+	void *val;
+	int err = 0;
+
+	if (level != SOL_CAN_RAW)
+		return -EINVAL;
+	if (get_user(len, optlen))
+		return -EFAULT;
+	if (len < 0)
+		return -EINVAL;
+
+	switch (optname) {
+
+	case CAN_RAW_FILTER:
+		lock_sock(sk);
+		if (ro->count > 0) {
+			int fsize = ro->count * sizeof(struct can_filter);
+			if (len > fsize)
+				len = fsize;
+			err = copy_to_user(optval, ro->filter, len);
+		} else
+			len = 0;
+		release_sock(sk);
+
+		if (!err)
+			err = put_user(len, optlen);
+		return err;
+
+	case CAN_RAW_ERR_FILTER:
+		if (len > sizeof(can_err_mask_t))
+			len = sizeof(can_err_mask_t);
+		val = &ro->err_mask;
+		break;
+
+	case CAN_RAW_LOOPBACK:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = &ro->loopback;
+		break;
+
+	case CAN_RAW_RECV_OWN_MSGS:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = &ro->recv_own_msgs;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, val, len))
+		return -EFAULT;
+	return 0;
+}
+
+static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct raw_sock *ro = raw_sk(sk);
+	struct sk_buff *skb;
+	struct net_device *dev;
+	int ifindex;
+	int err;
+
+	if (msg->msg_name) {
+		struct sockaddr_can *addr =
+			(struct sockaddr_can *)msg->msg_name;
+
+		if (addr->can_family != AF_CAN)
+			return -EINVAL;
+
+		ifindex = addr->can_ifindex;
+	} else
+		ifindex = ro->ifindex;
+
+	dev = dev_get_by_index(&init_net, ifindex);
+	if (!dev)
+		return -ENXIO;
+
+	skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
+				  &err);
+	if (!skb) {
+		dev_put(dev);
+		return err;
+	}
+
+	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+	if (err < 0) {
+		kfree_skb(skb);
+		dev_put(dev);
+		return err;
+	}
+	skb->dev = dev;
+	skb->sk  = sk;
+
+	err = can_send(skb, ro->loopback);
+
+	dev_put(dev);
+
+	if (err)
+		return err;
+
+	return size;
+}
+
+static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int error = 0;
+	int noblock;
+
+	noblock =  flags & MSG_DONTWAIT;
+	flags   &= ~MSG_DONTWAIT;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &error);
+	if (!skb)
+		return error;
+
+	if (size < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+	else
+		size = skb->len;
+
+	error = memcpy_toiovec(msg->msg_iov, skb->data, size);
+	if (error < 0) {
+		skb_free_datagram(sk, skb);
+		return error;
+	}
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (msg->msg_name) {
+		msg->msg_namelen = sizeof(struct sockaddr_can);
+		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+	}
+
+	skb_free_datagram(sk, skb);
+
+	return size;
+}
+
+static struct proto_ops raw_ops __read_mostly = {
+	.family        = PF_CAN,
+	.release       = raw_release,
+	.bind          = raw_bind,
+	.connect       = sock_no_connect,
+	.socketpair    = sock_no_socketpair,
+	.accept        = sock_no_accept,
+	.getname       = raw_getname,
+	.poll          = datagram_poll,
+	.ioctl         = NULL,		/* use can_ioctl() from af_can.c */
+	.listen        = sock_no_listen,
+	.shutdown      = sock_no_shutdown,
+	.setsockopt    = raw_setsockopt,
+	.getsockopt    = raw_getsockopt,
+	.sendmsg       = raw_sendmsg,
+	.recvmsg       = raw_recvmsg,
+	.mmap          = sock_no_mmap,
+	.sendpage      = sock_no_sendpage,
+};
+
+static struct proto raw_proto __read_mostly = {
+	.name       = "CAN_RAW",
+	.owner      = THIS_MODULE,
+	.obj_size   = sizeof(struct raw_sock),
+	.init       = raw_init,
+};
+
+static struct can_proto raw_can_proto __read_mostly = {
+	.type       = SOCK_RAW,
+	.protocol   = CAN_RAW,
+	.capability = -1,
+	.ops        = &raw_ops,
+	.prot       = &raw_proto,
+};
+
+static __init int raw_module_init(void)
+{
+	int err;
+
+	printk(banner);
+
+	err = can_proto_register(&raw_can_proto);
+	if (err < 0)
+		printk(KERN_ERR "can: registration of raw protocol failed\n");
+
+	return err;
+}
+
+static __exit void raw_module_exit(void)
+{
+	can_proto_unregister(&raw_can_proto);
+}
+
+module_init(raw_module_init);
+module_exit(raw_module_exit);
-- 
cgit v1.2.3


From ffd980f976e7fd666c2e61bf8ab35107efd11828 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Date: Fri, 16 Nov 2007 15:53:52 -0800
Subject: [CAN]: Add broadcast manager (bcm) protocol

This patch adds the CAN broadcast manager (bcm) protocol.

Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/Kconfig  |   13 +
 net/can/Makefile |    3 +
 net/can/bcm.c    | 1561 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1577 insertions(+)
 create mode 100644 net/can/bcm.c

(limited to 'net')

diff --git a/net/can/Kconfig b/net/can/Kconfig
index 4718d1f50ab..182b96b80eb 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -26,3 +26,16 @@ config CAN_RAW
 	  most cases where no higher level protocol is being used. The raw
 	  socket has several filter options e.g. ID masking / error frames.
 	  To receive/send raw CAN messages, use AF_CAN with protocol CAN_RAW.
+
+config CAN_BCM
+	tristate "Broadcast Manager CAN Protocol (with content filtering)"
+	depends on CAN
+	default N
+	---help---
+	  The Broadcast Manager offers content filtering, timeout monitoring,
+	  sending of RTR frames, and cyclic CAN messages without permanent user
+	  interaction. The BCM can be 'programmed' via the BSD socket API and
+	  informs you on demand e.g. only on content updates / timeouts.
+	  You probably want to use the bcm socket in most cases where cyclic
+	  CAN messages are used on the bus (e.g. in automotive environments).
+	  To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM.
diff --git a/net/can/Makefile b/net/can/Makefile
index 86f1cf21fce..9cd3c4b3abd 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -7,3 +7,6 @@ can-objs		:= af_can.o proc.o
 
 obj-$(CONFIG_CAN_RAW)	+= can-raw.o
 can-raw-objs		:= raw.o
+
+obj-$(CONFIG_CAN_BCM)	+= can-bcm.o
+can-bcm-objs		:= bcm.o
diff --git a/net/can/bcm.c b/net/can/bcm.c
new file mode 100644
index 00000000000..bd4282dae75
--- /dev/null
+++ b/net/can/bcm.c
@@ -0,0 +1,1561 @@
+/*
+ * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
+ *
+ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <linux/can/bcm.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+
+/* use of last_frames[index].can_dlc */
+#define RX_RECV    0x40 /* received data for this element */
+#define RX_THR     0x80 /* element not been sent due to throttle feature */
+#define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */
+
+/* get best masking value for can_rx_register() for a given single can_id */
+#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \
+			(CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK))
+
+#define CAN_BCM_VERSION CAN_VERSION
+static __initdata const char banner[] = KERN_INFO
+	"can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n";
+
+MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
+
+/* easy access to can_frame payload */
+static inline u64 GET_U64(const struct can_frame *cp)
+{
+	return *(u64 *)cp->data;
+}
+
+struct bcm_op {
+	struct list_head list;
+	int ifindex;
+	canid_t can_id;
+	int flags;
+	unsigned long j_ival1, j_ival2, j_lastmsg;
+	unsigned long frames_abs, frames_filtered;
+	struct timer_list timer, thrtimer;
+	struct timeval ival1, ival2;
+	ktime_t rx_stamp;
+	int rx_ifindex;
+	int count;
+	int nframes;
+	int currframe;
+	struct can_frame *frames;
+	struct can_frame *last_frames;
+	struct can_frame sframe;
+	struct can_frame last_sframe;
+	struct sock *sk;
+	struct net_device *rx_reg_dev;
+};
+
+static struct proc_dir_entry *proc_dir;
+
+struct bcm_sock {
+	struct sock sk;
+	int bound;
+	int ifindex;
+	struct notifier_block notifier;
+	struct list_head rx_ops;
+	struct list_head tx_ops;
+	unsigned long dropped_usr_msgs;
+	struct proc_dir_entry *bcm_proc_read;
+	char procname [9]; /* pointer printed in ASCII with \0 */
+};
+
+static inline struct bcm_sock *bcm_sk(const struct sock *sk)
+{
+	return (struct bcm_sock *)sk;
+}
+
+#define CFSIZ sizeof(struct can_frame)
+#define OPSIZ sizeof(struct bcm_op)
+#define MHSIZ sizeof(struct bcm_msg_head)
+
+/*
+ * rounded_tv2jif - calculate jiffies from timeval including optional up
+ * @tv: pointer to timeval
+ *
+ * Description:
+ * Unlike timeval_to_jiffies() provided in include/linux/jiffies.h, this
+ * function is intentionally more relaxed on precise timer ticks to get
+ * exact one jiffy for requested 1000us on a 1000HZ machine.
+ * This code is to be removed when upgrading to kernel hrtimer.
+ *
+ * Return:
+ *  calculated jiffies (max: ULONG_MAX)
+ */
+static unsigned long rounded_tv2jif(const struct timeval *tv)
+{
+	unsigned long sec  = tv->tv_sec;
+	unsigned long usec = tv->tv_usec;
+	unsigned long jif;
+
+	if (sec > ULONG_MAX / HZ)
+		return ULONG_MAX;
+
+	/* round up to get at least the requested time */
+	usec += 1000000 / HZ - 1;
+
+	jif  = usec / (1000000 / HZ);
+
+	if (sec * HZ > ULONG_MAX - jif)
+		return ULONG_MAX;
+
+	return jif + sec * HZ;
+}
+
+/*
+ * procfs functions
+ */
+static char *bcm_proc_getifname(int ifindex)
+{
+	struct net_device *dev;
+
+	if (!ifindex)
+		return "any";
+
+	/* no usage counting */
+	dev = __dev_get_by_index(&init_net, ifindex);
+	if (dev)
+		return dev->name;
+
+	return "???";
+}
+
+static int bcm_read_proc(char *page, char **start, off_t off,
+			 int count, int *eof, void *data)
+{
+	int len = 0;
+	struct sock *sk = (struct sock *)data;
+	struct bcm_sock *bo = bcm_sk(sk);
+	struct bcm_op *op;
+
+	len += snprintf(page + len, PAGE_SIZE - len, ">>> socket %p",
+			sk->sk_socket);
+	len += snprintf(page + len, PAGE_SIZE - len, " / sk %p", sk);
+	len += snprintf(page + len, PAGE_SIZE - len, " / bo %p", bo);
+	len += snprintf(page + len, PAGE_SIZE - len, " / dropped %lu",
+			bo->dropped_usr_msgs);
+	len += snprintf(page + len, PAGE_SIZE - len, " / bound %s",
+			bcm_proc_getifname(bo->ifindex));
+	len += snprintf(page + len, PAGE_SIZE - len, " <<<\n");
+
+	list_for_each_entry(op, &bo->rx_ops, list) {
+
+		unsigned long reduction;
+
+		/* print only active entries & prevent division by zero */
+		if (!op->frames_abs)
+			continue;
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				"rx_op: %03X %-5s ",
+				op->can_id, bcm_proc_getifname(op->ifindex));
+		len += snprintf(page + len, PAGE_SIZE - len, "[%d]%c ",
+				op->nframes,
+				(op->flags & RX_CHECK_DLC)?'d':' ');
+		if (op->j_ival1)
+			len += snprintf(page + len, PAGE_SIZE - len,
+					"timeo=%ld ", op->j_ival1);
+
+		if (op->j_ival2)
+			len += snprintf(page + len, PAGE_SIZE - len,
+					"thr=%ld ", op->j_ival2);
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				"# recv %ld (%ld) => reduction: ",
+				op->frames_filtered, op->frames_abs);
+
+		reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;
+
+		len += snprintf(page + len, PAGE_SIZE - len, "%s%ld%%\n",
+				(reduction == 100)?"near ":"", reduction);
+
+		if (len > PAGE_SIZE - 200) {
+			/* mark output cut off */
+			len += snprintf(page + len, PAGE_SIZE - len, "(..)\n");
+			break;
+		}
+	}
+
+	list_for_each_entry(op, &bo->tx_ops, list) {
+
+		len += snprintf(page + len, PAGE_SIZE - len,
+				"tx_op: %03X %s [%d] ",
+				op->can_id, bcm_proc_getifname(op->ifindex),
+				op->nframes);
+		if (op->j_ival1)
+			len += snprintf(page + len, PAGE_SIZE - len, "t1=%ld ",
+					op->j_ival1);
+
+		if (op->j_ival2)
+			len += snprintf(page + len, PAGE_SIZE - len, "t2=%ld ",
+					op->j_ival2);
+
+		len += snprintf(page + len, PAGE_SIZE - len, "# sent %ld\n",
+				op->frames_abs);
+
+		if (len > PAGE_SIZE - 100) {
+			/* mark output cut off */
+			len += snprintf(page + len, PAGE_SIZE - len, "(..)\n");
+			break;
+		}
+	}
+
+	len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+	*eof = 1;
+	return len;
+}
+
+/*
+ * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface
+ *              of the given bcm tx op
+ */
+static void bcm_can_tx(struct bcm_op *op)
+{
+	struct sk_buff *skb;
+	struct net_device *dev;
+	struct can_frame *cf = &op->frames[op->currframe];
+
+	/* no target device? => exit */
+	if (!op->ifindex)
+		return;
+
+	dev = dev_get_by_index(&init_net, op->ifindex);
+	if (!dev) {
+		/* RFC: should this bcm_op remove itself here? */
+		return;
+	}
+
+	skb = alloc_skb(CFSIZ, gfp_any());
+	if (!skb)
+		goto out;
+
+	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
+
+	/* send with loopback */
+	skb->dev = dev;
+	skb->sk = op->sk;
+	can_send(skb, 1);
+
+	/* update statistics */
+	op->currframe++;
+	op->frames_abs++;
+
+	/* reached last frame? */
+	if (op->currframe >= op->nframes)
+		op->currframe = 0;
+ out:
+	dev_put(dev);
+}
+
+/*
+ * bcm_send_to_user - send a BCM message to the userspace
+ *                    (consisting of bcm_msg_head + x CAN frames)
+ */
+static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
+			     struct can_frame *frames, int has_timestamp)
+{
+	struct sk_buff *skb;
+	struct can_frame *firstframe;
+	struct sockaddr_can *addr;
+	struct sock *sk = op->sk;
+	int datalen = head->nframes * CFSIZ;
+	int err;
+
+	skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
+	if (!skb)
+		return;
+
+	memcpy(skb_put(skb, sizeof(*head)), head, sizeof(*head));
+
+	if (head->nframes) {
+		/* can_frames starting here */
+		firstframe = (struct can_frame *) skb_tail_pointer(skb);
+
+		memcpy(skb_put(skb, datalen), frames, datalen);
+
+		/*
+		 * the BCM uses the can_dlc-element of the can_frame
+		 * structure for internal purposes. This is only
+		 * relevant for updates that are generated by the
+		 * BCM, where nframes is 1
+		 */
+		if (head->nframes == 1)
+			firstframe->can_dlc &= BCM_CAN_DLC_MASK;
+	}
+
+	if (has_timestamp) {
+		/* restore rx timestamp */
+		skb->tstamp = op->rx_stamp;
+	}
+
+	/*
+	 *  Put the datagram to the queue so that bcm_recvmsg() can
+	 *  get it from there.  We need to pass the interface index to
+	 *  bcm_recvmsg().  We pass a whole struct sockaddr_can in skb->cb
+	 *  containing the interface index.
+	 */
+
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+	addr = (struct sockaddr_can *)skb->cb;
+	memset(addr, 0, sizeof(*addr));
+	addr->can_family  = AF_CAN;
+	addr->can_ifindex = op->rx_ifindex;
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err < 0) {
+		struct bcm_sock *bo = bcm_sk(sk);
+
+		kfree_skb(skb);
+		/* don't care about overflows in this statistic */
+		bo->dropped_usr_msgs++;
+	}
+}
+
+/*
+ * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions
+ */
+static void bcm_tx_timeout_handler(unsigned long data)
+{
+	struct bcm_op *op = (struct bcm_op *)data;
+
+	if (op->j_ival1 && (op->count > 0)) {
+
+		op->count--;
+		if (!op->count && (op->flags & TX_COUNTEVT)) {
+			struct bcm_msg_head msg_head;
+
+			/* create notification to user */
+			msg_head.opcode  = TX_EXPIRED;
+			msg_head.flags   = op->flags;
+			msg_head.count   = op->count;
+			msg_head.ival1   = op->ival1;
+			msg_head.ival2   = op->ival2;
+			msg_head.can_id  = op->can_id;
+			msg_head.nframes = 0;
+
+			bcm_send_to_user(op, &msg_head, NULL, 0);
+		}
+	}
+
+	if (op->j_ival1 && (op->count > 0)) {
+
+		/* send (next) frame */
+		bcm_can_tx(op);
+		mod_timer(&op->timer, jiffies + op->j_ival1);
+
+	} else {
+		if (op->j_ival2) {
+
+			/* send (next) frame */
+			bcm_can_tx(op);
+			mod_timer(&op->timer, jiffies + op->j_ival2);
+		}
+	}
+
+	return;
+}
+
+/*
+ * bcm_rx_changed - create a RX_CHANGED notification due to changed content
+ */
+static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
+{
+	struct bcm_msg_head head;
+
+	op->j_lastmsg = jiffies;
+
+	/* update statistics */
+	op->frames_filtered++;
+
+	/* prevent statistics overflow */
+	if (op->frames_filtered > ULONG_MAX/100)
+		op->frames_filtered = op->frames_abs = 0;
+
+	head.opcode  = RX_CHANGED;
+	head.flags   = op->flags;
+	head.count   = op->count;
+	head.ival1   = op->ival1;
+	head.ival2   = op->ival2;
+	head.can_id  = op->can_id;
+	head.nframes = 1;
+
+	bcm_send_to_user(op, &head, data, 1);
+}
+
+/*
+ * bcm_rx_update_and_send - process a detected relevant receive content change
+ *                          1. update the last received data
+ *                          2. send a notification to the user (if possible)
+ */
+static void bcm_rx_update_and_send(struct bcm_op *op,
+				   struct can_frame *lastdata,
+				   struct can_frame *rxdata)
+{
+	unsigned long nexttx = op->j_lastmsg + op->j_ival2;
+
+	memcpy(lastdata, rxdata, CFSIZ);
+
+	/* mark as used */
+	lastdata->can_dlc |= RX_RECV;
+
+	/* throttle bcm_rx_changed ? */
+	if ((op->thrtimer.expires) ||
+	    ((op->j_ival2) && (nexttx > jiffies))) {
+		/* we are already waiting OR we have to start waiting */
+
+		/* mark as 'throttled' */
+		lastdata->can_dlc |= RX_THR;
+
+		if (!(op->thrtimer.expires)) {
+			/* start the timer only the first time */
+			mod_timer(&op->thrtimer, nexttx);
+		}
+
+	} else {
+		/* send RX_CHANGED to the user immediately */
+		bcm_rx_changed(op, rxdata);
+	}
+}
+
+/*
+ * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly
+ *                       received data stored in op->last_frames[]
+ */
+static void bcm_rx_cmp_to_index(struct bcm_op *op, int index,
+				struct can_frame *rxdata)
+{
+	/*
+	 * no one uses the MSBs of can_dlc for comparation,
+	 * so we use it here to detect the first time of reception
+	 */
+
+	if (!(op->last_frames[index].can_dlc & RX_RECV)) {
+		/* received data for the first time => send update to user */
+		bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
+		return;
+	}
+
+	/* do a real check in can_frame data section */
+
+	if ((GET_U64(&op->frames[index]) & GET_U64(rxdata)) !=
+	    (GET_U64(&op->frames[index]) & GET_U64(&op->last_frames[index]))) {
+		bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
+		return;
+	}
+
+	if (op->flags & RX_CHECK_DLC) {
+		/* do a real check in can_frame dlc */
+		if (rxdata->can_dlc != (op->last_frames[index].can_dlc &
+					BCM_CAN_DLC_MASK)) {
+			bcm_rx_update_and_send(op, &op->last_frames[index],
+					       rxdata);
+			return;
+		}
+	}
+}
+
+/*
+ * bcm_rx_starttimer - enable timeout monitoring for CAN frame receiption
+ */
+static void bcm_rx_starttimer(struct bcm_op *op)
+{
+	if (op->flags & RX_NO_AUTOTIMER)
+		return;
+
+	if (op->j_ival1)
+		mod_timer(&op->timer, jiffies + op->j_ival1);
+}
+
+/*
+ * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out
+ */
+static void bcm_rx_timeout_handler(unsigned long data)
+{
+	struct bcm_op *op = (struct bcm_op *)data;
+	struct bcm_msg_head msg_head;
+
+	msg_head.opcode  = RX_TIMEOUT;
+	msg_head.flags   = op->flags;
+	msg_head.count   = op->count;
+	msg_head.ival1   = op->ival1;
+	msg_head.ival2   = op->ival2;
+	msg_head.can_id  = op->can_id;
+	msg_head.nframes = 0;
+
+	bcm_send_to_user(op, &msg_head, NULL, 0);
+
+	/* no restart of the timer is done here! */
+
+	/* if user wants to be informed, when cyclic CAN-Messages come back */
+	if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
+		/* clear received can_frames to indicate 'nothing received' */
+		memset(op->last_frames, 0, op->nframes * CFSIZ);
+	}
+}
+
+/*
+ * bcm_rx_thr_handler - the time for blocked content updates is over now:
+ *                      Check for throttled data and send it to the userspace
+ */
+static void bcm_rx_thr_handler(unsigned long data)
+{
+	struct bcm_op *op = (struct bcm_op *)data;
+	int i = 0;
+
+	/* mark disabled / consumed timer */
+	op->thrtimer.expires = 0;
+
+	if (op->nframes > 1) {
+		/* for MUX filter we start at index 1 */
+		for (i = 1; i < op->nframes; i++) {
+			if ((op->last_frames) &&
+			    (op->last_frames[i].can_dlc & RX_THR)) {
+				op->last_frames[i].can_dlc &= ~RX_THR;
+				bcm_rx_changed(op, &op->last_frames[i]);
+			}
+		}
+
+	} else {
+		/* for RX_FILTER_ID and simple filter */
+		if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) {
+			op->last_frames[0].can_dlc &= ~RX_THR;
+			bcm_rx_changed(op, &op->last_frames[0]);
+		}
+	}
+}
+
+/*
+ * bcm_rx_handler - handle a CAN frame receiption
+ */
+static void bcm_rx_handler(struct sk_buff *skb, void *data)
+{
+	struct bcm_op *op = (struct bcm_op *)data;
+	struct can_frame rxframe;
+	int i;
+
+	/* disable timeout */
+	del_timer(&op->timer);
+
+	if (skb->len == sizeof(rxframe)) {
+		memcpy(&rxframe, skb->data, sizeof(rxframe));
+		/* save rx timestamp */
+		op->rx_stamp = skb->tstamp;
+		/* save originator for recvfrom() */
+		op->rx_ifindex = skb->dev->ifindex;
+		/* update statistics */
+		op->frames_abs++;
+		kfree_skb(skb);
+
+	} else {
+		kfree_skb(skb);
+		return;
+	}
+
+	if (op->can_id != rxframe.can_id)
+		return;
+
+	if (op->flags & RX_RTR_FRAME) {
+		/* send reply for RTR-request (placed in op->frames[0]) */
+		bcm_can_tx(op);
+		return;
+	}
+
+	if (op->flags & RX_FILTER_ID) {
+		/* the easiest case */
+		bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe);
+		bcm_rx_starttimer(op);
+		return;
+	}
+
+	if (op->nframes == 1) {
+		/* simple compare with index 0 */
+		bcm_rx_cmp_to_index(op, 0, &rxframe);
+		bcm_rx_starttimer(op);
+		return;
+	}
+
+	if (op->nframes > 1) {
+		/*
+		 * multiplex compare
+		 *
+		 * find the first multiplex mask that fits.
+		 * Remark: The MUX-mask is stored in index 0
+		 */
+
+		for (i = 1; i < op->nframes; i++) {
+			if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) ==
+			    (GET_U64(&op->frames[0]) &
+			     GET_U64(&op->frames[i]))) {
+				bcm_rx_cmp_to_index(op, i, &rxframe);
+				break;
+			}
+		}
+		bcm_rx_starttimer(op);
+	}
+}
+
+/*
+ * helpers for bcm_op handling: find & delete bcm [rx|tx] op elements
+ */
+static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id,
+				  int ifindex)
+{
+	struct bcm_op *op;
+
+	list_for_each_entry(op, ops, list) {
+		if ((op->can_id == can_id) && (op->ifindex == ifindex))
+			return op;
+	}
+
+	return NULL;
+}
+
+static void bcm_remove_op(struct bcm_op *op)
+{
+	del_timer(&op->timer);
+	del_timer(&op->thrtimer);
+
+	if ((op->frames) && (op->frames != &op->sframe))
+		kfree(op->frames);
+
+	if ((op->last_frames) && (op->last_frames != &op->last_sframe))
+		kfree(op->last_frames);
+
+	kfree(op);
+
+	return;
+}
+
+static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
+{
+	if (op->rx_reg_dev == dev) {
+		can_rx_unregister(dev, op->can_id, REGMASK(op->can_id),
+				  bcm_rx_handler, op);
+
+		/* mark as removed subscription */
+		op->rx_reg_dev = NULL;
+	} else
+		printk(KERN_ERR "can-bcm: bcm_rx_unreg: registered device "
+		       "mismatch %p %p\n", op->rx_reg_dev, dev);
+}
+
+/*
+ * bcm_delete_rx_op - find and remove a rx op (returns number of removed ops)
+ */
+static int bcm_delete_rx_op(struct list_head *ops, canid_t can_id, int ifindex)
+{
+	struct bcm_op *op, *n;
+
+	list_for_each_entry_safe(op, n, ops, list) {
+		if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
+
+			/*
+			 * Don't care if we're bound or not (due to netdev
+			 * problems) can_rx_unregister() is always a save
+			 * thing to do here.
+			 */
+			if (op->ifindex) {
+				/*
+				 * Only remove subscriptions that had not
+				 * been removed due to NETDEV_UNREGISTER
+				 * in bcm_notifier()
+				 */
+				if (op->rx_reg_dev) {
+					struct net_device *dev;
+
+					dev = dev_get_by_index(&init_net,
+							       op->ifindex);
+					if (dev) {
+						bcm_rx_unreg(dev, op);
+						dev_put(dev);
+					}
+				}
+			} else
+				can_rx_unregister(NULL, op->can_id,
+						  REGMASK(op->can_id),
+						  bcm_rx_handler, op);
+
+			list_del(&op->list);
+			bcm_remove_op(op);
+			return 1; /* done */
+		}
+	}
+
+	return 0; /* not found */
+}
+
+/*
+ * bcm_delete_tx_op - find and remove a tx op (returns number of removed ops)
+ */
+static int bcm_delete_tx_op(struct list_head *ops, canid_t can_id, int ifindex)
+{
+	struct bcm_op *op, *n;
+
+	list_for_each_entry_safe(op, n, ops, list) {
+		if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
+			list_del(&op->list);
+			bcm_remove_op(op);
+			return 1; /* done */
+		}
+	}
+
+	return 0; /* not found */
+}
+
+/*
+ * bcm_read_op - read out a bcm_op and send it to the user (for bcm_sendmsg)
+ */
+static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head,
+		       int ifindex)
+{
+	struct bcm_op *op = bcm_find_op(ops, msg_head->can_id, ifindex);
+
+	if (!op)
+		return -EINVAL;
+
+	/* put current values into msg_head */
+	msg_head->flags   = op->flags;
+	msg_head->count   = op->count;
+	msg_head->ival1   = op->ival1;
+	msg_head->ival2   = op->ival2;
+	msg_head->nframes = op->nframes;
+
+	bcm_send_to_user(op, msg_head, op->frames, 0);
+
+	return MHSIZ;
+}
+
+/*
+ * bcm_tx_setup - create or update a bcm tx op (for bcm_sendmsg)
+ */
+static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+			int ifindex, struct sock *sk)
+{
+	struct bcm_sock *bo = bcm_sk(sk);
+	struct bcm_op *op;
+	int i, err;
+
+	/* we need a real device to send frames */
+	if (!ifindex)
+		return -ENODEV;
+
+	/* we need at least one can_frame */
+	if (msg_head->nframes < 1)
+		return -EINVAL;
+
+	/* check the given can_id */
+	op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex);
+
+	if (op) {
+		/* update existing BCM operation */
+
+		/*
+		 * Do we need more space for the can_frames than currently
+		 * allocated? -> This is a _really_ unusual use-case and
+		 * therefore (complexity / locking) it is not supported.
+		 */
+		if (msg_head->nframes > op->nframes)
+			return -E2BIG;
+
+		/* update can_frames content */
+		for (i = 0; i < msg_head->nframes; i++) {
+			err = memcpy_fromiovec((u8 *)&op->frames[i],
+					       msg->msg_iov, CFSIZ);
+			if (err < 0)
+				return err;
+
+			if (msg_head->flags & TX_CP_CAN_ID) {
+				/* copy can_id into frame */
+				op->frames[i].can_id = msg_head->can_id;
+			}
+		}
+
+	} else {
+		/* insert new BCM operation for the given can_id */
+
+		op = kzalloc(OPSIZ, GFP_KERNEL);
+		if (!op)
+			return -ENOMEM;
+
+		op->can_id    = msg_head->can_id;
+
+		/* create array for can_frames and copy the data */
+		if (msg_head->nframes > 1) {
+			op->frames = kmalloc(msg_head->nframes * CFSIZ,
+					     GFP_KERNEL);
+			if (!op->frames) {
+				kfree(op);
+				return -ENOMEM;
+			}
+		} else
+			op->frames = &op->sframe;
+
+		for (i = 0; i < msg_head->nframes; i++) {
+			err = memcpy_fromiovec((u8 *)&op->frames[i],
+					       msg->msg_iov, CFSIZ);
+			if (err < 0) {
+				if (op->frames != &op->sframe)
+					kfree(op->frames);
+				kfree(op);
+				return err;
+			}
+
+			if (msg_head->flags & TX_CP_CAN_ID) {
+				/* copy can_id into frame */
+				op->frames[i].can_id = msg_head->can_id;
+			}
+		}
+
+		/* tx_ops never compare with previous received messages */
+		op->last_frames = NULL;
+
+		/* bcm_can_tx / bcm_tx_timeout_handler needs this */
+		op->sk = sk;
+		op->ifindex = ifindex;
+
+		/* initialize uninitialized (kzalloc) structure */
+		setup_timer(&op->timer, bcm_tx_timeout_handler,
+			    (unsigned long)op);
+
+		/* currently unused in tx_ops */
+		init_timer(&op->thrtimer);
+
+		/* add this bcm_op to the list of the tx_ops */
+		list_add(&op->list, &bo->tx_ops);
+
+	} /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
+
+	if (op->nframes != msg_head->nframes) {
+		op->nframes   = msg_head->nframes;
+		/* start multiple frame transmission with index 0 */
+		op->currframe = 0;
+	}
+
+	/* check flags */
+
+	op->flags = msg_head->flags;
+
+	if (op->flags & TX_RESET_MULTI_IDX) {
+		/* start multiple frame transmission with index 0 */
+		op->currframe = 0;
+	}
+
+	if (op->flags & SETTIMER) {
+		/* set timer values */
+		op->count = msg_head->count;
+		op->ival1 = msg_head->ival1;
+		op->ival2 = msg_head->ival2;
+		op->j_ival1 = rounded_tv2jif(&msg_head->ival1);
+		op->j_ival2 = rounded_tv2jif(&msg_head->ival2);
+
+		/* disable an active timer due to zero values? */
+		if (!op->j_ival1 && !op->j_ival2)
+			del_timer(&op->timer);
+	}
+
+	if ((op->flags & STARTTIMER) &&
+	    ((op->j_ival1 && op->count) || op->j_ival2)) {
+
+		/* spec: send can_frame when starting timer */
+		op->flags |= TX_ANNOUNCE;
+
+		if (op->j_ival1 && (op->count > 0)) {
+			/* op->count-- is done in bcm_tx_timeout_handler */
+			mod_timer(&op->timer, jiffies + op->j_ival1);
+		} else
+			mod_timer(&op->timer, jiffies + op->j_ival2);
+	}
+
+	if (op->flags & TX_ANNOUNCE)
+		bcm_can_tx(op);
+
+	return msg_head->nframes * CFSIZ + MHSIZ;
+}
+
+/*
+ * bcm_rx_setup - create or update a bcm rx op (for bcm_sendmsg)
+ */
+static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+			int ifindex, struct sock *sk)
+{
+	struct bcm_sock *bo = bcm_sk(sk);
+	struct bcm_op *op;
+	int do_rx_register;
+	int err = 0;
+
+	if ((msg_head->flags & RX_FILTER_ID) || (!(msg_head->nframes))) {
+		/* be robust against wrong usage ... */
+		msg_head->flags |= RX_FILTER_ID;
+		/* ignore trailing garbage */
+		msg_head->nframes = 0;
+	}
+
+	if ((msg_head->flags & RX_RTR_FRAME) &&
+	    ((msg_head->nframes != 1) ||
+	     (!(msg_head->can_id & CAN_RTR_FLAG))))
+		return -EINVAL;
+
+	/* check the given can_id */
+	op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex);
+	if (op) {
+		/* update existing BCM operation */
+
+		/*
+		 * Do we need more space for the can_frames than currently
+		 * allocated? -> This is a _really_ unusual use-case and
+		 * therefore (complexity / locking) it is not supported.
+		 */
+		if (msg_head->nframes > op->nframes)
+			return -E2BIG;
+
+		if (msg_head->nframes) {
+			/* update can_frames content */
+			err = memcpy_fromiovec((u8 *)op->frames,
+					       msg->msg_iov,
+					       msg_head->nframes * CFSIZ);
+			if (err < 0)
+				return err;
+
+			/* clear last_frames to indicate 'nothing received' */
+			memset(op->last_frames, 0, msg_head->nframes * CFSIZ);
+		}
+
+		op->nframes = msg_head->nframes;
+
+		/* Only an update -> do not call can_rx_register() */
+		do_rx_register = 0;
+
+	} else {
+		/* insert new BCM operation for the given can_id */
+		op = kzalloc(OPSIZ, GFP_KERNEL);
+		if (!op)
+			return -ENOMEM;
+
+		op->can_id    = msg_head->can_id;
+		op->nframes   = msg_head->nframes;
+
+		if (msg_head->nframes > 1) {
+			/* create array for can_frames and copy the data */
+			op->frames = kmalloc(msg_head->nframes * CFSIZ,
+					     GFP_KERNEL);
+			if (!op->frames) {
+				kfree(op);
+				return -ENOMEM;
+			}
+
+			/* create and init array for received can_frames */
+			op->last_frames = kzalloc(msg_head->nframes * CFSIZ,
+						  GFP_KERNEL);
+			if (!op->last_frames) {
+				kfree(op->frames);
+				kfree(op);
+				return -ENOMEM;
+			}
+
+		} else {
+			op->frames = &op->sframe;
+			op->last_frames = &op->last_sframe;
+		}
+
+		if (msg_head->nframes) {
+			err = memcpy_fromiovec((u8 *)op->frames, msg->msg_iov,
+					       msg_head->nframes * CFSIZ);
+			if (err < 0) {
+				if (op->frames != &op->sframe)
+					kfree(op->frames);
+				if (op->last_frames != &op->last_sframe)
+					kfree(op->last_frames);
+				kfree(op);
+				return err;
+			}
+		}
+
+		/* bcm_can_tx / bcm_tx_timeout_handler needs this */
+		op->sk = sk;
+		op->ifindex = ifindex;
+
+		/* initialize uninitialized (kzalloc) structure */
+		setup_timer(&op->timer, bcm_rx_timeout_handler,
+			    (unsigned long)op);
+
+		/* init throttle timer for RX_CHANGED */
+		setup_timer(&op->thrtimer, bcm_rx_thr_handler,
+			    (unsigned long)op);
+
+		/* mark disabled timer */
+		op->thrtimer.expires = 0;
+
+		/* add this bcm_op to the list of the rx_ops */
+		list_add(&op->list, &bo->rx_ops);
+
+		/* call can_rx_register() */
+		do_rx_register = 1;
+
+	} /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */
+
+	/* check flags */
+	op->flags = msg_head->flags;
+
+	if (op->flags & RX_RTR_FRAME) {
+
+		/* no timers in RTR-mode */
+		del_timer(&op->thrtimer);
+		del_timer(&op->timer);
+
+		/*
+		 * funny feature in RX(!)_SETUP only for RTR-mode:
+		 * copy can_id into frame BUT without RTR-flag to
+		 * prevent a full-load-loopback-test ... ;-]
+		 */
+		if ((op->flags & TX_CP_CAN_ID) ||
+		    (op->frames[0].can_id == op->can_id))
+			op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG;
+
+	} else {
+		if (op->flags & SETTIMER) {
+
+			/* set timer value */
+			op->ival1 = msg_head->ival1;
+			op->ival2 = msg_head->ival2;
+			op->j_ival1 = rounded_tv2jif(&msg_head->ival1);
+			op->j_ival2 = rounded_tv2jif(&msg_head->ival2);
+
+			/* disable an active timer due to zero value? */
+			if (!op->j_ival1)
+				del_timer(&op->timer);
+
+			/* free currently blocked msgs ? */
+			if (op->thrtimer.expires) {
+				/* send blocked msgs hereafter */
+				mod_timer(&op->thrtimer, jiffies + 2);
+			}
+
+			/*
+			 * if (op->j_ival2) is zero, no (new) throttling
+			 * will happen. For details see functions
+			 * bcm_rx_update_and_send() and bcm_rx_thr_handler()
+			 */
+		}
+
+		if ((op->flags & STARTTIMER) && op->j_ival1)
+			mod_timer(&op->timer, jiffies + op->j_ival1);
+	}
+
+	/* now we can register for can_ids, if we added a new bcm_op */
+	if (do_rx_register) {
+		if (ifindex) {
+			struct net_device *dev;
+
+			dev = dev_get_by_index(&init_net, ifindex);
+			if (dev) {
+				err = can_rx_register(dev, op->can_id,
+						      REGMASK(op->can_id),
+						      bcm_rx_handler, op,
+						      "bcm");
+
+				op->rx_reg_dev = dev;
+				dev_put(dev);
+			}
+
+		} else
+			err = can_rx_register(NULL, op->can_id,
+					      REGMASK(op->can_id),
+					      bcm_rx_handler, op, "bcm");
+		if (err) {
+			/* this bcm rx op is broken -> remove it */
+			list_del(&op->list);
+			bcm_remove_op(op);
+			return err;
+		}
+	}
+
+	return msg_head->nframes * CFSIZ + MHSIZ;
+}
+
+/*
+ * bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg)
+ */
+static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct net_device *dev;
+	int err;
+
+	/* we need a real device to send frames */
+	if (!ifindex)
+		return -ENODEV;
+
+	skb = alloc_skb(CFSIZ, GFP_KERNEL);
+
+	if (!skb)
+		return -ENOMEM;
+
+	err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
+	if (err < 0) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	dev = dev_get_by_index(&init_net, ifindex);
+	if (!dev) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	skb->dev = dev;
+	skb->sk  = sk;
+	can_send(skb, 1); /* send with loopback */
+	dev_put(dev);
+
+	return CFSIZ + MHSIZ;
+}
+
+/*
+ * bcm_sendmsg - process BCM commands (opcodes) from the userspace
+ */
+static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct bcm_sock *bo = bcm_sk(sk);
+	int ifindex = bo->ifindex; /* default ifindex for this bcm_op */
+	struct bcm_msg_head msg_head;
+	int ret; /* read bytes or error codes as return value */
+
+	if (!bo->bound)
+		return -ENOTCONN;
+
+	/* check for alternative ifindex for this bcm_op */
+
+	if (!ifindex && msg->msg_name) {
+		/* no bound device as default => check msg_name */
+		struct sockaddr_can *addr =
+			(struct sockaddr_can *)msg->msg_name;
+
+		if (addr->can_family != AF_CAN)
+			return -EINVAL;
+
+		/* ifindex from sendto() */
+		ifindex = addr->can_ifindex;
+
+		if (ifindex) {
+			struct net_device *dev;
+
+			dev = dev_get_by_index(&init_net, ifindex);
+			if (!dev)
+				return -ENODEV;
+
+			if (dev->type != ARPHRD_CAN) {
+				dev_put(dev);
+				return -ENODEV;
+			}
+
+			dev_put(dev);
+		}
+	}
+
+	/* read message head information */
+
+	ret = memcpy_fromiovec((u8 *)&msg_head, msg->msg_iov, MHSIZ);
+	if (ret < 0)
+		return ret;
+
+	lock_sock(sk);
+
+	switch (msg_head.opcode) {
+
+	case TX_SETUP:
+		ret = bcm_tx_setup(&msg_head, msg, ifindex, sk);
+		break;
+
+	case RX_SETUP:
+		ret = bcm_rx_setup(&msg_head, msg, ifindex, sk);
+		break;
+
+	case TX_DELETE:
+		if (bcm_delete_tx_op(&bo->tx_ops, msg_head.can_id, ifindex))
+			ret = MHSIZ;
+		else
+			ret = -EINVAL;
+		break;
+
+	case RX_DELETE:
+		if (bcm_delete_rx_op(&bo->rx_ops, msg_head.can_id, ifindex))
+			ret = MHSIZ;
+		else
+			ret = -EINVAL;
+		break;
+
+	case TX_READ:
+		/* reuse msg_head for the reply to TX_READ */
+		msg_head.opcode  = TX_STATUS;
+		ret = bcm_read_op(&bo->tx_ops, &msg_head, ifindex);
+		break;
+
+	case RX_READ:
+		/* reuse msg_head for the reply to RX_READ */
+		msg_head.opcode  = RX_STATUS;
+		ret = bcm_read_op(&bo->rx_ops, &msg_head, ifindex);
+		break;
+
+	case TX_SEND:
+		/* we need at least one can_frame */
+		if (msg_head.nframes < 1)
+			ret = -EINVAL;
+		else
+			ret = bcm_tx_send(msg, ifindex, sk);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	release_sock(sk);
+
+	return ret;
+}
+
+/*
+ * notification handler for netdevice status changes
+ */
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+			void *data)
+{
+	struct net_device *dev = (struct net_device *)data;
+	struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
+	struct sock *sk = &bo->sk;
+	struct bcm_op *op;
+	int notify_enodev = 0;
+
+	if (dev->nd_net != &init_net)
+		return NOTIFY_DONE;
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+
+	switch (msg) {
+
+	case NETDEV_UNREGISTER:
+		lock_sock(sk);
+
+		/* remove device specific receive entries */
+		list_for_each_entry(op, &bo->rx_ops, list)
+			if (op->rx_reg_dev == dev)
+				bcm_rx_unreg(dev, op);
+
+		/* remove device reference, if this is our bound device */
+		if (bo->bound && bo->ifindex == dev->ifindex) {
+			bo->bound   = 0;
+			bo->ifindex = 0;
+			notify_enodev = 1;
+		}
+
+		release_sock(sk);
+
+		if (notify_enodev) {
+			sk->sk_err = ENODEV;
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_error_report(sk);
+		}
+		break;
+
+	case NETDEV_DOWN:
+		if (bo->bound && bo->ifindex == dev->ifindex) {
+			sk->sk_err = ENETDOWN;
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_error_report(sk);
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * initial settings for all BCM sockets to be set at socket creation time
+ */
+static int bcm_init(struct sock *sk)
+{
+	struct bcm_sock *bo = bcm_sk(sk);
+
+	bo->bound            = 0;
+	bo->ifindex          = 0;
+	bo->dropped_usr_msgs = 0;
+	bo->bcm_proc_read    = NULL;
+
+	INIT_LIST_HEAD(&bo->tx_ops);
+	INIT_LIST_HEAD(&bo->rx_ops);
+
+	/* set notifier */
+	bo->notifier.notifier_call = bcm_notifier;
+
+	register_netdevice_notifier(&bo->notifier);
+
+	return 0;
+}
+
+/*
+ * standard socket functions
+ */
+static int bcm_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct bcm_sock *bo = bcm_sk(sk);
+	struct bcm_op *op, *next;
+
+	/* remove bcm_ops, timer, rx_unregister(), etc. */
+
+	unregister_netdevice_notifier(&bo->notifier);
+
+	lock_sock(sk);
+
+	list_for_each_entry_safe(op, next, &bo->tx_ops, list)
+		bcm_remove_op(op);
+
+	list_for_each_entry_safe(op, next, &bo->rx_ops, list) {
+		/*
+		 * Don't care if we're bound or not (due to netdev problems)
+		 * can_rx_unregister() is always a save thing to do here.
+		 */
+		if (op->ifindex) {
+			/*
+			 * Only remove subscriptions that had not
+			 * been removed due to NETDEV_UNREGISTER
+			 * in bcm_notifier()
+			 */
+			if (op->rx_reg_dev) {
+				struct net_device *dev;
+
+				dev = dev_get_by_index(&init_net, op->ifindex);
+				if (dev) {
+					bcm_rx_unreg(dev, op);
+					dev_put(dev);
+				}
+			}
+		} else
+			can_rx_unregister(NULL, op->can_id,
+					  REGMASK(op->can_id),
+					  bcm_rx_handler, op);
+
+		bcm_remove_op(op);
+	}
+
+	/* remove procfs entry */
+	if (proc_dir && bo->bcm_proc_read)
+		remove_proc_entry(bo->procname, proc_dir);
+
+	/* remove device reference */
+	if (bo->bound) {
+		bo->bound   = 0;
+		bo->ifindex = 0;
+	}
+
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
+		       int flags)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct bcm_sock *bo = bcm_sk(sk);
+
+	if (bo->bound)
+		return -EISCONN;
+
+	/* bind a device to this socket */
+	if (addr->can_ifindex) {
+		struct net_device *dev;
+
+		dev = dev_get_by_index(&init_net, addr->can_ifindex);
+		if (!dev)
+			return -ENODEV;
+
+		if (dev->type != ARPHRD_CAN) {
+			dev_put(dev);
+			return -ENODEV;
+		}
+
+		bo->ifindex = dev->ifindex;
+		dev_put(dev);
+
+	} else {
+		/* no interface reference for ifindex = 0 ('any' CAN device) */
+		bo->ifindex = 0;
+	}
+
+	bo->bound = 1;
+
+	if (proc_dir) {
+		/* unique socket address as filename */
+		sprintf(bo->procname, "%p", sock);
+		bo->bcm_proc_read = create_proc_read_entry(bo->procname, 0644,
+							   proc_dir,
+							   bcm_read_proc, sk);
+	}
+
+	return 0;
+}
+
+static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int error = 0;
+	int noblock;
+	int err;
+
+	noblock =  flags & MSG_DONTWAIT;
+	flags   &= ~MSG_DONTWAIT;
+	skb = skb_recv_datagram(sk, flags, noblock, &error);
+	if (!skb)
+		return error;
+
+	if (skb->len < size)
+		size = skb->len;
+
+	err = memcpy_toiovec(msg->msg_iov, skb->data, size);
+	if (err < 0) {
+		skb_free_datagram(sk, skb);
+		return err;
+	}
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (msg->msg_name) {
+		msg->msg_namelen = sizeof(struct sockaddr_can);
+		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+	}
+
+	skb_free_datagram(sk, skb);
+
+	return size;
+}
+
+static struct proto_ops bcm_ops __read_mostly = {
+	.family        = PF_CAN,
+	.release       = bcm_release,
+	.bind          = sock_no_bind,
+	.connect       = bcm_connect,
+	.socketpair    = sock_no_socketpair,
+	.accept        = sock_no_accept,
+	.getname       = sock_no_getname,
+	.poll          = datagram_poll,
+	.ioctl         = NULL,		/* use can_ioctl() from af_can.c */
+	.listen        = sock_no_listen,
+	.shutdown      = sock_no_shutdown,
+	.setsockopt    = sock_no_setsockopt,
+	.getsockopt    = sock_no_getsockopt,
+	.sendmsg       = bcm_sendmsg,
+	.recvmsg       = bcm_recvmsg,
+	.mmap          = sock_no_mmap,
+	.sendpage      = sock_no_sendpage,
+};
+
+static struct proto bcm_proto __read_mostly = {
+	.name       = "CAN_BCM",
+	.owner      = THIS_MODULE,
+	.obj_size   = sizeof(struct bcm_sock),
+	.init       = bcm_init,
+};
+
+static struct can_proto bcm_can_proto __read_mostly = {
+	.type       = SOCK_DGRAM,
+	.protocol   = CAN_BCM,
+	.capability = -1,
+	.ops        = &bcm_ops,
+	.prot       = &bcm_proto,
+};
+
+static int __init bcm_module_init(void)
+{
+	int err;
+
+	printk(banner);
+
+	err = can_proto_register(&bcm_can_proto);
+	if (err < 0) {
+		printk(KERN_ERR "can: registration of bcm protocol failed\n");
+		return err;
+	}
+
+	/* create /proc/net/can-bcm directory */
+	proc_dir = proc_mkdir("can-bcm", init_net.proc_net);
+
+	if (proc_dir)
+		proc_dir->owner = THIS_MODULE;
+
+	return 0;
+}
+
+static void __exit bcm_module_exit(void)
+{
+	can_proto_unregister(&bcm_can_proto);
+
+	if (proc_dir)
+		proc_net_remove(&init_net, "can-bcm");
+}
+
+module_init(bcm_module_init);
+module_exit(bcm_module_exit);
-- 
cgit v1.2.3


From ccb29637991fa6b8321a80c2320a71e379aea962 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Date: Fri, 16 Nov 2007 15:56:08 -0800
Subject: [CAN]: Add virtual CAN netdevice driver

This patch adds the virtual CAN bus (vcan) network driver.
The vcan device is just a loopback device for CAN frames, no
real CAN hardware is involved.

Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/can/Kconfig b/net/can/Kconfig
index 182b96b80eb..89395b2c8bc 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -39,3 +39,6 @@ config CAN_BCM
 	  You probably want to use the bcm socket in most cases where cyclic
 	  CAN messages are used on the bus (e.g. in automotive environments).
 	  To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM.
+
+
+source "drivers/net/can/Kconfig"
-- 
cgit v1.2.3


From 20de20beba6e9bd2e1c83696bfefa3b16cda9a74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 16 Nov 2007 16:17:05 -0800
Subject: [TCP]: Correct DSACK check placing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously one of the in-block skip branches was missing it.

Also, drop it from tail-fully-processed case because the next
iteration will do exactly the same thing, i.e., process the
SACK block that contains the DSACK information.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ad6a19ad30..79996b16b94 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1554,20 +1554,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			}
 
 			/* Rest of the block already fully processed? */
-			if (!after(end_seq, cache->end_seq)) {
-				skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
-							       &fack_count, &reord, &flag);
+			if (!after(end_seq, cache->end_seq))
 				goto advance_sp;
-			}
+
+			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
+						       &fack_count, &reord, &flag);
 
 			/* ...tail remains todo... */
 			if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) {
-				/* ...but better entrypoint exists! Check that DSACKs are
-				 * properly accounted while skipping here
-				 */
-				tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
-							 &fack_count, &reord, &flag);
-
+				/* ...but better entrypoint exists! */
 				skb = tcp_write_queue_next(sk, tp->highest_sack);
 				fack_count = tp->fackets_out;
 				cache++;
-- 
cgit v1.2.3


From 0adc9add7709f87995c003c0ccb085076afdc923 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 19 Nov 2007 19:15:03 -0800
Subject: [NETPOLL]: Use skb_queue_purge().

Use standard routine for flushing queue.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c499b5c69be..f63a70be3cd 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -816,11 +816,7 @@ void netpoll_cleanup(struct netpoll *np)
 				cancel_rearming_delayed_work(&npinfo->tx_work);
 
 				/* clean after last, unfinished work */
-				if (!skb_queue_empty(&npinfo->txq)) {
-					struct sk_buff *skb;
-					skb = __skb_dequeue(&npinfo->txq);
-					kfree_skb(skb);
-				}
+				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
 				np->dev->npinfo = NULL;
 			}
-- 
cgit v1.2.3


From 5106930bd6b57402205e3de54dae9476e215b622 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 19 Nov 2007 19:18:11 -0800
Subject: [NETPOLL]: netpoll_poll() cleanup

Restructure code slightly to improve readability:
  * dereference device once
  * change obvious while() loop
  * let poll_napi() handle null list itself

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index f63a70be3cd..250868f6876 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -139,16 +139,15 @@ static int poll_one_napi(struct netpoll_info *npinfo,
 	return budget - work;
 }
 
-static void poll_napi(struct netpoll *np)
+static void poll_napi(struct net_device *dev)
 {
-	struct netpoll_info *npinfo = np->dev->npinfo;
 	struct napi_struct *napi;
 	int budget = 16;
 
-	list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
+	list_for_each_entry(napi, &dev->napi_list, dev_list) {
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			budget = poll_one_napi(npinfo, napi, budget);
+			budget = poll_one_napi(dev->npinfo, napi, budget);
 			spin_unlock(&napi->poll_lock);
 
 			if (!budget)
@@ -159,30 +158,27 @@ static void poll_napi(struct netpoll *np)
 
 static void service_arp_queue(struct netpoll_info *npi)
 {
-	struct sk_buff *skb;
-
-	if (unlikely(!npi))
-		return;
+	if (npi) {
+		struct sk_buff *skb;
 
-	skb = skb_dequeue(&npi->arp_tx);
-
-	while (skb != NULL) {
-		arp_reply(skb);
-		skb = skb_dequeue(&npi->arp_tx);
+		while ((skb = skb_dequeue(&npi->arp_tx)))
+			arp_reply(skb);
 	}
 }
 
 void netpoll_poll(struct netpoll *np)
 {
-	if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
+	struct net_device *dev = np->dev;
+
+	if (!dev || !netif_running(dev) || !dev->poll_controller)
 		return;
 
 	/* Process pending work on NIC */
-	np->dev->poll_controller(np->dev);
-	if (!list_empty(&np->dev->napi_list))
-		poll_napi(np);
+	dev->poll_controller(dev);
+
+	poll_napi(dev);
 
-	service_arp_queue(np->dev->npinfo);
+	service_arp_queue(dev->npinfo);
 
 	zap_completion_queue();
 }
-- 
cgit v1.2.3


From 0953864160bdd28dfe45fd46fa462b4d2d53cb96 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 19 Nov 2007 19:23:29 -0800
Subject: [NETPOLL]: no need to store local_mac

The local_mac is managed by the network device, no need to keep a
spare copy and all the management problems that could cause.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 250868f6876..cf6acd3084a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -360,8 +360,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
 	skb_reset_mac_header(skb);
 	skb->protocol = eth->h_proto = htons(ETH_P_IP);
-	memcpy(eth->h_source, np->local_mac, 6);
-	memcpy(eth->h_dest, np->remote_mac, 6);
+	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
+	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
 
 	skb->dev = np->dev;
 
@@ -431,7 +431,7 @@ static void arp_reply(struct sk_buff *skb)
 
 	/* Fill the device header for the ARP frame */
 	if (dev_hard_header(send_skb, skb->dev, ptype,
-			    sha, np->local_mac,
+			    sha, np->dev->dev_addr,
 			    send_skb->len) < 0) {
 		kfree_skb(send_skb);
 		return;
@@ -737,9 +737,6 @@ int netpoll_setup(struct netpoll *np)
 		}
 	}
 
-	if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
-		memcpy(np->local_mac, ndev->dev_addr, 6);
-
 	if (!np->local_ip) {
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(ndev);
-- 
cgit v1.2.3


From 33f807ba0d9259e7c75c7a2ce8bd2787e5b540c7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 19 Nov 2007 19:24:52 -0800
Subject: [NETPOLL]: Kill NETPOLL_RX_DROP, set but never tested.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cf6acd3084a..9e3aea0bd36 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -40,7 +40,6 @@ static atomic_t trapped;
 
 #define USEC_PER_POLL	50
 #define NETPOLL_RX_ENABLED  1
-#define NETPOLL_RX_DROP     2
 
 #define MAX_SKB_SIZE \
 		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -128,13 +127,11 @@ static int poll_one_napi(struct netpoll_info *npinfo,
 	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
 		return budget;
 
-	npinfo->rx_flags |= NETPOLL_RX_DROP;
 	atomic_inc(&trapped);
 
 	work = napi->poll(napi, budget);
 
 	atomic_dec(&trapped);
-	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
 
 	return budget - work;
 }
@@ -475,7 +472,7 @@ int __netpoll_rx(struct sk_buff *skb)
 	if (skb->dev->type != ARPHRD_ETHER)
 		goto out;
 
-	/* check if netpoll clients need ARP */
+	/* if receive ARP during middle of NAPI poll, then queue */
 	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
 		skb_queue_tail(&npi->arp_tx, skb);
@@ -537,6 +534,9 @@ int __netpoll_rx(struct sk_buff *skb)
 	return 1;
 
 out:
+	/* If packet received while already in poll then just
+	 * silently drop.
+	 */
 	if (atomic_read(&trapped)) {
 		kfree_skb(skb);
 		return 1;
-- 
cgit v1.2.3


From c7b6ea24b43afb5749cb704e143df19d70e23dea Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 19 Nov 2007 19:37:09 -0800
Subject: [NETPOLL]: Don't need rx_flags.

The rx_flags variable is redundant. Turning rx on/off is done
via setting the rx_np pointer.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9e3aea0bd36..b1d5acd2fc7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -39,7 +39,6 @@ static struct sk_buff_head skb_pool;
 static atomic_t trapped;
 
 #define USEC_PER_POLL	50
-#define NETPOLL_RX_ENABLED  1
 
 #define MAX_SKB_SIZE \
 		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -675,7 +674,6 @@ int netpoll_setup(struct netpoll *np)
 			goto release;
 		}
 
-		npinfo->rx_flags = 0;
 		npinfo->rx_np = NULL;
 
 		spin_lock_init(&npinfo->rx_lock);
@@ -757,7 +755,6 @@ int netpoll_setup(struct netpoll *np)
 
 	if (np->rx_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
-		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 		npinfo->rx_np = np;
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
@@ -799,7 +796,6 @@ void netpoll_cleanup(struct netpoll *np)
 			if (npinfo->rx_np == np) {
 				spin_lock_irqsave(&npinfo->rx_lock, flags);
 				npinfo->rx_np = NULL;
-				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
 				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 			}
 
-- 
cgit v1.2.3


From 6a4329554c527e86c2745bc001c29bb8d1b8efaf Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mattias.nissler@gmx.de>
Date: Wed, 24 Oct 2007 23:30:36 +0200
Subject: mac80211: Accept auto txpower setting

This changes the SIWTXPOWER ioctl to also accept a txpower setting of
"automatic". Since mac80211 currently cannot tell drivers to automatically
adjust tx power, we select the tx power level of the current channel. While
this is kind of a hack, it certainly saves some iwconfig users from headaches.

Signed-off-by: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_ioctl.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 308bbe4a133..da3350d44da 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -634,22 +634,35 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	bool need_reconfig = 0;
+	u8 new_power_level;
 
 	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
 		return -EINVAL;
 	if (data->txpower.flags & IW_TXPOW_RANGE)
 		return -EINVAL;
-	if (!data->txpower.fixed)
-		return -EINVAL;
 
-	if (local->hw.conf.power_level != data->txpower.value) {
-		local->hw.conf.power_level = data->txpower.value;
+	if (data->txpower.fixed) {
+		new_power_level = data->txpower.value;
+	} else {
+		/* Automatic power level. Get the px power from the current
+		 * channel. */
+		struct ieee80211_channel* chan = local->oper_channel;
+		if (!chan)
+			return -EINVAL;
+
+		new_power_level = chan->power_level;
+	}
+
+	if (local->hw.conf.power_level != new_power_level) {
+		local->hw.conf.power_level = new_power_level;
 		need_reconfig = 1;
 	}
+
 	if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
 		local->hw.conf.radio_enabled = !(data->txpower.disabled);
 		need_reconfig = 1;
 	}
+
 	if (need_reconfig) {
 		ieee80211_hw_config(local);
 		/* The return value of hw_config is not of big interest here,
-- 
cgit v1.2.3


From 7a4fbb1fa46e1a84c246e7bcd99bff45935bf114 Mon Sep 17 00:00:00 2001
From: "Rumen G. Bogdanovski" <rumen@voicecho.com>
Date: Mon, 19 Nov 2007 21:52:42 -0800
Subject: [IPVS]: Flag synced connections and expose them in proc

This patch labels the sync-created connections with IP_VS_CONN_F_SYNC
flag and creates /proc/net/ip_vs_conn_sync to enable monitoring of the
origin of the connections, if they are local or created by the
synchronization.

Signed-off-by: Rumen G. Bogdanovski <rumen@voicecho.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_conn.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ipvs/ip_vs_sync.c |  2 +-
 2 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index a22cee43ed7..45a642edb93 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -781,6 +781,57 @@ static const struct file_operations ip_vs_conn_fops = {
 	.llseek  = seq_lseek,
 	.release = seq_release,
 };
+
+static const char *ip_vs_origin_name(unsigned flags)
+{
+	if (flags & IP_VS_CONN_F_SYNC)
+		return "SYNC";
+	else
+		return "LOCAL";
+}
+
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
+	else {
+		const struct ip_vs_conn *cp = v;
+
+		seq_printf(seq,
+			"%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr), ntohs(cp->cport),
+				ntohl(cp->vaddr), ntohs(cp->vport),
+				ntohl(cp->daddr), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_origin_name(cp->flags),
+				(cp->timer.expires-jiffies)/HZ);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+	.start = ip_vs_conn_seq_start,
+	.next  = ip_vs_conn_seq_next,
+	.stop  = ip_vs_conn_seq_stop,
+	.show  = ip_vs_conn_sync_seq_show,
+};
+
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+}
+
+static const struct file_operations ip_vs_conn_sync_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_conn_sync_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
 #endif
 
 
@@ -940,6 +991,7 @@ int ip_vs_conn_init(void)
 	}
 
 	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
 
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
@@ -956,5 +1008,6 @@ void ip_vs_conn_cleanup(void)
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	proc_net_remove(&init_net, "ip_vs_conn");
+	proc_net_remove(&init_net, "ip_vs_conn_sync");
 	vfree(ip_vs_conn_tab);
 }
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index bd930efc18d..47b7f8f3ae7 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -308,7 +308,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		unsigned flags;
 
 		s = (struct ip_vs_sync_conn *)p;
-		flags = ntohs(s->flags);
+		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
 		if (!(flags & IP_VS_CONN_F_TEMPLATE))
 			cp = ip_vs_conn_in_get(s->protocol,
 					       s->caddr, s->cport,
-- 
cgit v1.2.3


From b209639e8a91aaabedf8bf3716710e6d9ae942e3 Mon Sep 17 00:00:00 2001
From: "Rumen G. Bogdanovski" <rumen@voicecho.com>
Date: Mon, 19 Nov 2007 21:53:27 -0800
Subject: [IPVS]: Create synced connections with their real state

With this patch the synced connections are created with their real state,
which can be changed on the next synchronizations if necessary. This way
on fail-over all the connections will be treated according to their actual
state, causing no scheduling problems (the active and the nonactive
connections have different weights in the schedulers).
The backwards compatibility is preserved and the existing tools will show
the true connection states even on the backup director.

Signed-off-by: Rumen G. Bogdanovski <rumen@voicecho.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_conn.c | 16 ++++++++++++++--
 net/ipv4/ipvs/ip_vs_sync.c | 32 +++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 45a642edb93..65f1ba11275 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -393,7 +393,15 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 	atomic_inc(&dest->refcnt);
 
 	/* Bind with the destination and its corresponding transmitter */
-	cp->flags |= atomic_read(&dest->conn_flags);
+	if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+	    (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
+		/* if the connection is not template and is created
+		 * by sync, preserve the activity flag.
+		 */
+		cp->flags |= atomic_read(&dest->conn_flags) &
+			     (~IP_VS_CONN_F_INACTIVE);
+	else
+		cp->flags |= atomic_read(&dest->conn_flags);
 	cp->dest = dest;
 
 	IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
@@ -412,7 +420,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		/* It is a normal connection, so increase the inactive
 		   connection counter because it is in TCP SYNRECV
 		   state (inactive) or other protocol inacive state */
-		atomic_inc(&dest->inactconns);
+		if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+		    (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+			atomic_inc(&dest->activeconns);
+		else
+			atomic_inc(&dest->inactconns);
 	} else {
 		/* It is a persistent connection/template, so increase
 		   the peristent connection counter */
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 47b7f8f3ae7..948378d0a75 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -305,10 +305,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 
 	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
 	for (i=0; i<m->nr_conns; i++) {
-		unsigned flags;
+		unsigned flags, state;
 
 		s = (struct ip_vs_sync_conn *)p;
 		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+		state = ntohs(s->state);
 		if (!(flags & IP_VS_CONN_F_TEMPLATE))
 			cp = ip_vs_conn_in_get(s->protocol,
 					       s->caddr, s->cport,
@@ -326,6 +327,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 			dest = ip_vs_find_dest(s->daddr, s->dport,
 					       s->vaddr, s->vport,
 					       s->protocol);
+			/*  Set the approprite ativity flag */
+			if (s->protocol == IPPROTO_TCP) {
+				if (state != IP_VS_TCP_S_ESTABLISHED)
+					flags |= IP_VS_CONN_F_INACTIVE;
+				else
+					flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
 			cp = ip_vs_conn_new(s->protocol,
 					    s->caddr, s->cport,
 					    s->vaddr, s->vport,
@@ -337,7 +345,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				IP_VS_ERR("ip_vs_conn_new failed\n");
 				return;
 			}
-			cp->state = ntohs(s->state);
+			cp->state = state;
 		} else if (!cp->dest) {
 			dest = ip_vs_try_bind_dest(cp);
 			if (!dest) {
@@ -346,8 +354,22 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				cp->flags = flags | IP_VS_CONN_F_HASHED;
 			} else
 				atomic_dec(&dest->refcnt);
-		}	/* Note that we don't touch its state and flags
-			   if it is a normal entry. */
+		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+			   (cp->state != state)) {
+			/* update active/inactive flag for the connection */
+			dest = cp->dest;
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(state != IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(state == IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
 
 		if (flags & IP_VS_CONN_F_SEQ_MASK) {
 			opt = (struct ip_vs_sync_conn_options *)&s[1];
@@ -357,7 +379,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 			p += SIMPLE_CONN_SIZE;
 
 		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-		cp->state = ntohs(s->state);
+		cp->state = state;
 		pp = ip_vs_proto_get(s->protocol);
 		cp->timeout = pp->timeout_table[cp->state];
 		ip_vs_conn_put(cp);
-- 
cgit v1.2.3


From 1b0b04f9fb657a9ee85e3b9388394b9b943f7ffc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 19 Nov 2007 21:56:16 -0800
Subject: [IPCONFIG]: Mark vendor_class_identifier as __initdata.

Based upon a suggestion by Francois Romieu.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 6422422b8e3..4bc3a57c183 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -140,7 +140,8 @@ __be32 ic_servaddr = NONE;	/* Boot server IP address */
 __be32 root_server_addr = NONE;	/* Address of NFS server */
 u8 root_server_path[256] = { 0, };	/* Path to mount as root */
 
-static char vendor_class_identifier[253]; /* vendor class identifier */
+/* vendor class identifier */
+static char vendor_class_identifier[253] __initdata;
 
 /* Persistent data: */
 
-- 
cgit v1.2.3


From b854272b3c732316676e9128f7b9e6f1e1ff88b0 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 1 Dec 2007 00:21:31 +1100
Subject: [NET]: Modify all rtnetlink methods to only work in the initial
 namespace (v2)

Before I can enable rtnetlink to work in all network namespaces I need
to be certain that something won't break.  So this patch deliberately
disables all of the rtnletlink methods in everything except the
initial network namespace.  After the methods have been audited this
extra check can be disabled.

Changes from v1:
- added IPv6 addrlabel protection

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 net/bridge/br_netlink.c |  9 +++++++++
 net/core/fib_rules.c    | 11 +++++++++++
 net/core/neighbour.c    | 18 ++++++++++++++++++
 net/core/rtnetlink.c    | 19 +++++++++++++++++++
 net/decnet/dn_dev.c     | 14 +++++++++++++-
 net/decnet/dn_fib.c     |  8 ++++++++
 net/decnet/dn_route.c   |  8 ++++++++
 net/decnet/dn_table.c   |  4 ++++
 net/ipv4/devinet.c      | 12 ++++++++++++
 net/ipv4/fib_frontend.c | 12 ++++++++++++
 net/ipv4/route.c        |  4 ++++
 net/ipv6/addrconf.c     | 31 +++++++++++++++++++++++++++++++
 net/ipv6/addrlabel.c    | 12 ++++++++++++
 net/ipv6/ip6_fib.c      |  4 ++++
 net/ipv6/route.c        | 12 ++++++++++++
 net/sched/act_api.c     | 10 ++++++++++
 net/sched/cls_api.c     | 10 ++++++++++
 net/sched/sch_api.c     | 21 +++++++++++++++++++++
 18 files changed, 218 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 53ab8e0cb51..a4ffa2b63cd 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <net/sock.h>
 #include "br_private.h"
 
 static inline size_t br_nlmsg_size(void)
@@ -107,9 +108,13 @@ errout:
  */
 static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct net_device *dev;
 	int idx;
 
+	if (net != &init_net)
+		return 0;
+
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
 		/* not a bridge port */
@@ -135,12 +140,16 @@ skip:
  */
 static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
 	struct net_device *dev;
 	struct net_bridge_port *p;
 	u8 new_state;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (nlmsg_len(nlh) < sizeof(*ifm))
 		return -EINVAL;
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 848132b6cb7..3b20b6f0982 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -228,6 +228,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct nlattr *tb[FRA_MAX+1];
 	int err = -EINVAL, unresolved = 0;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
 
@@ -358,12 +361,16 @@ errout:
 
 static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
 	struct fib_rule *rule, *tmp;
 	struct nlattr *tb[FRA_MAX+1];
 	int err = -EINVAL;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
 
@@ -539,9 +546,13 @@ skip:
 
 static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib_rules_ops *ops;
 	int idx = 0, family;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	family = rtnl_msg_family(cb->nlh);
 	if (family != AF_UNSPEC) {
 		/* Protocol specific dump request */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 175bbc0a974..29f0a4d2008 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1449,6 +1449,9 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *dev = NULL;
 	int err = -EINVAL;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (nlmsg_len(nlh) < sizeof(*ndm))
 		goto out;
 
@@ -1515,6 +1518,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *dev = NULL;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
 	if (err < 0)
 		goto out;
@@ -1789,11 +1795,15 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
 
 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct neigh_table *tbl;
 	struct ndtmsg *ndtmsg;
 	struct nlattr *tb[NDTA_MAX+1];
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
 			  nl_neightbl_policy);
 	if (err < 0)
@@ -1913,11 +1923,15 @@ errout:
 
 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int family, tidx, nidx = 0;
 	int tbl_skip = cb->args[0];
 	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
+	if (net != &init_net)
+		return 0;
+
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
 	read_lock(&neigh_tbl_lock);
@@ -2042,9 +2056,13 @@ out:
 
 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct neigh_table *tbl;
 	int t, family, s_t;
 
+	if (net != &init_net)
+		return 0;
+
 	read_lock(&neigh_tbl_lock);
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 	s_t = cb->args[0];
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fed95a323b2..4edc3dac4cc 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -703,6 +703,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_idx = cb->args[0];
 	struct net_device *dev;
 
+	if (net != &init_net)
+		return 0;
+
 	idx = 0;
 	for_each_netdev(net, dev) {
 		if (idx < s_idx)
@@ -905,6 +908,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[IFLA_MAX+1];
 	char ifname[IFNAMSIZ];
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		goto errout;
@@ -953,6 +959,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[IFLA_MAX+1];
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		return err;
@@ -1034,6 +1043,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 #ifdef CONFIG_KMOD
 replay:
 #endif
@@ -1160,6 +1172,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct sk_buff *nskb;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		return err;
@@ -1195,9 +1210,13 @@ errout:
 
 static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx;
 	int s_idx = cb->family;
 
+	if (net != &init_net)
+		return 0;
+
 	if (s_idx == 0)
 		s_idx = 1;
 	for (idx=1; idx<NPROTO; idx++) {
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 3bc82dc83b3..94256845a05 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -647,11 +647,15 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
 
 static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct nlattr *tb[IFA_MAX+1];
 	struct dn_dev *dn_db;
 	struct ifaddrmsg *ifm;
 	struct dn_ifaddr *ifa, **ifap;
-	int err;
+	int err = -EINVAL;
+
+	if (net != &init_net)
+		goto errout;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
 	if (err < 0)
@@ -681,6 +685,7 @@ errout:
 
 static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct nlattr *tb[IFA_MAX+1];
 	struct net_device *dev;
 	struct dn_dev *dn_db;
@@ -688,6 +693,9 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct dn_ifaddr *ifa;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
 	if (err < 0)
 		return err;
@@ -793,11 +801,15 @@ errout:
 
 static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx, dn_idx = 0, skip_ndevs, skip_naddr;
 	struct net_device *dev;
 	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 
+	if (net != &init_net)
+		return 0;
+
 	skip_ndevs = cb->args[0];
 	skip_naddr = cb->args[1];
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3760a20d10d..5413e1b75b5 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -506,10 +506,14 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 
 static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
 	struct rtmsg *r = NLMSG_DATA(nlh);
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
@@ -522,10 +526,14 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 
 static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
 	struct rtmsg *r = NLMSG_DATA(nlh);
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2a5bb0714c7..28aeba15cf1 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1511,6 +1511,7 @@ rtattr_failure:
  */
 static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct rtattr **rta = arg;
 	struct rtmsg *rtm = NLMSG_DATA(nlh);
 	struct dn_route *rt = NULL;
@@ -1519,6 +1520,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	struct sk_buff *skb;
 	struct flowi fl;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = DNPROTO_NSP;
 
@@ -1596,10 +1600,14 @@ out_free:
  */
 int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct dn_route *rt;
 	int h, s_h;
 	int idx, s_idx;
 
+	if (net != &init_net)
+		return 0;
+
 	if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg))
 		return -EINVAL;
 	if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED))
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index fda0772fa21..a3bdb8dd1fb 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -463,12 +463,16 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
 
 int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct dn_fib_table *tb;
 	struct hlist_node *node;
 	int dumped = 0;
 
+	if (net != &init_net)
+		return 0;
+
 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
 		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
 			return dn_cache_dump(skb, cb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b42f74617ba..c0eb26a0d0b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -441,6 +441,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
 	struct ifaddrmsg *ifm;
@@ -449,6 +450,9 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 
 	ASSERT_RTNL();
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 	if (err < 0)
 		goto errout;
@@ -560,10 +564,14 @@ errout:
 
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct in_ifaddr *ifa;
 
 	ASSERT_RTNL();
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	ifa = rtm_to_ifaddr(nlh);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
@@ -1174,12 +1182,16 @@ nla_put_failure:
 
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx, ip_idx;
 	struct net_device *dev;
 	struct in_device *in_dev;
 	struct in_ifaddr *ifa;
 	int s_ip_idx, s_idx = cb->args[0];
 
+	if (net != &init_net)
+		return 0;
+
 	s_ip_idx = ip_idx = cb->args[1];
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 97abf934d18..e02aba5fa13 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -538,10 +538,14 @@ errout:
 
 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib_config cfg;
 	struct fib_table *tb;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = rtm_to_fib_config(skb, nlh, &cfg);
 	if (err < 0)
 		goto errout;
@@ -559,10 +563,14 @@ errout:
 
 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib_config cfg;
 	struct fib_table *tb;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = rtm_to_fib_config(skb, nlh, &cfg);
 	if (err < 0)
 		goto errout;
@@ -580,12 +588,16 @@ errout:
 
 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct fib_table *tb;
 	struct hlist_node *node;
 	int dumped = 0;
 
+	if (net != &init_net)
+		return 0;
+
 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 		return ip_rt_dump(skb, cb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a21021bf140..1d2839571d2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2527,6 +2527,7 @@ nla_put_failure:
 
 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
 	struct rtable *rt = NULL;
@@ -2536,6 +2537,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	int err;
 	struct sk_buff *skb;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
 	if (err < 0)
 		goto errout;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a70cecf8fc8..26de8ee5095 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2971,11 +2971,15 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
 static int
 inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
 	if (err < 0)
 		return err;
@@ -3028,6 +3032,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 static int
 inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
@@ -3037,6 +3042,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	u8 ifa_flags;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
 	if (err < 0)
 		return err;
@@ -3310,26 +3318,42 @@ done:
 
 static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	enum addr_type_t type = UNICAST_ADDR;
+
+	if (net != &init_net)
+		return 0;
+
 	return inet6_dump_addr(skb, cb, type);
 }
 
 static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	enum addr_type_t type = MULTICAST_ADDR;
+
+	if (net != &init_net)
+		return 0;
+
 	return inet6_dump_addr(skb, cb, type);
 }
 
 
 static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	enum addr_type_t type = ANYCAST_ADDR;
+
+	if (net != &init_net)
+		return 0;
+
 	return inet6_dump_addr(skb, cb, type);
 }
 
 static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 			     void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *addr = NULL;
@@ -3338,6 +3362,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	struct sk_buff *skb;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
 	if (err < 0)
 		goto errout;
@@ -3555,11 +3582,15 @@ nla_put_failure:
 
 static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx, err;
 	int s_idx = cb->args[0];
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
+	if (net != &init_net)
+		return 0;
+
 	read_lock(&dev_base_lock);
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 204d4d66834..b9b5d570714 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -361,12 +361,16 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 			     void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct ifaddrlblmsg *ifal;
 	struct nlattr *tb[IFAL_MAX+1];
 	struct in6_addr *pfx;
 	u32 label;
 	int err = 0;
 
+	if (net != &init_net)
+		return 0;
+
 	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
 	if (err < 0)
 		return err;
@@ -445,11 +449,15 @@ static int ip6addrlbl_fill(struct sk_buff *skb,
 
 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct ip6addrlbl_entry *p;
 	struct hlist_node *pos;
 	int idx = 0, s_idx = cb->args[0];
 	int err;
 
+	if (net != &init_net)
+		return 0;
+
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
 		if (idx >= s_idx) {
@@ -479,6 +487,7 @@ static inline int ip6addrlbl_msgsize(void)
 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 			  void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct ifaddrlblmsg *ifal;
 	struct nlattr *tb[IFAL_MAX+1];
 	struct in6_addr *addr;
@@ -487,6 +496,9 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	struct ip6addrlbl_entry *p;
 	struct sk_buff *skb;
 
+	if (net != &init_net)
+		return 0;
+
 	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
 	if (err < 0)
 		return err;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 946cf389ab9..31b60a02512 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -361,6 +361,7 @@ end:
 
 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct rt6_rtnl_dump_arg arg;
@@ -369,6 +370,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	struct hlist_node *node;
 	int res = 0;
 
+	if (net != &init_net)
+		return 0;
+
 	s_h = cb->args[0];
 	s_e = cb->args[1];
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4ef2cfaa346..5e1c5796761 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2003,9 +2003,13 @@ errout:
 
 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib6_config cfg;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = rtm_to_fib6_config(skb, nlh, &cfg);
 	if (err < 0)
 		return err;
@@ -2015,9 +2019,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 
 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib6_config cfg;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = rtm_to_fib6_config(skb, nlh, &cfg);
 	if (err < 0)
 		return err;
@@ -2152,6 +2160,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct nlattr *tb[RTA_MAX+1];
 	struct rt6_info *rt;
 	struct sk_buff *skb;
@@ -2159,6 +2168,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	struct flowi fl;
 	int err, iif = 0;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
 	if (err < 0)
 		goto errout;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72cdb0fade2..852829139c6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -18,6 +18,8 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
@@ -924,10 +926,14 @@ done:
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct rtattr **tca = arg;
 	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
 	int ret = 0, ovr = 0;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (tca[TCA_ACT_TAB-1] == NULL) {
 		printk("tc_ctl_action: received NO action attribs\n");
 		return -EINVAL;
@@ -997,6 +1003,7 @@ find_dump_kind(struct nlmsghdr *n)
 static int
 tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *x;
@@ -1006,6 +1013,9 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
 	struct rtattr *kind = find_dump_kind(cb->nlh);
 
+	if (net != &init_net)
+		return 0;
+
 	if (kind == NULL) {
 		printk("tc_dump_action: action bad kind\n");
 		return 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bb98045d550..fdab6a530bb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,6 +23,8 @@
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/netlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
@@ -119,6 +121,7 @@ static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
 
 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct rtattr **tca;
 	struct tcmsg *t;
 	u32 protocol;
@@ -135,6 +138,9 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	unsigned long fh;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 replay:
 	tca = arg;
 	t = NLMSG_DATA(n);
@@ -375,6 +381,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walke
 
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int t;
 	int s_t;
 	struct net_device *dev;
@@ -385,6 +392,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	const struct Qdisc_class_ops *cops;
 	struct tcf_dump_args arg;
 
+	if (net != &init_net)
+		return 0;
+
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return skb->len;
 	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 259321be1ad..f30e3f7ad88 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,7 @@
 #include <linux/hrtimer.h>
 
 #include <net/net_namespace.h>
+#include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
@@ -599,6 +600,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
 
 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct tcmsg *tcm = NLMSG_DATA(n);
 	struct rtattr **tca = arg;
 	struct net_device *dev;
@@ -607,6 +609,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *p = NULL;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
@@ -660,6 +665,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct tcmsg *tcm;
 	struct rtattr **tca;
 	struct net_device *dev;
@@ -667,6 +673,9 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *q, *p;
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 replay:
 	/* Reinit, just in case something touches this. */
 	tcm = NLMSG_DATA(n);
@@ -872,11 +881,15 @@ err_out:
 
 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx, q_idx;
 	int s_idx, s_q_idx;
 	struct net_device *dev;
 	struct Qdisc *q;
 
+	if (net != &init_net)
+		return 0;
+
 	s_idx = cb->args[0];
 	s_q_idx = q_idx = cb->args[1];
 	read_lock(&dev_base_lock);
@@ -920,6 +933,7 @@ done:
 
 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct tcmsg *tcm = NLMSG_DATA(n);
 	struct rtattr **tca = arg;
 	struct net_device *dev;
@@ -932,6 +946,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	u32 qid = TC_H_MAJ(clid);
 	int err;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
@@ -1106,6 +1123,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
 
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int t;
 	int s_t;
 	struct net_device *dev;
@@ -1113,6 +1131,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
 	struct qdisc_dump_args arg;
 
+	if (net != &init_net)
+		return 0;
+
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return 0;
 	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
-- 
cgit v1.2.3


From 97c53cacf00d1f5aa04adabfebcc806ca8b22b10 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 19 Nov 2007 22:26:51 -0800
Subject: [NET]: Make rtnetlink infrastructure network namespace aware (v3)

After this patch none of the netlink callback support anything
except the initial network namespace but the rtnetlink infrastructure
now handles multiple network namespaces.

Changes from v2:
- IPv6 addrlabel processing

Changes from v1:
- no need for special rtnl_unlock handling
- fixed IPv6 ndisc

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c  |  4 +--
 net/core/fib_rules.c     |  4 +--
 net/core/neighbour.c     |  4 +--
 net/core/rtnetlink.c     | 63 +++++++++++++++++++++++++++++++++++++++---------
 net/decnet/dn_dev.c      |  4 +--
 net/decnet/dn_route.c    |  2 +-
 net/decnet/dn_table.c    |  4 +--
 net/ipv4/devinet.c       |  4 +--
 net/ipv4/fib_semantics.c |  4 +--
 net/ipv4/ipmr.c          |  4 +--
 net/ipv4/route.c         |  2 +-
 net/ipv6/addrconf.c      | 14 +++++------
 net/ipv6/addrlabel.c     |  2 +-
 net/ipv6/ndisc.c         |  5 ++--
 net/ipv6/route.c         |  6 ++---
 net/sched/act_api.c      |  8 +++---
 net/sched/cls_api.c      |  2 +-
 net/sched/sch_api.c      |  4 +--
 net/wireless/wext.c      |  5 +++-
 19 files changed, 95 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a4ffa2b63cd..f5d69336d97 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -97,10 +97,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_LINK, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
 }
 
 /*
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3b20b6f0982..0af0538343d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -599,10 +599,10 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, pid, ops->nlgroup, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(ops->nlgroup, err);
+		rtnl_set_sk_err(&init_net, ops->nlgroup, err);
 }
 
 static void attach_rules(struct list_head *rules, struct net_device *dev)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 29f0a4d2008..a8b72c1c7c8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2467,10 +2467,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_NEIGH, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_NEIGH, err);
 }
 
 #ifdef CONFIG_ARPD
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4edc3dac4cc..9efaf35934f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
-static struct sock *rtnl;
 
 void rtnl_lock(void)
 {
@@ -458,8 +457,9 @@ size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
 	return ret;
 }
 
-int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
 {
+	struct sock *rtnl = net->rtnl;
 	int err = 0;
 
 	NETLINK_CB(skb).dst_group = group;
@@ -471,14 +471,17 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
 	return err;
 }
 
-int rtnl_unicast(struct sk_buff *skb, u32 pid)
+int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
 {
+	struct sock *rtnl = net->rtnl;
+
 	return nlmsg_unicast(rtnl, skb, pid);
 }
 
-int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
 		struct nlmsghdr *nlh, gfp_t flags)
 {
+	struct sock *rtnl = net->rtnl;
 	int report = 0;
 
 	if (nlh)
@@ -487,8 +490,10 @@ int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
 	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
 }
 
-void rtnl_set_sk_err(u32 group, int error)
+void rtnl_set_sk_err(struct net *net, u32 group, int error)
 {
+	struct sock *rtnl = net->rtnl;
+
 	netlink_set_err(rtnl, 0, group, error);
 }
 
@@ -1201,7 +1206,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		kfree_skb(nskb);
 		goto errout;
 	}
-	err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
+	err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
 errout:
 	dev_put(dev);
 
@@ -1252,10 +1257,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_LINK, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
 }
 
 /* Protected by RTNL sempahore.  */
@@ -1266,6 +1271,7 @@ static int rtattr_max;
 
 static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+	struct net *net = skb->sk->sk_net;
 	rtnl_doit_func doit;
 	int sz_idx, kind;
 	int min_len;
@@ -1294,6 +1300,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
 
 		dumpit = rtnl_get_dumpit(family, type);
@@ -1301,6 +1308,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EOPNOTSUPP;
 
 		__rtnl_unlock();
+		rtnl = net->rtnl;
 		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
 		rtnl_lock();
 		return err;
@@ -1373,6 +1381,40 @@ static struct notifier_block rtnetlink_dev_notifier = {
 	.notifier_call	= rtnetlink_event,
 };
 
+
+static int rtnetlink_net_init(struct net *net)
+{
+	struct sock *sk;
+	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
+				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+	if (!sk)
+		return -ENOMEM;
+
+	/* Don't hold an extra reference on the namespace */
+	put_net(sk->sk_net);
+	net->rtnl = sk;
+	return 0;
+}
+
+static void rtnetlink_net_exit(struct net *net)
+{
+	struct sock *sk = net->rtnl;
+	if (sk) {
+		/* At the last minute lie and say this is a socket for the
+		 * initial network namespace.  So the socket will be safe to
+		 * free.
+		 */
+		sk->sk_net = get_net(&init_net);
+		sock_put(sk);
+		net->rtnl = NULL;
+	}
+}
+
+static struct pernet_operations rtnetlink_net_ops = {
+	.init = rtnetlink_net_init,
+	.exit = rtnetlink_net_exit,
+};
+
 void __init rtnetlink_init(void)
 {
 	int i;
@@ -1385,10 +1427,9 @@ void __init rtnetlink_init(void)
 	if (!rta_buf)
 		panic("rtnetlink_init: cannot allocate rta_buf\n");
 
-	rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX,
-				     rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
-	if (rtnl == NULL)
+	if (register_pernet_subsys(&rtnetlink_net_ops))
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
+
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 94256845a05..39c89c68204 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -793,10 +793,10 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
 }
 
 static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 28aeba15cf1..5d742f1420d 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1587,7 +1587,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		goto out_free;
 	}
 
-	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 
 out_free:
 	kfree_skb(skb);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index a3bdb8dd1fb..e09d915dbd7 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -375,10 +375,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
 }
 
 static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c0eb26a0d0b..6e75c884e1a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1240,10 +1240,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1351a2617dc..33ec96001d9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -320,11 +320,11 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+	err = rtnl_notify(skb, &init_net, info->pid, RTNLGRP_IPV4_ROUTE,
 			  info->nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_ROUTE, err);
 }
 
 /* Return the first fib alias matching TOS with
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8e5d47a6060..11879283ad5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -321,7 +321,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 			e->error = -ETIMEDOUT;
 			memset(&e->msg, 0, sizeof(e->msg));
 
-			rtnl_unicast(skb, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
 		} else
 			kfree_skb(skb);
 	}
@@ -533,7 +533,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
 
-			rtnl_unicast(skb, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
 		} else
 			ip_mr_forward(skb, c, 0);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1d2839571d2..e4b6fb4b1f4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2610,7 +2610,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err <= 0)
 		goto errout_free;
 
-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 errout:
 	return err;
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 26de8ee5095..6c8b193474b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3397,7 +3397,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		kfree_skb(skb);
 		goto errout_ifa;
 	}
-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 errout_ifa:
 	in6_ifa_put(ifa);
 errout:
@@ -3420,10 +3420,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
 }
 
 static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3628,10 +3628,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
 }
 
 static inline size_t inet6_prefix_nlmsg_size(void)
@@ -3697,10 +3697,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
 }
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b9b5d570714..6f1ca607edd 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -549,7 +549,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		goto out;
 	}
 
-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 out:
 	return err;
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b2531f80317..b87f9d245e2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1049,7 +1049,8 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
 		&ipv6_hdr(ra)->saddr);
 	nlmsg_end(skb, nlh);
 
-	err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL,
+			  GFP_ATOMIC);
 	if (err < 0)
 		goto errout;
 
@@ -1059,7 +1060,7 @@ nla_put_failure:
 	nlmsg_free(skb);
 	err = -EMSGSIZE;
 errout:
-	rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err);
+	rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err);
 }
 
 static void ndisc_router_discovery(struct sk_buff *skb)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5e1c5796761..d7ec4c9ffc4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2230,7 +2230,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		goto errout;
 	}
 
-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 errout:
 	return err;
 }
@@ -2260,10 +2260,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
 }
 
 /*
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 852829139c6..81506474a4f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -660,7 +660,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 		return -EINVAL;
 	}
 
-	return rtnl_unicast(skb, pid);
+	return rtnl_unicast(skb, &init_net, pid);
 }
 
 static struct tc_action *
@@ -781,7 +781,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
-	err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 	if (err > 0)
 		return 0;
 
@@ -844,7 +844,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
 
 		/* now do the delete */
 		tcf_action_destroy(head, 0);
-		ret = rtnetlink_send(skb, pid, RTNLGRP_TC,
+		ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
 				     n->nlmsg_flags&NLM_F_ECHO);
 		if (ret > 0)
 			return 0;
@@ -888,7 +888,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
-	err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
 	if (err > 0)
 		err = 0;
 	return err;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index fdab6a530bb..80dccac769d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -361,7 +361,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 }
 
 struct tcf_dump_args
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f30e3f7ad88..273c628be05 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -872,7 +872,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 	}
 
 	if (skb->len)
-		return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 
 err_out:
 	kfree_skb(skb);
@@ -1103,7 +1103,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 }
 
 struct qdisc_dump_args
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 47e80cc2077..db03ed5ce05 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1137,7 +1137,7 @@ static void wireless_nlevent_process(unsigned long data)
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
-		rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+		rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 }
 
 static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
@@ -1189,6 +1189,9 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
 	struct sk_buff *skb;
 	int err;
 
+	if (dev->nd_net != &init_net)
+		return;
+
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 	if (!skb)
 		return;
-- 
cgit v1.2.3


From 4b3da706bbe4613d2fe4df8df4d965954ea98964 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 19 Nov 2007 22:27:40 -0800
Subject: [NET]: Make the netlink methods in rtnetlink handle multiple network
 namespaces

After the previous prep work this just consists of removing checks
limiting the code to work in the initial network namespace, and
updating rtmsg_ifinfo so we can generate events for devices in
something other then the initial network namespace.

Referring to network other network devices like the IFLA_LINK
and IFLA_MASTER attributes do, gets interesting if those network
devices happen to be in other network namespaces.  Currently
ifindex numbers are allocated globally so I have taken the path
of least resistance and not still report the information even
though the devices they are talking about are invisible.

If applications start getting confused or when ifindex
numbers become local to the network namespace we may need
to do something different in the future.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Denis V. Lunev <den@openz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9efaf35934f..8c45d7e35ee 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -708,9 +708,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_idx = cb->args[0];
 	struct net_device *dev;
 
-	if (net != &init_net)
-		return 0;
-
 	idx = 0;
 	for_each_netdev(net, dev) {
 		if (idx < s_idx)
@@ -913,9 +910,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[IFLA_MAX+1];
 	char ifname[IFNAMSIZ];
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		goto errout;
@@ -964,9 +958,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[IFLA_MAX+1];
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		return err;
@@ -1048,9 +1039,6 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 #ifdef CONFIG_KMOD
 replay:
 #endif
@@ -1177,9 +1165,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct sk_buff *nskb;
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
 		return err;
@@ -1215,13 +1200,9 @@ errout:
 
 static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = skb->sk->sk_net;
 	int idx;
 	int s_idx = cb->family;
 
-	if (net != &init_net)
-		return 0;
-
 	if (s_idx == 0)
 		s_idx = 1;
 	for (idx=1; idx<NPROTO; idx++) {
@@ -1243,6 +1224,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
+	struct net *net = dev->nd_net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
@@ -1257,10 +1239,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
+		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
 /* Protected by RTNL sempahore.  */
@@ -1353,9 +1335,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 {
 	struct net_device *dev = ptr;
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
 	switch (event) {
 	case NETDEV_UNREGISTER:
 		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
-- 
cgit v1.2.3


From d12d01d6b4d197115c4d7736172b5b1ec8b1eb71 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 19 Nov 2007 22:28:35 -0800
Subject: [NET]: Make AF_PACKET handle multiple network namespaces

This is done by making packet_sklist_lock and packet_sklist per
network namespace and adding an additional filter condition on
received packets to ensure they came from the proper network
namespace.

Changes from v1:
- prohibit to call inet_dgram_ops.ioctl in other than init_net

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 131 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 85 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8a7807dbba0..45e3cbcb276 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -135,10 +135,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it)
    packet classifier depends on it.
  */
 
-/* List of all packet sockets. */
-static HLIST_HEAD(packet_sklist);
-static DEFINE_RWLOCK(packet_sklist_lock);
-
 /* Private packet socket structures. */
 
 struct packet_mclist
@@ -246,9 +242,6 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 	struct sock *sk;
 	struct sockaddr_pkt *spkt;
 
-	if (dev->nd_net != &init_net)
-		goto out;
-
 	/*
 	 *	When we registered the protocol we saved the socket in the data
 	 *	field for just this event.
@@ -270,6 +263,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto out;
 
+	if (dev->nd_net != sk->sk_net)
+		goto out;
+
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
 		goto oom;
 
@@ -341,7 +337,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	 */
 
 	saddr->spkt_device[13] = 0;
-	dev = dev_get_by_name(&init_net, saddr->spkt_device);
+	dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
 	err = -ENODEV;
 	if (dev == NULL)
 		goto out_unlock;
@@ -449,15 +445,15 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
 
-	if (dev->nd_net != &init_net)
-		goto drop;
-
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
 
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (dev->nd_net != sk->sk_net)
+		goto drop;
+
 	skb->dev = dev;
 
 	if (dev->header_ops) {
@@ -566,15 +562,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	struct sk_buff *copy_skb = NULL;
 	struct timeval tv;
 
-	if (dev->nd_net != &init_net)
-		goto drop;
-
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
 
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (dev->nd_net != sk->sk_net)
+		goto drop;
+
 	if (dev->header_ops) {
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb_mac_header(skb));
@@ -732,7 +728,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	}
 
 
-	dev = dev_get_by_index(&init_net, ifindex);
+	dev = dev_get_by_index(sk->sk_net, ifindex);
 	err = -ENXIO;
 	if (dev == NULL)
 		goto out_unlock;
@@ -799,15 +795,17 @@ static int packet_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 	struct packet_sock *po;
+	struct net *net;
 
 	if (!sk)
 		return 0;
 
+	net = sk->sk_net;
 	po = pkt_sk(sk);
 
-	write_lock_bh(&packet_sklist_lock);
+	write_lock_bh(&net->packet_sklist_lock);
 	sk_del_node_init(sk);
-	write_unlock_bh(&packet_sklist_lock);
+	write_unlock_bh(&net->packet_sklist_lock);
 
 	/*
 	 *	Unhook packet receive handler.
@@ -916,7 +914,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add
 		return -EINVAL;
 	strlcpy(name,uaddr->sa_data,sizeof(name));
 
-	dev = dev_get_by_name(&init_net, name);
+	dev = dev_get_by_name(sk->sk_net, name);
 	if (dev) {
 		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
 		dev_put(dev);
@@ -943,7 +941,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
 
 	if (sll->sll_ifindex) {
 		err = -ENODEV;
-		dev = dev_get_by_index(&init_net, sll->sll_ifindex);
+		dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
 		if (dev == NULL)
 			goto out;
 	}
@@ -972,9 +970,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
 	int err;
 
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
 	if (!capable(CAP_NET_RAW))
 		return -EPERM;
 	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
@@ -1020,9 +1015,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
 		po->running = 1;
 	}
 
-	write_lock_bh(&packet_sklist_lock);
-	sk_add_node(sk, &packet_sklist);
-	write_unlock_bh(&packet_sklist_lock);
+	write_lock_bh(&net->packet_sklist_lock);
+	sk_add_node(sk, &net->packet_sklist);
+	write_unlock_bh(&net->packet_sklist_lock);
 	return(0);
 out:
 	return err;
@@ -1140,7 +1135,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
 		return -EOPNOTSUPP;
 
 	uaddr->sa_family = AF_PACKET;
-	dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex);
+	dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
 	if (dev) {
 		strlcpy(uaddr->sa_data, dev->name, 15);
 		dev_put(dev);
@@ -1165,7 +1160,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 	sll->sll_family = AF_PACKET;
 	sll->sll_ifindex = po->ifindex;
 	sll->sll_protocol = po->num;
-	dev = dev_get_by_index(&init_net, po->ifindex);
+	dev = dev_get_by_index(sk->sk_net, po->ifindex);
 	if (dev) {
 		sll->sll_hatype = dev->type;
 		sll->sll_halen = dev->addr_len;
@@ -1217,7 +1212,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 	rtnl_lock();
 
 	err = -ENODEV;
-	dev = __dev_get_by_index(&init_net, mreq->mr_ifindex);
+	dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
 	if (!dev)
 		goto done;
 
@@ -1271,7 +1266,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
 			if (--ml->count == 0) {
 				struct net_device *dev;
 				*mlp = ml->next;
-				dev = dev_get_by_index(&init_net, ml->ifindex);
+				dev = dev_get_by_index(sk->sk_net, ml->ifindex);
 				if (dev) {
 					packet_dev_mc(dev, ml, -1);
 					dev_put(dev);
@@ -1299,7 +1294,7 @@ static void packet_flush_mclist(struct sock *sk)
 		struct net_device *dev;
 
 		po->mclist = ml->next;
-		if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) {
+		if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
 			packet_dev_mc(dev, ml, -1);
 			dev_put(dev);
 		}
@@ -1455,12 +1450,10 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
 	struct sock *sk;
 	struct hlist_node *node;
 	struct net_device *dev = data;
+	struct net *net = dev->nd_net;
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
-	read_lock(&packet_sklist_lock);
-	sk_for_each(sk, node, &packet_sklist) {
+	read_lock(&net->packet_sklist_lock);
+	sk_for_each(sk, node, &net->packet_sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
 		switch (msg) {
@@ -1499,7 +1492,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
 			break;
 		}
 	}
-	read_unlock(&packet_sklist_lock);
+	read_unlock(&net->packet_sklist_lock);
 	return NOTIFY_DONE;
 }
 
@@ -1547,6 +1540,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 		case SIOCGIFDSTADDR:
 		case SIOCSIFDSTADDR:
 		case SIOCSIFFLAGS:
+			if (sk->sk_net != &init_net)
+				return -ENOIOCTLCMD;
 			return inet_dgram_ops.ioctl(sock, cmd, arg);
 #endif
 
@@ -1862,12 +1857,12 @@ static struct notifier_block packet_netdev_notifier = {
 };
 
 #ifdef CONFIG_PROC_FS
-static inline struct sock *packet_seq_idx(loff_t off)
+static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
 {
 	struct sock *s;
 	struct hlist_node *node;
 
-	sk_for_each(s, node, &packet_sklist) {
+	sk_for_each(s, node, &net->packet_sklist) {
 		if (!off--)
 			return s;
 	}
@@ -1876,21 +1871,24 @@ static inline struct sock *packet_seq_idx(loff_t off)
 
 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock(&packet_sklist_lock);
-	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
+	struct net *net = seq->private;
+	read_lock(&net->packet_sklist_lock);
+	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct net *net = seq->private;
 	++*pos;
 	return  (v == SEQ_START_TOKEN)
-		? sk_head(&packet_sklist)
+		? sk_head(&net->packet_sklist)
 		: sk_next((struct sock*)v) ;
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&packet_sklist_lock);
+	struct net *net = seq->private;
+	read_unlock(&net->packet_sklist_lock);
 }
 
 static int packet_seq_show(struct seq_file *seq, void *v)
@@ -1926,7 +1924,26 @@ static const struct seq_operations packet_seq_ops = {
 
 static int packet_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &packet_seq_ops);
+	struct seq_file *seq;
+	int res;
+	res = seq_open(file, &packet_seq_ops);
+	if (!res) {
+		seq = file->private_data;
+		seq->private = get_proc_net(inode);
+		if (!seq->private) {
+			seq_release(inode, file);
+			res = -ENXIO;
+		}
+	}
+	return res;
+}
+
+static int packet_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq=  file->private_data;
+	struct net *net = seq->private;
+	put_net(net);
+	return seq_release(inode, file);
 }
 
 static const struct file_operations packet_seq_fops = {
@@ -1934,15 +1951,37 @@ static const struct file_operations packet_seq_fops = {
 	.open		= packet_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= packet_seq_release,
 };
 
 #endif
 
+static int packet_net_init(struct net *net)
+{
+	rwlock_init(&net->packet_sklist_lock);
+	INIT_HLIST_HEAD(&net->packet_sklist);
+
+	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void packet_net_exit(struct net *net)
+{
+	proc_net_remove(net, "packet");
+}
+
+static struct pernet_operations packet_net_ops = {
+	.init = packet_net_init,
+	.exit = packet_net_exit,
+};
+
+
 static void __exit packet_exit(void)
 {
-	proc_net_remove(&init_net, "packet");
 	unregister_netdevice_notifier(&packet_netdev_notifier);
+	unregister_pernet_subsys(&packet_net_ops);
 	sock_unregister(PF_PACKET);
 	proto_unregister(&packet_proto);
 }
@@ -1955,8 +1994,8 @@ static int __init packet_init(void)
 		goto out;
 
 	sock_register(&packet_family_ops);
+	register_pernet_subsys(&packet_net_ops);
 	register_netdevice_notifier(&packet_netdev_notifier);
-	proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops);
 out:
 	return rc;
 }
-- 
cgit v1.2.3


From 097e66c578459f79e3a2128c54e9df5194e1419a Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 19 Nov 2007 22:29:30 -0800
Subject: [NET]: Make AF_UNIX per network namespace safe [v2]

Because of the global nature of garbage collection, and because of the
cost of per namespace hash tables unix_socket_table has been kept
global.  With a filter added on lookups so we don't see sockets from
the wrong namespace.

Currently I don't fold the namesapce into the hash so multiple
namespaces using the same socket name will be guaranteed a hash
collision.

Changes from v1:
- fixed unix_seq_open

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 118 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 92 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 060bba4567d..ecad42b72ad 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -270,7 +270,8 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 	spin_unlock(&unix_table_lock);
 }
 
-static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
+static struct sock *__unix_find_socket_byname(struct net *net,
+					      struct sockaddr_un *sunname,
 					      int len, int type, unsigned hash)
 {
 	struct sock *s;
@@ -279,6 +280,9 @@ static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
+		if (s->sk_net != net)
+			continue;
+
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			goto found;
@@ -288,21 +292,22 @@ found:
 	return s;
 }
 
-static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
+static inline struct sock *unix_find_socket_byname(struct net *net,
+						   struct sockaddr_un *sunname,
 						   int len, int type,
 						   unsigned hash)
 {
 	struct sock *s;
 
 	spin_lock(&unix_table_lock);
-	s = __unix_find_socket_byname(sunname, len, type, hash);
+	s = __unix_find_socket_byname(net, sunname, len, type, hash);
 	if (s)
 		sock_hold(s);
 	spin_unlock(&unix_table_lock);
 	return s;
 }
 
-static struct sock *unix_find_socket_byinode(struct inode *i)
+static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 {
 	struct sock *s;
 	struct hlist_node *node;
@@ -312,6 +317,9 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->dentry;
 
+		if (s->sk_net != net)
+			continue;
+
 		if(dentry && dentry->d_inode == i)
 		{
 			sock_hold(s);
@@ -631,9 +639,6 @@ out:
 
 static int unix_create(struct net *net, struct socket *sock, int protocol)
 {
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
 	if (protocol && protocol != PF_UNIX)
 		return -EPROTONOSUPPORT;
 
@@ -677,6 +682,7 @@ static int unix_release(struct socket *sock)
 static int unix_autobind(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
+	struct net *net = sk->sk_net;
 	struct unix_sock *u = unix_sk(sk);
 	static u32 ordernum = 1;
 	struct unix_address * addr;
@@ -703,7 +709,7 @@ retry:
 	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
 
-	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
+	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 				      addr->hash)) {
 		spin_unlock(&unix_table_lock);
 		/* Sanity yield. It is unusual case, but yet... */
@@ -723,7 +729,8 @@ out:	mutex_unlock(&u->readlock);
 	return err;
 }
 
-static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
+static struct sock *unix_find_other(struct net *net,
+				    struct sockaddr_un *sunname, int len,
 				    int type, unsigned hash, int *error)
 {
 	struct sock *u;
@@ -741,7 +748,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
 		err = -ECONNREFUSED;
 		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
 			goto put_fail;
-		u=unix_find_socket_byinode(nd.dentry->d_inode);
+		u=unix_find_socket_byinode(net, nd.dentry->d_inode);
 		if (!u)
 			goto put_fail;
 
@@ -757,7 +764,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
 		}
 	} else {
 		err = -ECONNREFUSED;
-		u=unix_find_socket_byname(sunname, len, type, hash);
+		u=unix_find_socket_byname(net, sunname, len, type, hash);
 		if (u) {
 			struct dentry *dentry;
 			dentry = unix_sk(u)->dentry;
@@ -779,6 +786,7 @@ fail:
 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
+	struct net *net = sk->sk_net;
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 	struct dentry * dentry = NULL;
@@ -853,7 +861,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	if (!sunaddr->sun_path[0]) {
 		err = -EADDRINUSE;
-		if (__unix_find_socket_byname(sunaddr, addr_len,
+		if (__unix_find_socket_byname(net, sunaddr, addr_len,
 					      sk->sk_type, hash)) {
 			unix_release_addr(addr);
 			goto out_unlock;
@@ -919,6 +927,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 			      int alen, int flags)
 {
 	struct sock *sk = sock->sk;
+	struct net *net = sk->sk_net;
 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
 	struct sock *other;
 	unsigned hash;
@@ -935,7 +944,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 			goto out;
 
 restart:
-		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
+		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 		if (!other)
 			goto out;
 
@@ -1015,6 +1024,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 {
 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 	struct sock *sk = sock->sk;
+	struct net *net = sk->sk_net;
 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
 	struct sock *newsk = NULL;
 	struct sock *other = NULL;
@@ -1054,7 +1064,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 
 restart:
 	/*  Find listening sock. */
-	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
+	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
 	if (!other)
 		goto out;
 
@@ -1330,6 +1340,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 {
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
+	struct net *net = sk->sk_net;
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr=msg->msg_name;
 	struct sock *other = NULL;
@@ -1393,7 +1404,7 @@ restart:
 		if (sunaddr == NULL)
 			goto out_free;
 
-		other = unix_find_other(sunaddr, namelen, sk->sk_type,
+		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
 					hash, &err);
 		if (other==NULL)
 			goto out_free;
@@ -2006,12 +2017,18 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
 
 
 #ifdef CONFIG_PROC_FS
-static struct sock *unix_seq_idx(int *iter, loff_t pos)
+struct unix_iter_state {
+	struct net *net;
+	int i;
+};
+static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
 {
 	loff_t off = 0;
 	struct sock *s;
 
-	for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
+	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
+		if (s->sk_net != iter->net)
+			continue;
 		if (off == pos)
 			return s;
 		++off;
@@ -2022,17 +2039,24 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos)
 
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct unix_iter_state *iter = seq->private;
 	spin_lock(&unix_table_lock);
-	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
+	return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
 }
 
 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct unix_iter_state *iter = seq->private;
+	struct sock *sk = v;
 	++*pos;
 
 	if (v == (void *)1)
-		return first_unix_socket(seq->private);
-	return next_unix_socket(seq->private, v);
+		sk = first_unix_socket(&iter->i);
+	else
+		sk = next_unix_socket(&iter->i, sk);
+	while (sk && (sk->sk_net != iter->net))
+		sk = next_unix_socket(&iter->i, sk);
+	return sk;
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -2094,7 +2118,27 @@ static const struct seq_operations unix_seq_ops = {
 
 static int unix_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &unix_seq_ops, sizeof(int));
+	struct unix_iter_state *it;
+
+	it = __seq_open_private(file, &unix_seq_ops,
+				sizeof(struct unix_iter_state));
+	if (it == NULL)
+		return -ENOMEM;
+
+	it->net = get_proc_net(inode);
+	if (it->net == NULL) {
+		seq_release_private(inode, file);
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static int unix_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct unix_iter_state *iter = seq->private;
+	put_net(iter->net);
+	return seq_release_private(inode, file);
 }
 
 static const struct file_operations unix_seq_fops = {
@@ -2102,7 +2146,7 @@ static const struct file_operations unix_seq_fops = {
 	.open		= unix_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= unix_seq_release,
 };
 
 #endif
@@ -2113,6 +2157,30 @@ static struct net_proto_family unix_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
+
+static int unix_net_init(struct net *net)
+{
+	int error = -ENOMEM;
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops))
+		goto out;
+#endif
+	error = 0;
+out:
+	return 0;
+}
+
+static void unix_net_exit(struct net *net)
+{
+	proc_net_remove(net, "unix");
+}
+
+static struct pernet_operations unix_net_ops = {
+	.init = unix_net_init,
+	.exit = unix_net_exit,
+};
+
 static int __init af_unix_init(void)
 {
 	int rc = -1;
@@ -2128,9 +2196,7 @@ static int __init af_unix_init(void)
 	}
 
 	sock_register(&unix_family_ops);
-#ifdef CONFIG_PROC_FS
-	proc_net_fops_create(&init_net, "unix", 0, &unix_seq_fops);
-#endif
+	register_pernet_subsys(&unix_net_ops);
 	unix_sysctl_register();
 out:
 	return rc;
@@ -2140,8 +2206,8 @@ static void __exit af_unix_exit(void)
 {
 	sock_unregister(PF_UNIX);
 	unix_sysctl_unregister();
-	proc_net_remove(&init_net, "unix");
 	proto_unregister(&unix_proto);
+	unregister_pernet_subsys(&unix_net_ops);
 }
 
 module_init(af_unix_init);
-- 
cgit v1.2.3


From e372c41401993b45c721c4d92730e7e0a79f7c1b Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 19 Nov 2007 22:31:54 -0800
Subject: [NET]: Consolidate net namespace related proc files creation.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c           | 28 +++++-----------------------
 net/core/dev_mcast.c     | 26 ++++----------------------
 net/netlink/af_netlink.c | 33 +++++++--------------------------
 net/packet/af_packet.c   | 26 ++++----------------------
 net/unix/af_unix.c       | 31 ++++++-------------------------
 net/wireless/wext.c      | 24 +++---------------------
 6 files changed, 29 insertions(+), 139 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0879f52115e..d0e23d8310f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2364,7 +2364,7 @@ static int dev_ifconf(struct net *net, char __user *arg)
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	loff_t off;
 	struct net_device *dev;
 
@@ -2382,7 +2382,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	++*pos;
 	return v == SEQ_START_TOKEN ?
 		first_net_device(net) : next_net_device((struct net_device *)v);
@@ -2481,26 +2481,8 @@ static const struct seq_operations dev_seq_ops = {
 
 static int dev_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int res;
-	res =  seq_open(file, &dev_seq_ops);
-	if (!res) {
-		seq = file->private_data;
-		seq->private = get_proc_net(inode);
-		if (!seq->private) {
-			seq_release(inode, file);
-			res = -ENXIO;
-		}
-	}
-	return res;
-}
-
-static int dev_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return seq_release(inode, file);
+	return seq_open_net(inode, file, &dev_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations dev_seq_fops = {
@@ -2508,7 +2490,7 @@ static const struct file_operations dev_seq_fops = {
 	.open    = dev_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = dev_seq_release,
+	.release = seq_release_net,
 };
 
 static const struct seq_operations softnet_seq_ops = {
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 69fff16ece1..63f0b33d7ce 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL(dev_mc_unsync);
 #ifdef CONFIG_PROC_FS
 static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	struct net_device *dev;
 	loff_t off = 0;
 
@@ -241,26 +241,8 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int res;
-	res = seq_open(file, &dev_mc_seq_ops);
-	if (!res) {
-		seq = file->private_data;
-		seq->private = get_proc_net(inode);
-		if (!seq->private) {
-			seq_release(inode, file);
-			res = -ENXIO;
-		}
-	}
-	return res;
-}
-
-static int dev_mc_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return seq_release(inode, file);
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations dev_mc_seq_fops = {
@@ -268,7 +250,7 @@ static const struct file_operations dev_mc_seq_fops = {
 	.open    = dev_mc_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = dev_mc_seq_release,
+	.release = seq_release_net,
 };
 
 #endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index de3988ba1f4..1518244ffad 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1681,7 +1681,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
 
 #ifdef CONFIG_PROC_FS
 struct nl_seq_iter {
-	struct net *net;
+	struct seq_net_private p;
 	int link;
 	int hash_idx;
 };
@@ -1699,7 +1699,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 
 		for (j = 0; j <= hash->mask; j++) {
 			sk_for_each(s, node, &hash->table[j]) {
-				if (iter->net != s->sk_net)
+				if (iter->p.net != s->sk_net)
 					continue;
 				if (off == pos) {
 					iter->link = i;
@@ -1734,7 +1734,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	s = v;
 	do {
 		s = sk_next(s);
-	} while (s && (iter->net != s->sk_net));
+	} while (s && (iter->p.net != s->sk_net));
 	if (s)
 		return s;
 
@@ -1746,7 +1746,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 		for (; j <= hash->mask; j++) {
 			s = sk_head(&hash->table[j]);
-			while (s && (iter->net != s->sk_net))
+			while (s && (iter->p.net != s->sk_net))
 				s = sk_next(s);
 			if (s) {
 				iter->link = i;
@@ -1802,27 +1802,8 @@ static const struct seq_operations netlink_seq_ops = {
 
 static int netlink_seq_open(struct inode *inode, struct file *file)
 {
-	struct nl_seq_iter *iter;
-
-	iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter));
-	if (!iter)
-		return -ENOMEM;
-
-	iter->net = get_proc_net(inode);
-	if (!iter->net) {
-		seq_release_private(inode, file);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-static int netlink_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct nl_seq_iter *iter = seq->private;
-	put_net(iter->net);
-	return seq_release_private(inode, file);
+	return seq_open_net(inode, file, &netlink_seq_ops,
+				sizeof(struct nl_seq_iter));
 }
 
 static const struct file_operations netlink_seq_fops = {
@@ -1830,7 +1811,7 @@ static const struct file_operations netlink_seq_fops = {
 	.open		= netlink_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= netlink_seq_release,
+	.release	= seq_release_net,
 };
 
 #endif
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 45e3cbcb276..ace29f1c4c5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1871,7 +1871,7 @@ static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
 
 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	read_lock(&net->packet_sklist_lock);
 	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
 }
@@ -1924,26 +1924,8 @@ static const struct seq_operations packet_seq_ops = {
 
 static int packet_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int res;
-	res = seq_open(file, &packet_seq_ops);
-	if (!res) {
-		seq = file->private_data;
-		seq->private = get_proc_net(inode);
-		if (!seq->private) {
-			seq_release(inode, file);
-			res = -ENXIO;
-		}
-	}
-	return res;
-}
-
-static int packet_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq=  file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return seq_release(inode, file);
+	return seq_open_net(inode, file, &packet_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations packet_seq_fops = {
@@ -1951,7 +1933,7 @@ static const struct file_operations packet_seq_fops = {
 	.open		= packet_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= packet_seq_release,
+	.release	= seq_release_net,
 };
 
 #endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ecad42b72ad..eacddb21a9b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2018,7 +2018,7 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
 
 #ifdef CONFIG_PROC_FS
 struct unix_iter_state {
-	struct net *net;
+	struct seq_net_private p;
 	int i;
 };
 static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
@@ -2027,7 +2027,7 @@ static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
 	struct sock *s;
 
 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
-		if (s->sk_net != iter->net)
+		if (s->sk_net != iter->p.net)
 			continue;
 		if (off == pos)
 			return s;
@@ -2054,7 +2054,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		sk = first_unix_socket(&iter->i);
 	else
 		sk = next_unix_socket(&iter->i, sk);
-	while (sk && (sk->sk_net != iter->net))
+	while (sk && (sk->sk_net != iter->p.net))
 		sk = next_unix_socket(&iter->i, sk);
 	return sk;
 }
@@ -2118,27 +2118,8 @@ static const struct seq_operations unix_seq_ops = {
 
 static int unix_seq_open(struct inode *inode, struct file *file)
 {
-	struct unix_iter_state *it;
-
-	it = __seq_open_private(file, &unix_seq_ops,
-				sizeof(struct unix_iter_state));
-	if (it == NULL)
-		return -ENOMEM;
-
-	it->net = get_proc_net(inode);
-	if (it->net == NULL) {
-		seq_release_private(inode, file);
-		return -ENXIO;
-	}
-	return 0;
-}
-
-static int unix_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct unix_iter_state *iter = seq->private;
-	put_net(iter->net);
-	return seq_release_private(inode, file);
+	return seq_open_net(inode, file, &unix_seq_ops,
+			    sizeof(struct unix_iter_state));
 }
 
 static const struct file_operations unix_seq_fops = {
@@ -2146,7 +2127,7 @@ static const struct file_operations unix_seq_fops = {
 	.open		= unix_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= unix_seq_release,
+	.release	= seq_release_net,
 };
 
 #endif
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index db03ed5ce05..1e4cf615f87 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -673,26 +673,8 @@ static const struct seq_operations wireless_seq_ops = {
 
 static int wireless_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int res;
-	res = seq_open(file, &wireless_seq_ops);
-	if (!res) {
-		seq = file->private_data;
-		seq->private = get_proc_net(inode);
-		if (!seq->private) {
-			seq_release(inode, file);
-			res = -ENXIO;
-		}
-	}
-	return res;
-}
-
-static int wireless_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return seq_release(inode, file);
+	return seq_open_net(inode, file, &wireless_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations wireless_seq_fops = {
@@ -700,7 +682,7 @@ static const struct file_operations wireless_seq_fops = {
 	.open    = wireless_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = wireless_seq_release,
+	.release = seq_release_net,
 };
 
 int wext_proc_init(struct net *net)
-- 
cgit v1.2.3


From 7bc54c90307b4bc3d7fb2ffd6ad8fbda0671a45e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:35:07 -0800
Subject: [IPv4] RAW: Compact the API for the kernel

The raw sockets functions are explicitly used from
inside the kernel in two places:

1. in ip_local_deliver_finish to intercept skb-s
2. in icmp_error

For this purposes many functions and even data structures,
that are naturally internal for raw protocol, are exported.

Compact the API to two functions and hide all the other
(including hash table and rwlock) inside the net/ipv4/raw.c

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c     | 15 +--------------
 net/ipv4/ip_input.c | 16 ++++------------
 net/ipv4/raw.c      | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 82baea02648..13d74598d3e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -603,7 +603,6 @@ static void icmp_unreach(struct sk_buff *skb)
 	struct icmphdr *icmph;
 	int hash, protocol;
 	struct net_protocol *ipprot;
-	struct sock *raw_sk;
 	u32 info = 0;
 
 	/*
@@ -697,21 +696,9 @@ static void icmp_unreach(struct sk_buff *skb)
 	/*
 	 *	Deliver ICMP message to raw sockets. Pretty useless feature?
 	 */
+	raw_icmp_error(skb, protocol, info);
 
-	/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
 	hash = protocol & (MAX_INET_PROTOS - 1);
-	read_lock(&raw_v4_lock);
-	if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
-		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
-						 iph->saddr,
-						 skb->dev->ifindex)) != NULL) {
-			raw_err(raw_sk, skb, info);
-			raw_sk = sk_next(raw_sk);
-			iph = (struct iphdr *)skb->data;
-		}
-	}
-	read_unlock(&raw_v4_lock);
-
 	rcu_read_lock();
 	ipprot = rcu_dereference(inet_protos[hash]);
 	if (ipprot && ipprot->err_handler)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 4068e178d74..65631391d47 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -204,22 +204,14 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 
 	rcu_read_lock();
 	{
-		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
 		int protocol = ip_hdr(skb)->protocol;
-		int hash;
-		struct sock *raw_sk;
+		int hash, raw;
 		struct net_protocol *ipprot;
 
 	resubmit:
-		hash = protocol & (MAX_INET_PROTOS - 1);
-		raw_sk = sk_head(&raw_v4_htable[hash]);
-
-		/* If there maybe a raw socket we must check - if not we
-		 * don't care less
-		 */
-		if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
-			raw_sk = NULL;
+		raw = raw_local_deliver(skb, protocol);
 
+		hash = protocol & (MAX_INET_PROTOS - 1);
 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
 			int ret;
 
@@ -237,7 +229,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 			}
 			IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
 		} else {
-			if (!raw_sk) {
+			if (!raw) {
 				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 					IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
 					icmp_send(skb, ICMP_DEST_UNREACH,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b80987d2fc5..8a506618b91 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -80,8 +80,10 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
-struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE];
-DEFINE_RWLOCK(raw_v4_lock);
+#define RAWV4_HTABLE_SIZE	MAX_INET_PROTOS
+
+static struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE];
+static DEFINE_RWLOCK(raw_v4_lock);
 
 static void raw_v4_hash(struct sock *sk)
 {
@@ -102,7 +104,7 @@ static void raw_v4_unhash(struct sock *sk)
 	write_unlock_bh(&raw_v4_lock);
 }
 
-struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
+static struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
 			     __be32 raddr, __be32 laddr,
 			     int dif)
 {
@@ -150,7 +152,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
  * RFC 1122: SHOULD pass TOS value up to the transport layer.
  * -> It does. And not only TOS, but all IP header.
  */
-int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 {
 	struct sock *sk;
 	struct hlist_head *head;
@@ -182,7 +184,25 @@ out:
 	return delivered;
 }
 
-void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
+int raw_local_deliver(struct sk_buff *skb, int protocol)
+{
+	int hash;
+	struct sock *raw_sk;
+
+	hash = protocol & (RAWV4_HTABLE_SIZE - 1);
+	raw_sk = sk_head(&raw_v4_htable[hash]);
+
+	/* If there maybe a raw socket we must check - if not we
+	 * don't care less
+	 */
+	if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
+		raw_sk = NULL;
+
+	return raw_sk != NULL;
+
+}
+
+static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	const int type = icmp_hdr(skb)->type;
@@ -236,6 +256,29 @@ void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
 	}
 }
 
+void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
+{
+	int hash;
+	struct sock *raw_sk;
+	struct iphdr *iph;
+
+	hash = protocol & (RAWV4_HTABLE_SIZE - 1);
+
+	read_lock(&raw_v4_lock);
+	raw_sk = sk_head(&raw_v4_htable[hash]);
+	if (raw_sk != NULL) {
+		iph = (struct iphdr *)skb->data;
+		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
+						iph->saddr,
+						skb->dev->ifindex)) != NULL) {
+			raw_err(raw_sk, skb, info);
+			raw_sk = sk_next(raw_sk);
+			iph = (struct iphdr *)skb->data;
+		}
+	}
+	read_unlock(&raw_v4_lock);
+}
+
 static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	/* Charge it to the socket. */
-- 
cgit v1.2.3


From 69d6da0b0faa70249a243a14e6066c013e9294e5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:35:57 -0800
Subject: [IPv6] RAW: Compact the API for the kernel

Same as in the previous patch for ipv4, compact the
API and hide hash table and rwlock inside the raw.c
file.

Plus fix some "bad" places from checkpatch.pl point
of view (assignments inside if()).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c      | 15 +--------------
 net/ipv6/ip6_input.c |  9 +++------
 net/ipv6/raw.c       | 52 ++++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 48 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f1240688dc5..93c96cfd5ee 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -555,9 +555,7 @@ out:
 
 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 {
-	struct in6_addr *saddr, *daddr;
 	struct inet6_protocol *ipprot;
-	struct sock *sk;
 	int inner_offset;
 	int hash;
 	u8 nexthdr;
@@ -579,9 +577,6 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 	if (!pskb_may_pull(skb, inner_offset+8))
 		return;
 
-	saddr = &ipv6_hdr(skb)->saddr;
-	daddr = &ipv6_hdr(skb)->daddr;
-
 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 	   Without this we will not able f.e. to make source routed
 	   pmtu discovery.
@@ -597,15 +592,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 	rcu_read_unlock();
 
-	read_lock(&raw_v6_lock);
-	if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
-		while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr,
-					    IP6CB(skb)->iif))) {
-			rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
-			sk = sk_next(sk);
-		}
-	}
-	read_unlock(&raw_v6_lock);
+	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 }
 
 /*
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 79610b4bad3..178aebc0427 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -153,9 +153,8 @@ out:
 static int ip6_input_finish(struct sk_buff *skb)
 {
 	struct inet6_protocol *ipprot;
-	struct sock *raw_sk;
 	unsigned int nhoff;
-	int nexthdr;
+	int nexthdr, raw;
 	u8 hash;
 	struct inet6_dev *idev;
 
@@ -171,9 +170,7 @@ resubmit:
 	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb_network_header(skb)[nhoff];
 
-	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
-	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
-		raw_sk = NULL;
+	raw = raw6_local_deliver(skb, nexthdr);
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
 	if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
@@ -206,7 +203,7 @@ resubmit:
 		else if (ret == 0)
 			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
 	} else {
-		if (!raw_sk) {
+		if (!raw) {
 			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 				IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
 				icmpv6_send(skb, ICMPV6_PARAMPROB,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ad622cc11bd..53f01b4982c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -60,8 +60,10 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
-DEFINE_RWLOCK(raw_v6_lock);
+#define RAWV6_HTABLE_SIZE	MAX_INET_PROTOS
+
+static struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
+static DEFINE_RWLOCK(raw_v6_lock);
 
 static void raw_v6_hash(struct sock *sk)
 {
@@ -83,10 +85,8 @@ static void raw_v6_unhash(struct sock *sk)
 }
 
 
-/* Grumble... icmp and ip_input want to get at this... */
-struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
-			     struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
-			     int dif)
+static struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
+		struct in6_addr *loc_addr, struct in6_addr *rmt_addr, int dif)
 {
 	struct hlist_node *node;
 	int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -167,7 +167,7 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
  *
  *	Caller owns SKB so we must make clones.
  */
-int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct in6_addr *saddr;
 	struct in6_addr *daddr;
@@ -242,6 +242,17 @@ out:
 	return delivered;
 }
 
+int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
+{
+	struct sock *raw_sk;
+
+	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
+	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
+		raw_sk = NULL;
+
+	return raw_sk != NULL;
+}
+
 /* This cleans up af_inet6 a bit. -DaveM */
 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -316,7 +327,7 @@ out:
 	return err;
 }
 
-void rawv6_err(struct sock *sk, struct sk_buff *skb,
+static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	       struct inet6_skb_parm *opt,
 	       int type, int code, int offset, __be32 info)
 {
@@ -350,6 +361,31 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	}
 }
 
+void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
+		int type, int code, int inner_offset, __be32 info)
+{
+	struct sock *sk;
+	int hash;
+	struct in6_addr *saddr, *daddr;
+
+	hash = nexthdr & (RAWV6_HTABLE_SIZE - 1);
+
+	read_lock(&raw_v6_lock);
+	sk = sk_head(&raw_v6_htable[hash]);
+	if (sk != NULL) {
+		saddr = &ipv6_hdr(skb)->saddr;
+		daddr = &ipv6_hdr(skb)->daddr;
+
+		while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr,
+						IP6CB(skb)->iif))) {
+			rawv6_err(sk, skb, NULL, type, code,
+					inner_offset, info);
+			sk = sk_next(sk);
+		}
+	}
+	read_unlock(&raw_v6_lock);
+}
+
 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
-- 
cgit v1.2.3


From b673e4dfc8f29e5bfe4d342029b793e9d504f6dd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:36:45 -0800
Subject: [RAW]: Introduce raw_hashinfo structure

The ipv4/raw.c and ipv6/raw.c contain many common code (most
of which is proc interface) which can be consolidated.

Most of the places to consolidate deal with the raw sockets
hashtable, so introduce a struct raw_hashinfo which describes
the raw sockets hash.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 50 +++++++++++++++++++++++++-------------------------
 net/ipv6/raw.c | 49 +++++++++++++++++++++++++------------------------
 2 files changed, 50 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8a506618b91..dd9f00b3ab4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -80,28 +80,27 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
-#define RAWV4_HTABLE_SIZE	MAX_INET_PROTOS
-
-static struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE];
-static DEFINE_RWLOCK(raw_v4_lock);
+static struct raw_hashinfo raw_v4_hashinfo = {
+	.lock = __RW_LOCK_UNLOCKED(),
+};
 
 static void raw_v4_hash(struct sock *sk)
 {
-	struct hlist_head *head = &raw_v4_htable[inet_sk(sk)->num &
-						 (RAWV4_HTABLE_SIZE - 1)];
+	struct hlist_head *head = &raw_v4_hashinfo.ht[inet_sk(sk)->num &
+						 (RAW_HTABLE_SIZE - 1)];
 
-	write_lock_bh(&raw_v4_lock);
+	write_lock_bh(&raw_v4_hashinfo.lock);
 	sk_add_node(sk, head);
 	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&raw_v4_lock);
+	write_unlock_bh(&raw_v4_hashinfo.lock);
 }
 
 static void raw_v4_unhash(struct sock *sk)
 {
-	write_lock_bh(&raw_v4_lock);
+	write_lock_bh(&raw_v4_hashinfo.lock);
 	if (sk_del_node_init(sk))
 		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v4_lock);
+	write_unlock_bh(&raw_v4_hashinfo.lock);
 }
 
 static struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
@@ -158,8 +157,8 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 	struct hlist_head *head;
 	int delivered = 0;
 
-	read_lock(&raw_v4_lock);
-	head = &raw_v4_htable[hash];
+	read_lock(&raw_v4_hashinfo.lock);
+	head = &raw_v4_hashinfo.ht[hash];
 	if (hlist_empty(head))
 		goto out;
 	sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
@@ -180,7 +179,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 				     skb->dev->ifindex);
 	}
 out:
-	read_unlock(&raw_v4_lock);
+	read_unlock(&raw_v4_hashinfo.lock);
 	return delivered;
 }
 
@@ -189,8 +188,8 @@ int raw_local_deliver(struct sk_buff *skb, int protocol)
 	int hash;
 	struct sock *raw_sk;
 
-	hash = protocol & (RAWV4_HTABLE_SIZE - 1);
-	raw_sk = sk_head(&raw_v4_htable[hash]);
+	hash = protocol & (RAW_HTABLE_SIZE - 1);
+	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
 
 	/* If there maybe a raw socket we must check - if not we
 	 * don't care less
@@ -262,10 +261,10 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	struct sock *raw_sk;
 	struct iphdr *iph;
 
-	hash = protocol & (RAWV4_HTABLE_SIZE - 1);
+	hash = protocol & (RAW_HTABLE_SIZE - 1);
 
-	read_lock(&raw_v4_lock);
-	raw_sk = sk_head(&raw_v4_htable[hash]);
+	read_lock(&raw_v4_hashinfo.lock);
+	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
 	if (raw_sk != NULL) {
 		iph = (struct iphdr *)skb->data;
 		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
@@ -276,7 +275,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 			iph = (struct iphdr *)skb->data;
 		}
 	}
-	read_unlock(&raw_v4_lock);
+	read_unlock(&raw_v4_hashinfo.lock);
 }
 
 static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
@@ -844,10 +843,11 @@ static struct sock *raw_get_first(struct seq_file *seq)
 	struct sock *sk;
 	struct raw_iter_state* state = raw_seq_private(seq);
 
-	for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) {
+	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
+			++state->bucket) {
 		struct hlist_node *node;
 
-		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
+		sk_for_each(sk, node, &raw_v4_hashinfo.ht[state->bucket])
 			if (sk->sk_family == PF_INET)
 				goto found;
 	}
@@ -866,8 +866,8 @@ try_again:
 		;
 	} while (sk && sk->sk_family != PF_INET);
 
-	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
-		sk = sk_head(&raw_v4_htable[state->bucket]);
+	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
+		sk = sk_head(&raw_v4_hashinfo.ht[state->bucket]);
 		goto try_again;
 	}
 	return sk;
@@ -885,7 +885,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
 
 static void *raw_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock(&raw_v4_lock);
+	read_lock(&raw_v4_hashinfo.lock);
 	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -903,7 +903,7 @@ static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void raw_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&raw_v4_lock);
+	read_unlock(&raw_v4_hashinfo.lock);
 }
 
 static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 53f01b4982c..15c72a6365a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -54,34 +54,34 @@
 #include <net/mip6.h>
 #endif
 
+#include <net/raw.h>
 #include <net/rawv6.h>
 #include <net/xfrm.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-#define RAWV6_HTABLE_SIZE	MAX_INET_PROTOS
-
-static struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
-static DEFINE_RWLOCK(raw_v6_lock);
+static struct raw_hashinfo raw_v6_hashinfo = {
+	.lock = __RW_LOCK_UNLOCKED(),
+};
 
 static void raw_v6_hash(struct sock *sk)
 {
-	struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num &
-						 (RAWV6_HTABLE_SIZE - 1)];
+	struct hlist_head *list = &raw_v6_hashinfo.ht[inet_sk(sk)->num &
+						 (RAW_HTABLE_SIZE - 1)];
 
-	write_lock_bh(&raw_v6_lock);
+	write_lock_bh(&raw_v6_hashinfo.lock);
 	sk_add_node(sk, list);
 	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_lock);
+	write_unlock_bh(&raw_v6_hashinfo.lock);
 }
 
 static void raw_v6_unhash(struct sock *sk)
 {
-	write_lock_bh(&raw_v6_lock);
+	write_lock_bh(&raw_v6_hashinfo.lock);
 	if (sk_del_node_init(sk))
 		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_lock);
+	write_unlock_bh(&raw_v6_hashinfo.lock);
 }
 
 
@@ -180,8 +180,8 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
 
-	read_lock(&raw_v6_lock);
-	sk = sk_head(&raw_v6_htable[hash]);
+	read_lock(&raw_v6_hashinfo.lock);
+	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
 
 	/*
 	 *	The first socket found will be delivered after
@@ -238,7 +238,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 				     IP6CB(skb)->iif);
 	}
 out:
-	read_unlock(&raw_v6_lock);
+	read_unlock(&raw_v6_hashinfo.lock);
 	return delivered;
 }
 
@@ -246,7 +246,7 @@ int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct sock *raw_sk;
 
-	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
+	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
 		raw_sk = NULL;
 
@@ -368,10 +368,10 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 	int hash;
 	struct in6_addr *saddr, *daddr;
 
-	hash = nexthdr & (RAWV6_HTABLE_SIZE - 1);
+	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
 
-	read_lock(&raw_v6_lock);
-	sk = sk_head(&raw_v6_htable[hash]);
+	read_lock(&raw_v6_hashinfo.lock);
+	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
 	if (sk != NULL) {
 		saddr = &ipv6_hdr(skb)->saddr;
 		daddr = &ipv6_hdr(skb)->daddr;
@@ -383,7 +383,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 			sk = sk_next(sk);
 		}
 	}
-	read_unlock(&raw_v6_lock);
+	read_unlock(&raw_v6_hashinfo.lock);
 }
 
 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
@@ -1221,8 +1221,9 @@ static struct sock *raw6_get_first(struct seq_file *seq)
 	struct hlist_node *node;
 	struct raw6_iter_state* state = raw6_seq_private(seq);
 
-	for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
-		sk_for_each(sk, node, &raw_v6_htable[state->bucket])
+	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
+			++state->bucket)
+		sk_for_each(sk, node, &raw_v6_hashinfo.ht[state->bucket])
 			if (sk->sk_family == PF_INET6)
 				goto out;
 	sk = NULL;
@@ -1240,8 +1241,8 @@ try_again:
 		;
 	} while (sk && sk->sk_family != PF_INET6);
 
-	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
-		sk = sk_head(&raw_v6_htable[state->bucket]);
+	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
+		sk = sk_head(&raw_v6_hashinfo.ht[state->bucket]);
 		goto try_again;
 	}
 	return sk;
@@ -1258,7 +1259,7 @@ static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
 
 static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock(&raw_v6_lock);
+	read_lock(&raw_v6_hashinfo.lock);
 	return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -1276,7 +1277,7 @@ static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void raw6_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&raw_v6_lock);
+	read_unlock(&raw_v6_hashinfo.lock);
 }
 
 static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
-- 
cgit v1.2.3


From 65b4c50b47c4ac3d2b5a82e5553b8e5613fb9585 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:37:24 -0800
Subject: [RAW]: Consolidate proto->hash callback

Having the raw_hashinfo it's easy to consolidate the
raw[46]_hash functions.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 17 ++++++++++++-----
 net/ipv6/raw.c |  8 +-------
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dd9f00b3ab4..28b95ae5d2d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -84,15 +84,22 @@ static struct raw_hashinfo raw_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(),
 };
 
-static void raw_v4_hash(struct sock *sk)
+void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h)
 {
-	struct hlist_head *head = &raw_v4_hashinfo.ht[inet_sk(sk)->num &
-						 (RAW_HTABLE_SIZE - 1)];
+	struct hlist_head *head;
 
-	write_lock_bh(&raw_v4_hashinfo.lock);
+	head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
+
+	write_lock_bh(&h->lock);
 	sk_add_node(sk, head);
 	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&raw_v4_hashinfo.lock);
+	write_unlock_bh(&h->lock);
+}
+EXPORT_SYMBOL_GPL(raw_hash_sk);
+
+static void raw_v4_hash(struct sock *sk)
+{
+	raw_hash_sk(sk, &raw_v4_hashinfo);
 }
 
 static void raw_v4_unhash(struct sock *sk)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 15c72a6365a..70db6f49453 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -67,13 +67,7 @@ static struct raw_hashinfo raw_v6_hashinfo = {
 
 static void raw_v6_hash(struct sock *sk)
 {
-	struct hlist_head *list = &raw_v6_hashinfo.ht[inet_sk(sk)->num &
-						 (RAW_HTABLE_SIZE - 1)];
-
-	write_lock_bh(&raw_v6_hashinfo.lock);
-	sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_hashinfo.lock);
+	raw_hash_sk(sk, &raw_v6_hashinfo);
 }
 
 static void raw_v6_unhash(struct sock *sk)
-- 
cgit v1.2.3


From ab70768ec78c6784958bab3b58fbe3f4150006df Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:37:58 -0800
Subject: [RAW]: Consolidate proto->unhash callback

Same as the ->hash one, this is easily consolidated.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 14 ++++++++++----
 net/ipv6/raw.c |  5 +----
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 28b95ae5d2d..d24501a342a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -97,6 +97,15 @@ void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h)
 }
 EXPORT_SYMBOL_GPL(raw_hash_sk);
 
+void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h)
+{
+	write_lock_bh(&h->lock);
+	if (sk_del_node_init(sk))
+		sock_prot_dec_use(sk->sk_prot);
+	write_unlock_bh(&h->lock);
+}
+EXPORT_SYMBOL_GPL(raw_unhash_sk);
+
 static void raw_v4_hash(struct sock *sk)
 {
 	raw_hash_sk(sk, &raw_v4_hashinfo);
@@ -104,10 +113,7 @@ static void raw_v4_hash(struct sock *sk)
 
 static void raw_v4_unhash(struct sock *sk)
 {
-	write_lock_bh(&raw_v4_hashinfo.lock);
-	if (sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v4_hashinfo.lock);
+	raw_unhash_sk(sk, &raw_v4_hashinfo);
 }
 
 static struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 70db6f49453..422d27cfbe1 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,10 +72,7 @@ static void raw_v6_hash(struct sock *sk)
 
 static void raw_v6_unhash(struct sock *sk)
 {
-	write_lock_bh(&raw_v6_hashinfo.lock);
-	if (sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_hashinfo.lock);
+	raw_unhash_sk(sk, &raw_v6_hashinfo);
 }
 
 
-- 
cgit v1.2.3


From 42a73808ed4f30b739eb52bcbb33a02fe62ceef5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:38:33 -0800
Subject: [RAW]: Consolidate proc interface.

Both ipv6/raw.c and ipv4/raw.c use the seq files to walk
through the raw sockets hash and show them.

The "walking" code is rather huge, but is identical in both
cases. The difference is the hash table to walk over and
the protocol family to check (this was not in the first
virsion of the patch, which was noticed by YOSHIFUJI)

Make the ->open store the needed hash table and the family
on the allocated raw_iter_state and make the start/next/stop
callbacks work with it.

This removes most of the code.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 52 ++++++++++++++++++++++++-------------
 net/ipv6/raw.c | 82 ++++------------------------------------------------------
 2 files changed, 39 insertions(+), 95 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index d24501a342a..2ff8214a530 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -845,12 +845,6 @@ struct proto raw_prot = {
 };
 
 #ifdef CONFIG_PROC_FS
-struct raw_iter_state {
-	int bucket;
-};
-
-#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private)
-
 static struct sock *raw_get_first(struct seq_file *seq)
 {
 	struct sock *sk;
@@ -860,8 +854,8 @@ static struct sock *raw_get_first(struct seq_file *seq)
 			++state->bucket) {
 		struct hlist_node *node;
 
-		sk_for_each(sk, node, &raw_v4_hashinfo.ht[state->bucket])
-			if (sk->sk_family == PF_INET)
+		sk_for_each(sk, node, &state->h->ht[state->bucket])
+			if (sk->sk_family == state->family)
 				goto found;
 	}
 	sk = NULL;
@@ -877,10 +871,10 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
 		sk = sk_next(sk);
 try_again:
 		;
-	} while (sk && sk->sk_family != PF_INET);
+	} while (sk && sk->sk_family != state->family);
 
 	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
-		sk = sk_head(&raw_v4_hashinfo.ht[state->bucket]);
+		sk = sk_head(&state->h->ht[state->bucket]);
 		goto try_again;
 	}
 	return sk;
@@ -896,13 +890,16 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
 	return pos ? NULL : sk;
 }
 
-static void *raw_seq_start(struct seq_file *seq, loff_t *pos)
+void *raw_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock(&raw_v4_hashinfo.lock);
+	struct raw_iter_state *state = raw_seq_private(seq);
+
+	read_lock(&state->h->lock);
 	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
+EXPORT_SYMBOL_GPL(raw_seq_start);
 
-static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct sock *sk;
 
@@ -913,11 +910,15 @@ static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	return sk;
 }
+EXPORT_SYMBOL_GPL(raw_seq_next);
 
-static void raw_seq_stop(struct seq_file *seq, void *v)
+void raw_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&raw_v4_hashinfo.lock);
+	struct raw_iter_state *state = raw_seq_private(seq);
+
+	read_unlock(&state->h->lock);
 }
+EXPORT_SYMBOL_GPL(raw_seq_stop);
 
 static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
 {
@@ -964,15 +965,30 @@ static const struct seq_operations raw_seq_ops = {
 	.show  = raw_seq_show,
 };
 
-static int raw_seq_open(struct inode *inode, struct file *file)
+int raw_seq_open(struct file *file, struct raw_hashinfo *h,
+		unsigned short family)
 {
-	return seq_open_private(file, &raw_seq_ops,
+	struct raw_iter_state *i;
+
+	i = __seq_open_private(file, &raw_seq_ops,
 			sizeof(struct raw_iter_state));
+	if (i == NULL)
+		return -ENOMEM;
+
+	i->h = h;
+	i->family = family;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(raw_seq_open);
+
+static int raw_v4_seq_open(struct inode *inode, struct file *file)
+{
+	return raw_seq_open(file, &raw_v4_hashinfo, PF_INET);
 }
 
 static const struct file_operations raw_seq_fops = {
 	.owner	 = THIS_MODULE,
-	.open	 = raw_seq_open,
+	.open	 = raw_v4_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
 	.release = seq_release_private,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 422d27cfbe1..b34631e1b01 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1200,77 +1200,6 @@ struct proto rawv6_prot = {
 };
 
 #ifdef CONFIG_PROC_FS
-struct raw6_iter_state {
-	int bucket;
-};
-
-#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
-
-static struct sock *raw6_get_first(struct seq_file *seq)
-{
-	struct sock *sk;
-	struct hlist_node *node;
-	struct raw6_iter_state* state = raw6_seq_private(seq);
-
-	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
-			++state->bucket)
-		sk_for_each(sk, node, &raw_v6_hashinfo.ht[state->bucket])
-			if (sk->sk_family == PF_INET6)
-				goto out;
-	sk = NULL;
-out:
-	return sk;
-}
-
-static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
-{
-	struct raw6_iter_state* state = raw6_seq_private(seq);
-
-	do {
-		sk = sk_next(sk);
-try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET6);
-
-	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
-		sk = sk_head(&raw_v6_hashinfo.ht[state->bucket]);
-		goto try_again;
-	}
-	return sk;
-}
-
-static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct sock *sk = raw6_get_first(seq);
-	if (sk)
-		while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
-			--pos;
-	return pos ? NULL : sk;
-}
-
-static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	read_lock(&raw_v6_hashinfo.lock);
-	return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct sock *sk;
-
-	if (v == SEQ_START_TOKEN)
-		sk = raw6_get_first(seq);
-	else
-		sk = raw6_get_next(seq, v);
-	++*pos;
-	return sk;
-}
-
-static void raw6_seq_stop(struct seq_file *seq, void *v)
-{
-	read_unlock(&raw_v6_hashinfo.lock);
-}
-
 static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 {
 	struct ipv6_pinfo *np = inet6_sk(sp);
@@ -1308,21 +1237,20 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
 			   "st tx_queue rx_queue tr tm->when retrnsmt"
 			   "   uid  timeout inode  drops\n");
 	else
-		raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
+		raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
 	return 0;
 }
 
 static const struct seq_operations raw6_seq_ops = {
-	.start =	raw6_seq_start,
-	.next =		raw6_seq_next,
-	.stop =		raw6_seq_stop,
+	.start =	raw_seq_start,
+	.next =		raw_seq_next,
+	.stop =		raw_seq_stop,
 	.show =		raw6_seq_show,
 };
 
 static int raw6_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &raw6_seq_ops,
-			sizeof(struct raw6_iter_state));
+	return raw_seq_open(file, &raw_v6_hashinfo, PF_INET6);
 }
 
 static const struct file_operations raw6_seq_fops = {
-- 
cgit v1.2.3


From beb659bd8c9f2ccc8195779383f71088f936bf6e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 19 Nov 2007 22:43:37 -0800
Subject: [PATCH] IPV4 : Move ip route cache flush (secret_rebuild) from
 softirq to workqueue

Every 600 seconds (ip_rt_secret_interval), a softirq flush of the
whole ip route cache is triggered. On loaded machines, this can starve
softirq for many seconds and can eventually crash.

This patch moves this flush to a workqueue context, using the worker
we intoduced in commit 39c90ece7565f5c47110c2fa77409d7a9478bd5b (IPV4:
Convert rt_check_expire() from softirq processing to workqueue.)

Also, immediate flushes (echo 0 >/proc/sys/net/ipv4/route/flush) are
using rt_do_flush() helper function, wich take attention to
rescheduling.

Next step will be to handle delayed flushes
("echo -1 >/proc/sys/net/ipv4/route/flush" or "ip route flush cache")

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 83 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 59 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e4b6fb4b1f4..fcae074b7ae 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -133,13 +133,14 @@ static int ip_rt_mtu_expires		= 10 * 60 * HZ;
 static int ip_rt_min_pmtu		= 512 + 20 + 20;
 static int ip_rt_min_advmss		= 256;
 static int ip_rt_secret_interval	= 10 * 60 * HZ;
+static int ip_rt_flush_expected;
 static unsigned long rt_deadline;
 
 #define RTprint(a...)	printk(KERN_DEBUG a)
 
 static struct timer_list rt_flush_timer;
-static void rt_check_expire(struct work_struct *work);
-static DECLARE_DELAYED_WORK(expires_work, rt_check_expire);
+static void rt_worker_func(struct work_struct *work);
+static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
 static struct timer_list rt_secret_timer;
 
 /*
@@ -561,7 +562,36 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 		(fl1->iif ^ fl2->iif)) == 0;
 }
 
-static void rt_check_expire(struct work_struct *work)
+/*
+ * Perform a full scan of hash table and free all entries.
+ * Can be called by a softirq or a process.
+ * In the later case, we want to be reschedule if necessary
+ */
+static void rt_do_flush(int process_context)
+{
+	unsigned int i;
+	struct rtable *rth, *next;
+
+	for (i = 0; i <= rt_hash_mask; i++) {
+		if (process_context && need_resched())
+			cond_resched();
+		rth = rt_hash_table[i].chain;
+		if (!rth)
+			continue;
+
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+		rt_hash_table[i].chain = NULL;
+		spin_unlock_bh(rt_hash_lock_addr(i));
+
+		for (; rth; rth = next) {
+			next = rth->u.dst.rt_next;
+			rt_free(rth);
+		}
+	}
+}
+
+static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
@@ -607,33 +637,33 @@ static void rt_check_expire(struct work_struct *work)
 		spin_unlock_bh(rt_hash_lock_addr(i));
 	}
 	rover = i;
+}
+
+/*
+ * rt_worker_func() is run in process context.
+ * If a whole flush was scheduled, it is done.
+ * Else, we call rt_check_expire() to scan part of the hash table
+ */
+static void rt_worker_func(struct work_struct *work)
+{
+	if (ip_rt_flush_expected) {
+		ip_rt_flush_expected = 0;
+		rt_do_flush(1);
+	} else
+		rt_check_expire();
 	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
 }
 
 /* This can run from both BH and non-BH contexts, the latter
  * in the case of a forced flush event.
  */
-static void rt_run_flush(unsigned long dummy)
+static void rt_run_flush(unsigned long process_context)
 {
-	int i;
-	struct rtable *rth, *next;
-
 	rt_deadline = 0;
 
 	get_random_bytes(&rt_hash_rnd, 4);
 
-	for (i = rt_hash_mask; i >= 0; i--) {
-		spin_lock_bh(rt_hash_lock_addr(i));
-		rth = rt_hash_table[i].chain;
-		if (rth)
-			rt_hash_table[i].chain = NULL;
-		spin_unlock_bh(rt_hash_lock_addr(i));
-
-		for (; rth; rth = next) {
-			next = rth->u.dst.rt_next;
-			rt_free(rth);
-		}
-	}
+	rt_do_flush(process_context);
 }
 
 static DEFINE_SPINLOCK(rt_flush_lock);
@@ -667,7 +697,7 @@ void rt_cache_flush(int delay)
 
 	if (delay <= 0) {
 		spin_unlock_bh(&rt_flush_lock);
-		rt_run_flush(0);
+		rt_run_flush(user_mode);
 		return;
 	}
 
@@ -678,12 +708,17 @@ void rt_cache_flush(int delay)
 	spin_unlock_bh(&rt_flush_lock);
 }
 
+/*
+ * We change rt_hash_rnd and ask next rt_worker_func() invocation
+ * to perform a flush in process context
+ */
 static void rt_secret_rebuild(unsigned long dummy)
 {
-	unsigned long now = jiffies;
-
-	rt_cache_flush(0);
-	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
+	get_random_bytes(&rt_hash_rnd, 4);
+	ip_rt_flush_expected = 1;
+	cancel_delayed_work(&expires_work);
+	schedule_delayed_work(&expires_work, HZ/10);
+	mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
 /*
-- 
cgit v1.2.3


From 02d45827fa211093202f96bdd58020cc8687929f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Mon, 19 Nov 2007 22:45:20 -0800
Subject: [NET] net/core/request_sock.c: Remove unused exports.

This patch removes the following unused EXPORT_SYMBOL's:
- reqsk_queue_alloc
- __reqsk_queue_destroy
- reqsk_queue_destroy

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/request_sock.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 45aed75cb57..2d3035d3abd 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -69,8 +69,6 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	return 0;
 }
 
-EXPORT_SYMBOL(reqsk_queue_alloc);
-
 void __reqsk_queue_destroy(struct request_sock_queue *queue)
 {
 	struct listen_sock *lopt;
@@ -91,8 +89,6 @@ void __reqsk_queue_destroy(struct request_sock_queue *queue)
 		kfree(lopt);
 }
 
-EXPORT_SYMBOL(__reqsk_queue_destroy);
-
 static inline struct listen_sock *reqsk_queue_yank_listen_sk(
 		struct request_sock_queue *queue)
 {
@@ -134,4 +130,3 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 		kfree(lopt);
 }
 
-EXPORT_SYMBOL(reqsk_queue_destroy);
-- 
cgit v1.2.3


From 85b606800be20ceeca36bd8594c1eb228d2fb2f4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 22:52:41 -0800
Subject: [IPVS]: Relax the module get/put in ip_vs_app.c

Both try_module_get/module_put already handle the module == NULL
case, so no need in manual checking.

This patch fits both net-2.6 and net-2.6.25.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_app.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 664cb8e97c1..535abe0c45e 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -51,18 +51,13 @@ static DEFINE_MUTEX(__ip_vs_app_mutex);
  */
 static inline int ip_vs_app_get(struct ip_vs_app *app)
 {
-	/* test and get the module atomically */
-	if (app->module)
-		return try_module_get(app->module);
-	else
-		return 1;
+	return try_module_get(app->module);
 }
 
 
 static inline void ip_vs_app_put(struct ip_vs_app *app)
 {
-	if (app->module)
-		module_put(app->module);
+	module_put(app->module);
 }
 
 
-- 
cgit v1.2.3


From 3ef1355dcb8551730cc71e9ef4363f5c66ccad17 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Mon, 19 Nov 2007 23:18:16 -0800
Subject: [NET]: Make netns cleanup to run in a separate queue

This patch adds a separate workqueue for cleaning up a network
namespace. If we use the keventd workqueue to execute cleanup_net(),
there is a problem to unregister devices in IPv6. Indeed the code
that cleans up also schedule work in keventd: as long as cleanup_net()
hasn't return, dst_gc_task() cannot run and as long as dst_gc_task() has
not run, there are still some references pending on the net devices and
cleanup_net() can not unregister and exit the keventd workqueue.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Acked-By: Kirill Korotaev <dev@sw.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ec936ae9245..26e941d912e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -58,6 +58,7 @@ out_undo:
 
 #ifdef CONFIG_NET_NS
 static struct kmem_cache *net_cachep;
+static struct workqueue_struct *netns_wq;
 
 static struct net *net_alloc(void)
 {
@@ -149,7 +150,7 @@ void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
 	INIT_WORK(&net->work, cleanup_net);
-	schedule_work(&net->work);
+	queue_work(netns_wq, &net->work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
@@ -171,7 +172,13 @@ static int __init net_ns_init(void)
 	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 					SMP_CACHE_BYTES,
 					SLAB_PANIC, NULL);
+
+	/* Create workqueue for cleanup */
+	netns_wq = create_singlethread_workqueue("netns");
+	if (!netns_wq)
+		panic("Could not create netns workq");
 #endif
+
 	mutex_lock(&net_mutex);
 	err = setup_net(&init_net);
 
-- 
cgit v1.2.3


From 9859a79023d71dd4e56c195a345abc4112abfd02 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 Nov 2007 23:20:59 -0800
Subject: [NET]: Compact sk_stream_mem_schedule() code

This function references sk->sk_prot->xxx for many times.
It turned out, that there's so many code in it, that gcc
cannot always optimize access to sk->sk_prot's fields.

After saving the sk->sk_prot on the stack and comparing
disassembled code, it turned out that the function became
~10 bytes shorter and made less dereferences (on i386 and
x86_64). Stack consumption didn't grow.

Besides, this patch drives most of this function into the
80 columns limit.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/stream.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/stream.c b/net/core/stream.c
index 755bacbcb32..b2fb846f42a 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -210,35 +210,36 @@ EXPORT_SYMBOL(__sk_stream_mem_reclaim);
 int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
 {
 	int amt = sk_stream_pages(size);
+	struct proto *prot = sk->sk_prot;
 
 	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
-	atomic_add(amt, sk->sk_prot->memory_allocated);
+	atomic_add(amt, prot->memory_allocated);
 
 	/* Under limit. */
-	if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
-		if (*sk->sk_prot->memory_pressure)
-			*sk->sk_prot->memory_pressure = 0;
+	if (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]) {
+		if (*prot->memory_pressure)
+			*prot->memory_pressure = 0;
 		return 1;
 	}
 
 	/* Over hard limit. */
-	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
-		sk->sk_prot->enter_memory_pressure();
+	if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[2]) {
+		prot->enter_memory_pressure();
 		goto suppress_allocation;
 	}
 
 	/* Under pressure. */
-	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
-		sk->sk_prot->enter_memory_pressure();
+	if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[1])
+		prot->enter_memory_pressure();
 
 	if (kind) {
-		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
+		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
 			return 1;
-	} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
+	} else if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
 		return 1;
 
-	if (!*sk->sk_prot->memory_pressure ||
-	    sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
+	if (!*prot->memory_pressure ||
+	    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
 				sk_stream_pages(sk->sk_wmem_queued +
 						atomic_read(&sk->sk_rmem_alloc) +
 						sk->sk_forward_alloc))
@@ -258,7 +259,7 @@ suppress_allocation:
 
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
-	atomic_sub(amt, sk->sk_prot->memory_allocated);
+	atomic_sub(amt, prot->memory_allocated);
 	return 0;
 }
 
-- 
cgit v1.2.3


From dabeb344f54ab780d152714c18f1cb6b21c471a1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 9 Nov 2007 01:57:29 +0100
Subject: mac80211: provide interface iterator for drivers

Sometimes drivers need to know which interfaces are associated with
their hardware. Rather than forcing those drivers to keep track of
the interfaces that were added, this adds an iteration function to
mac80211.

As it is intended to be used from the interface add/remove callbacks,
the iteration function may currently only be called under RTNL.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/util.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5a0564e1dbd..88f262baaa5 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -22,6 +22,7 @@
 #include <linux/bitmap.h>
 #include <net/net_namespace.h>
 #include <net/cfg80211.h>
+#include <net/rtnetlink.h>
 
 #include "ieee80211_i.h"
 #include "ieee80211_rate.h"
@@ -484,3 +485,35 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw)
 		ieee80211_wake_queue(hw, i);
 }
 EXPORT_SYMBOL(ieee80211_wake_queues);
+
+void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
+					 void (*iterator)(void *data, u8 *mac,
+							  int if_id),
+					 void *data)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata;
+
+	ASSERT_RTNL();
+
+	/* we hold the RTNL here so can safely walk the list */
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		switch (sdata->type) {
+		case IEEE80211_IF_TYPE_INVALID:
+		case IEEE80211_IF_TYPE_MNTR:
+		case IEEE80211_IF_TYPE_VLAN:
+			continue;
+		case IEEE80211_IF_TYPE_AP:
+		case IEEE80211_IF_TYPE_STA:
+		case IEEE80211_IF_TYPE_IBSS:
+		case IEEE80211_IF_TYPE_WDS:
+			break;
+		}
+		if (sdata->dev == local->mdev)
+			continue;
+		if (netif_running(sdata->dev))
+			iterator(data, sdata->dev->dev_addr,
+				 sdata->dev->ifindex);
+	}
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
-- 
cgit v1.2.3


From 48933dea47c947f1d44631ce2292736e6a48eef1 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <hschaa@suse.de>
Date: Fri, 9 Nov 2007 16:26:09 +0100
Subject: mac80211: Remove local->scan_flags

This patch removes all references to local->scan_flags as these are not
used anymore since the removal of prism2 ioctls.

Signed-off-by: Helmut Schaa <hschaa@suse.de>
Signed-off-by: Jiri Benc <jbenc@suse.cz>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_i.h     |  4 ----
 net/mac80211/ieee80211_ioctl.c | 19 +++----------------
 net/mac80211/ieee80211_sta.c   | 12 ------------
 3 files changed, 3 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 72e1c93dd87..2be7fcebac4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -483,10 +483,6 @@ struct ieee80211_local {
 	struct list_head sta_bss_list;
 	struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE];
 	spinlock_t sta_bss_lock;
-#define IEEE80211_SCAN_MATCH_SSID BIT(0)
-#define IEEE80211_SCAN_WPA_ONLY BIT(1)
-#define IEEE80211_SCAN_EXTRA_INFO BIT(2)
-	int scan_flags;
 
 	/* SNMP counters */
 	/* dot11CountersTable */
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index da3350d44da..bbd9bc572a5 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -513,7 +513,6 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 				   struct iw_request_info *info,
 				   union iwreq_data *wrqu, char *extra)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct iw_scan_req *req = NULL;
 	u8 *ssid = NULL;
@@ -522,21 +521,9 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
-	switch (sdata->type) {
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
-		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-			ssid = sdata->u.sta.ssid;
-			ssid_len = sdata->u.sta.ssid_len;
-		}
-		break;
-	case IEEE80211_IF_TYPE_AP:
-		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-			ssid = sdata->u.ap.ssid;
-			ssid_len = sdata->u.ap.ssid_len;
-		}
-		break;
-	default:
+	if (sdata->type != IEEE80211_IF_TYPE_STA &&
+	    sdata->type != IEEE80211_IF_TYPE_IBSS &&
+	    sdata->type != IEEE80211_IF_TYPE_AP) {
 		return -EOPNOTSUPP;
 	}
 
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index bee8080f224..4f9be2fb2bf 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -2894,15 +2894,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
 	if (!(local->enabled_modes & (1 << bss->hw_mode)))
 		return current_ev;
 
-	if (local->scan_flags & IEEE80211_SCAN_WPA_ONLY &&
-	    !bss->wpa_ie && !bss->rsn_ie)
-		return current_ev;
-
-	if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID &&
-	    (local->scan_ssid_len != bss->ssid_len ||
-	     memcmp(local->scan_ssid, bss->ssid, bss->ssid_len) != 0))
-		return current_ev;
-
 	memset(&iwe, 0, sizeof(iwe));
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
@@ -3009,9 +3000,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
 	do {
 		char *buf;
 
-		if (!(local->scan_flags & IEEE80211_SCAN_EXTRA_INFO))
-			break;
-
 		buf = kmalloc(100, GFP_ATOMIC);
 		if (!buf)
 			break;
-- 
cgit v1.2.3


From 0c884439dbd7c895cce61c4974c8868b0f6cd4a1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 16 Nov 2007 02:17:07 +0100
Subject: mac80211: remove more forgotten code

Hopefully that's the rest. Seems I didn't do a very thorough job
removing the management interface.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/tx.c | 58 -------------------------------------------------------
 1 file changed, 58 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1a531543bcc..9ccf4b5a9aa 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1527,64 +1527,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
-/*
- * This is the transmit routine for the 802.11 type interfaces
- * called by upper layers of the linux networking
- * stack when it has a frame to transmit
- */
-int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_tx_packet_data *pkt_data;
-	struct ieee80211_hdr *hdr;
-	u16 fc;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	if (skb->len < 10) {
-		dev_kfree_skb(skb);
-		return 0;
-	}
-
-	if (skb_headroom(skb) < sdata->local->tx_headroom) {
-		if (pskb_expand_head(skb, sdata->local->tx_headroom,
-				     0, GFP_ATOMIC)) {
-			dev_kfree_skb(skb);
-			return 0;
-		}
-	}
-
-	hdr = (struct ieee80211_hdr *) skb->data;
-	fc = le16_to_cpu(hdr->frame_control);
-
-	pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
-	memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
-	pkt_data->ifindex = sdata->dev->ifindex;
-
-	skb->priority = 20; /* use hardcoded priority for mgmt TX queue */
-	skb->dev = sdata->local->mdev;
-
-	/*
-	 * We're using the protocol field of the the frame control header
-	 * to request TX callback for hostapd. BIT(1) is checked.
-	 */
-	if ((fc & BIT(1)) == BIT(1)) {
-		pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
-		fc &= ~BIT(1);
-		hdr->frame_control = cpu_to_le16(fc);
-	}
-
-	if (!(fc & IEEE80211_FCTL_PROTECTED))
-		pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
-
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
-
-	dev_queue_xmit(skb);
-
-	return 0;
-}
-
 /* helper functions for pending packets for when queues are stopped */
 
 void ieee80211_clear_tx_pending(struct ieee80211_local *local)
-- 
cgit v1.2.3


From ebb53d75657f86587ac8cf3e38ab0c860a8e3d4f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 21 Nov 2007 22:08:50 +0800
Subject: [NET] proto: Use pcounters for the inuse field

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 61 ++++++---------------------------------------------------
 1 file changed, 6 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index c9305a86176..eac7aa0721d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1804,65 +1804,15 @@ EXPORT_SYMBOL(sk_common_release);
 static DEFINE_RWLOCK(proto_list_lock);
 static LIST_HEAD(proto_list);
 
-#ifdef CONFIG_SMP
-/*
- * Define default functions to keep track of inuse sockets per protocol
- * Note that often used protocols use dedicated functions to get a speed increase.
- * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
- */
-static void inuse_add(struct proto *prot, int inc)
-{
-	per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
-}
-
-static int inuse_get(const struct proto *prot)
-{
-	int res = 0, cpu;
-	for_each_possible_cpu(cpu)
-		res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
-	return res;
-}
-
-static int inuse_init(struct proto *prot)
-{
-	if (!prot->inuse_getval || !prot->inuse_add) {
-		prot->inuse_ptr = alloc_percpu(int);
-		if (prot->inuse_ptr == NULL)
-			return -ENOBUFS;
-
-		prot->inuse_getval = inuse_get;
-		prot->inuse_add = inuse_add;
-	}
-	return 0;
-}
-
-static void inuse_fini(struct proto *prot)
-{
-	if (prot->inuse_ptr != NULL) {
-		free_percpu(prot->inuse_ptr);
-		prot->inuse_ptr = NULL;
-		prot->inuse_getval = NULL;
-		prot->inuse_add = NULL;
-	}
-}
-#else
-static inline int inuse_init(struct proto *prot)
-{
-	return 0;
-}
-
-static inline void inuse_fini(struct proto *prot)
-{
-}
-#endif
-
 int proto_register(struct proto *prot, int alloc_slab)
 {
 	char *request_sock_slab_name = NULL;
 	char *timewait_sock_slab_name;
 
-	if (inuse_init(prot))
+	if (pcounter_alloc(&prot->inuse) != 0) {
+		printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
 		goto out;
+	}
 
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
@@ -1930,7 +1880,7 @@ out_free_sock_slab:
 	kmem_cache_destroy(prot->slab);
 	prot->slab = NULL;
 out_free_inuse:
-	inuse_fini(prot);
+	pcounter_free(&prot->inuse);
 out:
 	return -ENOBUFS;
 }
@@ -1943,7 +1893,8 @@ void proto_unregister(struct proto *prot)
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 
-	inuse_fini(prot);
+	pcounter_free(&prot->inuse);
+
 	if (prot->slab != NULL) {
 		kmem_cache_destroy(prot->slab);
 		prot->slab = NULL;
-- 
cgit v1.2.3


From 6c08b2cf4843788e66a5e69b5512538e686ae3e3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Nov 2007 17:33:17 -0200
Subject: [CCID3]: Revert use of MSS instead of s

This updates the CCID3 code with regard to two instances of using `MSS' in place of `s':

 1. The RFC3390-based initial rate: both rfc3448bis as well as the Faster Restart
    draft now consistently use `s' instead of MSS.

 2. Now agrees with section 4.2 of rfc3448bis: "If the sender is ready to send data when
    it does not yet have a round trip sample, the value of X is set to s bytes per
    second, for segment size s [...]"

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f56aaecb56b..5bf110b28ab 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -83,18 +83,21 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
 }
 
 /*
- * Compute the initial sending rate X_init according to RFC 3390:
- *	w_init   =    min(4 * MSS, max(2 * MSS, 4380 bytes))
- *	X_init   =    w_init / RTT
+ * Compute the initial sending rate X_init in the manner of RFC 3390:
+ *
+ *	X_init  =  min(4 * s, max(2 * s, 4380 bytes)) / RTT
+ *
+ * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
+ * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
  * For consistency with other parts of the code, X_init is scaled by 2^6.
  */
 static inline u64 rfc3390_initial_rate(struct sock *sk)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	const __u32 w_init = min(4 * dp->dccps_mss_cache,
-				 max(2 * dp->dccps_mss_cache, 4380U));
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	const __u32 w_init = min_t(__u32, 4 * hctx->ccid3hctx_s,
+				   max_t(__u32, 2 * hctx->ccid3hctx_s, 4380));
 
-	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
+	return scaled_div(w_init << 6, hctx->ccid3hctx_rtt);
 }
 
 /*
@@ -336,8 +339,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
 			hctx->ccid3hctx_t_ld = now;
 		} else {
-			/* Sender does not have RTT sample: X = MSS/second */
-			hctx->ccid3hctx_x = dp->dccps_mss_cache;
+			/* Sender does not have RTT sample: X_pps = 1 pkt/sec */
+			hctx->ccid3hctx_x = hctx->ccid3hctx_s;
 			hctx->ccid3hctx_x <<= 6;
 		}
 		ccid3_update_send_interval(hctx);
-- 
cgit v1.2.3


From eb279b79c46be767ecffadaa8ed6be3e3555e93d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Nov 2007 18:00:39 -0200
Subject: [CCID3]: Ignore trivial amounts of elapsed time

This patch fixes a previously undiscovered bug; the problem is in computing
the elapsed time as the time between `receiving' the packet (i.e. skb enters
CCID module) and sending feedback:

     - there is no layer-processing, queueing, or delay involved,
     - hence the elapsed time is in the order of 1 function call
     - this is in the dimension of maximally 50..100usec
     - which renders the use of elapsed time almost entirely useless.

The fix is simply to ignore such trivial amounts of elapsed time.

As a further advantage, the now useless elapsed_time field can be removed from
the socket, which reduces the socket structure by another four bytes.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 10 +---------
 net/dccp/ccids/ccid3.h |  2 --
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 5bf110b28ab..c025236ce49 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -744,11 +744,6 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
 	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
-	/* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
-	delta = ktime_us_delta(now, packet->dccphrx_tstamp);
-	DCCP_BUG_ON(delta < 0);
-	hcrx->ccid3hcrx_elapsed_time = delta / 10;
-
 	if (hcrx->ccid3hcrx_p == 0)
 		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
 	else if (hcrx->ccid3hcrx_p > 1000000) {
@@ -778,10 +773,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
 	pinv   = htonl(hcrx->ccid3hcrx_pinv);
 
-	if ((hcrx->ccid3hcrx_elapsed_time != 0 &&
-	     dccp_insert_option_elapsed_time(sk, skb,
-					     hcrx->ccid3hcrx_elapsed_time)) ||
-	    dccp_insert_option_timestamp(sk, skb) ||
+	if (dccp_insert_option_timestamp(sk, skb) ||
 	    dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
 			       &pinv, sizeof(pinv)) ||
 	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0cdc982cfe4..83467c34ed2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -147,7 +147,6 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_li_hist  -  Loss Interval History
  *  @ccid3hcrx_s  -  Received packet size in bytes
  *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
- *  @ccid3hcrx_elapsed_time  -  Time since packet reception
  */
 struct ccid3_hc_rx_sock {
 	struct tfrc_rx_info		ccid3hcrx_tfrc;
@@ -165,7 +164,6 @@ struct ccid3_hc_rx_sock {
 	struct list_head		ccid3hcrx_li_hist;
 	u16				ccid3hcrx_s;
 	u32				ccid3hcrx_pinv;
-	u32				ccid3hcrx_elapsed_time;
 };
 
 static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
-- 
cgit v1.2.3


From a5358fdc9c52e44d79dcd144375e089e166508d7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Nov 2007 18:01:59 -0200
Subject: [CCID3]: Accurately determine idle & application-limited periods

This fixes/updates the handling of idle and application-limited periods in CCID3,
which currently is broken: there is no detection as to how long a sender has been
idle - there is only one flag which is toggled in between function calls.

Being obsolete now, the `idle' flag is removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 18 ++++++++++--------
 net/dccp/ccids/ccid3.h |  2 --
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c025236ce49..94a32286384 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -119,6 +119,13 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 
 }
 
+static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
+{
+	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
+
+	return delta / hctx->ccid3hctx_rtt;
+}
+
 /**
  * ccid3_hc_tx_update_x  -  Update allowed sending rate X
  * @stamp: most recent time if available - can be left NULL.
@@ -139,10 +146,11 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 
 	/*
 	 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
-	 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
+	 * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis:
+	 * a sender is idle if it has not sent anything over a 2-RTT-period.
 	 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
 	 */
-	if (unlikely(hctx->ccid3hctx_idle)) {
+	if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) {
 		min_rate = rfc3390_initial_rate(sk);
 		min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
 	}
@@ -228,8 +236,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
 		       ccid3_tx_state_name(hctx->ccid3hctx_state));
 
-	hctx->ccid3hctx_idle = 1;
-
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 		/* RFC 3448, 4.4: Halve send rate directly */
@@ -372,7 +378,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	/* prepare to send now (add options etc.) */
 	dp->dccps_hc_tx_insert_options = 1;
 	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
-	hctx->ccid3hctx_idle = 0;
 
 	/* set the nominal send time for the next following packet */
 	hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
@@ -531,9 +536,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
 				   jiffies + usecs_to_jiffies(t_nfb));
-
-		/* set idle flag */
-		hctx->ccid3hctx_idle = 1;
 		break;
 	case TFRC_SSTATE_NO_SENT:	/* fall through */
 	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 83467c34ed2..36eca34228f 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -88,7 +88,6 @@ enum ccid3_hc_tx_states {
  * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
  *				 with last_win_count value sent
  * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
- * @ccid3hctx_idle - Flag indicating that sender is idling
  * @ccid3hctx_t_ld - Time last doubled during slow start
  * @ccid3hctx_t_nom - Nominal send time of next packet
  * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
@@ -107,7 +106,6 @@ struct ccid3_hc_tx_sock {
 	u16				ccid3hctx_s;
 	enum ccid3_hc_tx_states		ccid3hctx_state:8;
 	u8				ccid3hctx_last_win_count;
-	u8				ccid3hctx_idle;
 	ktime_t				ccid3hctx_t_last_win_count;
 	struct timer_list		ccid3hctx_no_feedback_timer;
 	ktime_t				ccid3hctx_t_ld;
-- 
cgit v1.2.3


From c3ada46a009001e144b29736880962f24ee2afdf Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Nov 2007 18:09:59 -0200
Subject: [CCID3]: Inline for moving average

The moving average computation occurs so frequently in the CCID 3 code that
it merits an inline function  of its own. This is uses a suggestion by
Arnaldo as per http://www.mail-archive.com/dccp@vger.kernel.org/msg01662.html

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c    | 25 +++++--------------------
 net/dccp/ccids/lib/tfrc.h |  9 +++++++++
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 94a32286384..42893b1dfa0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -192,7 +192,7 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 {
 	const u16 old_s = hctx->ccid3hctx_s;
 
-	hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;
+	hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9);
 
 	if (hctx->ccid3hctx_s != old_s)
 		ccid3_update_send_interval(hctx);
@@ -449,25 +449,15 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 		now = ktime_get_real();
 		/*
-		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
-		 *	R_sample  =  (now - t_recvdata) - t_elapsed
+		 * Calculate new RTT sample and update moving average
 		 */
 		r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp));
+		hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
 
-		/*
-		 * Update RTT estimate by
-		 * If (No feedback recv)
-		 *    R = R_sample;
-		 * Else
-		 *    R = q * R + (1 - q) * R_sample;
-		 *
-		 * q is a constant, RFC 3448 recomments 0.9
-		 */
 		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
 			/*
 			 * Larger Initial Windows [RFC 4342, sec. 5]
 			 */
-			hctx->ccid3hctx_rtt  = r_sample;
 			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
 			hctx->ccid3hctx_t_ld = now;
 
@@ -481,8 +471,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 		} else {
-			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
-						   r_sample) / 10;
 
 			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
 			if (hctx->ccid3hctx_p > 0)
@@ -700,11 +688,8 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
 
 static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
 {
-	if (unlikely(len == 0))	/* don't update on empty packets (e.g. ACKs) */
-		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
-	else
-		hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len :
-				    (9 * hcrx->ccid3hcrx_s + len) / 10;
+	if (likely(len > 0))	/* don't update on empty packets (e.g. ACKs) */
+		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9);
 }
 
 static void ccid3_hc_rx_send_feedback(struct sock *sk)
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index faf5f7e219e..5a0ba86df18 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -37,6 +37,15 @@ static inline u32 scaled_div32(u64 a, u32 b)
 	return result;
 }
 
+/**
+ * tfrc_ewma  -  Exponentially weighted moving average
+ * @weight: Weight to be used as damping factor, in units of 1/10
+ */
+static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
+{
+	return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
+}
+
 extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
 
-- 
cgit v1.2.3


From 8e8c71f1ab0ca1c4e74efad14533b991524dcb6c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 21 Nov 2007 09:56:48 -0200
Subject: [DCCP]: Honour and make use of shutdown option set by user

This extends the DCCP socket API by honouring any shutdown(2) option set by the user.
The behaviour is, as much as possible, made consistent with the API for TCP's shutdown.

This patch exploits the information provided by the user via the socket API to reduce
processing costs:
 * if the read end is closed (SHUT_RD), it is not necessary to deliver to input CCID;
 * if the write end is closed (SHUT_WR), the same idea applies, but with a difference -
   as long as the TX queue has not been drained, we need to receive feedback to keep
   congestion-control rates up to date. Hence SHUT_WR is honoured only after the last
   packet (under congestion control) has been sent;
 * although SHUT_RDWR seems nonsensical, it is nevertheless supported in the same manner
   as for TCP (and agrees with test for SHUTDOWN_MASK in dccp_poll() in net/dccp/proto.c).

Furthermore, most of the code already honours the sk_shutdown flags (dccp_recvmsg() for
instance sets the read length to 0 if SHUT_RD had been called); CCID handling is now added
to this by the present patch.

There will also no longer be any delivery when the socket is in the final stages, i.e. when
one of dccp_close(), dccp_fin(), or dccp_done() has been called - which is fine since at
that stage the connection is its final stages.

Motivation and background are on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/shutdown

A FIXME has been added to notify the other end if SHUT_RD has been set (RFC 4340, 11.7).

Note: There is a comment in inet_shutdown() in net/ipv4/af_inet.c which asks to "make
      sure the socket is a TCP socket". This should probably be extended to mean
      `TCP or DCCP socket' (the code is also used by UDP and raw sockets).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 27 ++++++++++++++++++++-------
 net/dccp/proto.c |  2 +-
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 1ce10106282..df0fb2c149a 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -103,6 +103,21 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
 
+static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+
+	/* Don't deliver to RX CCID when node has shut down read end. */
+	if (!(sk->sk_shutdown & RCV_SHUTDOWN))
+		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+	/*
+	 * Until the TX queue has been drained, we can not honour SHUT_WR, since
+	 * we need received feedback as input to adjust congestion control.
+	 */
+	if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN))
+		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+}
+
 static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -209,8 +224,9 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	case DCCP_PKT_DATAACK:
 	case DCCP_PKT_DATA:
 		/*
-		 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
-		 * option if it is.
+		 * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when
+		 * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening"
+		 * - sk_receive_queue is full, use Code 2, "Receive Buffer"
 		 */
 		__skb_pull(skb, dh->dccph_doff * 4);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -300,9 +316,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			    DCCP_SKB_CB(skb)->dccpd_seq,
 			    DCCP_ACKVEC_STATE_RECEIVED))
 		goto discard;
-
-	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
-	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+	dccp_deliver_input_to_ccids(sk, skb);
 
 	return __dccp_rcv_established(sk, skb, dh, len);
 discard:
@@ -543,8 +557,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				    DCCP_ACKVEC_STATE_RECEIVED))
 			goto discard;
 
-		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
-		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+		dccp_deliver_input_to_ccids(sk, skb);
 	}
 
 	/*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7a3bea9c28c..0aec7359212 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -981,7 +981,7 @@ EXPORT_SYMBOL_GPL(dccp_close);
 
 void dccp_shutdown(struct sock *sk, int how)
 {
-	dccp_pr_debug("entry\n");
+	dccp_pr_debug("called shutdown(%x)\n", how);
 }
 
 EXPORT_SYMBOL_GPL(dccp_shutdown);
-- 
cgit v1.2.3


From e333b3edc489151afda2a4f6c798842c64cb67a4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 21 Nov 2007 10:09:56 -0200
Subject: [DCCP]: Promote CCID2 as default CCID

This patch addresses the following problems:

 1. DCCP relies for its proper functioning on having at least one CCID module
    enabled (as in TCP plugable congestion control). Currently it is possible to
    disable both CCIDs and thus leave the DCCP module in a compiled, but entirely
    non-functional state: no sockets can be created when no CCID is available.
    Furthermore, the protocol is (again like TCP) not intended to be used without
    CCIDs. Last, a non-empty CCID list is needed for doing CCID feature negotiation.

 2. Internally the default CCID that is advertised by the Linux host is set to CCID2
    (DCCPF_INITIAL_CCID in include/linux/dccp.h). Disabling CCID2 in the Kconfig
    menu without changing the defaults leads to a failure `module not found' when
    trying to load the dccp module (which internally tries to load the default CCID).

 3. The specification (RFC 4340, sec. 10) treats CCID2 somewhat like a
    `minimum common denominator'; the specification says that:

    * "New connections start with CCID 2 for both endpoints"

    * "A DCCP implementation intended for general use, such as an implementation in a
       general-purpose operating system kernel, SHOULD implement at least CCID 2.
       The intent is to make CCID 2 broadly available for interoperability [...]"

    Providing CCID2 as minimum-required CCID (like Reno/Cubic in TCP) thus seems reasonable.

Hence this patch automatically selects CCID2 when DCCP is enabled. Documentation also added.

Discussions with Ian McDonald on this subject are gratefully acknowledged.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/Kconfig       |  1 +
 net/dccp/ccids/Kconfig | 13 ++-----------
 2 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 0549e4719b1..7aa2a7acc7e 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,6 +1,7 @@
 menuconfig IP_DCCP
 	tristate "The DCCP Protocol (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
+	select IP_DCCP_CCID2
 	---help---
 	  Datagram Congestion Control Protocol (RFC 4340)
 
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 80f46988769..f37af17556b 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -20,18 +20,9 @@ config IP_DCCP_CCID2
 	  to the user.  For example, a hypothetical application that
 	  transferred files over DCCP, using application-level retransmissions
 	  for lost packets, would prefer CCID 2 to CCID 3.  On-line games may
-	  also prefer CCID 2.
+	  also prefer CCID 2.  See RFC 4341 for further details.
 
-	  CCID 2 is further described in RFC 4341,
-	  http://www.ietf.org/rfc/rfc4341.txt
-
-	  This text was extracted from RFC 4340 (sec. 10.1),
-	  http://www.ietf.org/rfc/rfc4340.txt
-
-	  To compile this CCID as a module, choose M here: the module will be
-	  called dccp_ccid2.
-
-	  If in doubt, say M.
+	  CCID2 is the default CCID used by DCCP.
 
 config IP_DCCP_CCID2_DEBUG
 	  bool "CCID2 debugging messages"
-- 
cgit v1.2.3


From 6d57b43bf810cd66ccf252c04ba65c3e8e56cbb1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 21 Nov 2007 10:11:52 -0200
Subject: [DCCP]: Remove redundant dependency on IP_DCCP

This cleans up the consequences of an earlier patch which
introduced the `if IP_DCCP' clause into net/dccp/Kconfig.

The CCID Kconfig menu is sourced within this clause; as a
consequence, all tests of type `depends on IP_DCCP' are now
redundant.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/Kconfig | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index f37af17556b..3d7d867a7c4 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,9 +1,8 @@
 menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
-	depends on IP_DCCP && EXPERIMENTAL
+	depends on EXPERIMENTAL
 
 config IP_DCCP_CCID2
 	tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
-	depends on IP_DCCP
 	def_tristate IP_DCCP
 	select IP_DCCP_ACKVEC
 	---help---
@@ -38,7 +37,6 @@ config IP_DCCP_CCID2_DEBUG
 
 config IP_DCCP_CCID3
 	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
-	depends on IP_DCCP
 	def_tristate IP_DCCP
 	---help---
 	  CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
-- 
cgit v1.2.3


From c86ab2b6a5deec2223a606b90c001bedda9d950b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 21 Nov 2007 10:13:53 -0200
Subject: [DCCP]: Ignore Ack Vectors / Elapsed Time on DCCP-Request also

Small update with regard to RFC 4340 (references added as documentation):
on Requests, Ack Vectors / Elapsed Time should be ignored.
Length handling of Elapsed Time also simplified.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/options.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index d286cffe2c4..523250b45ea 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -136,7 +136,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 			break;
 		case DCCPO_ACK_VECTOR_0:
 		case DCCPO_ACK_VECTOR_1:
-			if (pkt_type == DCCP_PKT_DATA)
+			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
 				break;
 
 			if (dccp_msk(sk)->dccpms_send_ack_vector &&
@@ -194,18 +194,17 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 				opt_recv->dccpor_elapsed_time = elapsed_time;
 			break;
 		case DCCPO_ELAPSED_TIME:
-			if (len != 2 && len != 4)
-				goto out_invalid_option;
-
-			if (pkt_type == DCCP_PKT_DATA)
-				continue;
+			if (dccp_packet_without_ack(skb))   /* RFC 4340, 13.2 */
+				break;
 
 			if (len == 2) {
 				__be16 opt_val2 = get_unaligned((__be16 *)value);
 				elapsed_time = ntohs(opt_val2);
-			} else {
+			} else if (len == 4) {
 				opt_val = get_unaligned((__be32 *)value);
 				elapsed_time = ntohl(opt_val);
+			} else {
+				goto out_invalid_option;
 			}
 
 			if (elapsed_time > opt_recv->dccpor_elapsed_time)
-- 
cgit v1.2.3


From a53eb3feb2718bf906e81be89fae823b9b6157b6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 23 Nov 2007 20:30:01 +0800
Subject: [UNIX] Move the unix sock iterators in to proper place

The first_unix_socket() and next_unix_sockets() are now used
in proc file and in forall_unix_socets macro only.

The forall_unix_sockets is not used in this file at all so
remove it. After this move the helpers to where they really
belong, i.e. closer to proc code under the #ifdef CONFIG_PROC_FS
option.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 49 +++++++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eacddb21a9b..0f1ecbf86d0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -127,32 +127,6 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 
 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 
-static struct sock *first_unix_socket(int *i)
-{
-	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
-	}
-	return NULL;
-}
-
-static struct sock *next_unix_socket(int *i, struct sock *s)
-{
-	struct sock *next = sk_next(s);
-	/* More in this chain? */
-	if (next)
-		return next;
-	/* Look for next non-empty chain. */
-	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
-	}
-	return NULL;
-}
-
-#define forall_unix_sockets(i, s) \
-	for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))
-
 #ifdef CONFIG_SECURITY_NETWORK
 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
@@ -2017,6 +1991,29 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
 
 
 #ifdef CONFIG_PROC_FS
+static struct sock *first_unix_socket(int *i)
+{
+	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+		if (!hlist_empty(&unix_socket_table[*i]))
+			return __sk_head(&unix_socket_table[*i]);
+	}
+	return NULL;
+}
+
+static struct sock *next_unix_socket(int *i, struct sock *s)
+{
+	struct sock *next = sk_next(s);
+	/* More in this chain? */
+	if (next)
+		return next;
+	/* Look for next non-empty chain. */
+	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
+		if (!hlist_empty(&unix_socket_table[*i]))
+			return __sk_head(&unix_socket_table[*i]);
+	}
+	return NULL;
+}
+
 struct unix_iter_state {
 	struct seq_net_private p;
 	int i;
-- 
cgit v1.2.3


From f1267347353b586e1a89a7d02c142be975bae699 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 23 Nov 2007 21:28:44 +0800
Subject: [IPV6]: Correct the comment concerning inetsw6 table

It seems that net/ipv6/af_inet6.c was copied from net/ipv4/af_inet.c,
but one comment was not fixed.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 85178f71b21..64135e2a309 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -68,7 +68,7 @@ MODULE_LICENSE("GPL");
 
 int sysctl_ipv6_bindv6only __read_mostly;
 
-/* The inetsw table contains everything that inet_create needs to
+/* The inetsw6 table contains everything that inet6_create needs to
  * build a new socket.
  */
 static struct list_head inetsw6[SOCK_MAX];
-- 
cgit v1.2.3


From a47c51044a77124ce66cd8513bba6f4d7673e43d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 30 Dec 2007 04:19:31 -0800
Subject: [ACKVEC]: Reduce length of identifiers

This is reduces the length of the struct ackvec/ackvec_record fields. It is
a purely text-based replacement:

	s#dccpavr_#avr_#g;
	s#dccpav_#av_#g;

and increases readability somewhat.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c | 163 ++++++++++++++++++++++++++----------------------------
 net/dccp/ackvec.h |  62 ++++++++++-----------
 2 files changed, 108 insertions(+), 117 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 83378f379f7..6de4bd195d2 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -30,7 +30,7 @@ static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
 			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 
 	if (avr != NULL)
-		INIT_LIST_HEAD(&avr->dccpavr_node);
+		INIT_LIST_HEAD(&avr->avr_node);
 
 	return avr;
 }
@@ -40,7 +40,7 @@ static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
 	if (unlikely(avr == NULL))
 		return;
 	/* Check if deleting a linked record */
-	WARN_ON(!list_empty(&avr->dccpavr_node));
+	WARN_ON(!list_empty(&avr->avr_node));
 	kmem_cache_free(dccp_ackvec_record_slab, avr);
 }
 
@@ -52,16 +52,15 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
 	 * just add the AVR at the head of the list.
 	 * -sorbo.
 	 */
-	if (!list_empty(&av->dccpav_records)) {
+	if (!list_empty(&av->av_records)) {
 		const struct dccp_ackvec_record *head =
-					list_entry(av->dccpav_records.next,
+					list_entry(av->av_records.next,
 						   struct dccp_ackvec_record,
-						   dccpavr_node);
-		BUG_ON(before48(avr->dccpavr_ack_seqno,
-				head->dccpavr_ack_seqno));
+						   avr_node);
+		BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
 	}
 
-	list_add(&avr->dccpavr_node, &av->dccpav_records);
+	list_add(&avr->avr_node, &av->av_records);
 }
 
 int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
@@ -69,9 +68,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
 	/* Figure out how many options do we need to represent the ackvec */
-	const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len,
-					 DCCP_MAX_ACKVEC_OPT_LEN);
-	u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
+	const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
+	u16 len = av->av_vec_len + 2 * nr_opts, i;
 	u32 elapsed_time;
 	const unsigned char *tail, *from;
 	unsigned char *to;
@@ -81,7 +79,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
 		return -1;
 
-	delta = ktime_us_delta(ktime_get_real(), av->dccpav_time);
+	delta = ktime_us_delta(ktime_get_real(), av->av_time);
 	elapsed_time = delta / 10;
 
 	if (elapsed_time != 0 &&
@@ -95,9 +93,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
-	len  = av->dccpav_vec_len;
-	from = av->dccpav_buf + av->dccpav_buf_head;
-	tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN;
+	len  = av->av_vec_len;
+	from = av->av_buf + av->av_buf_head;
+	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
 
 	for (i = 0; i < nr_opts; ++i) {
 		int copylen = len;
@@ -116,7 +114,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 			to	+= tailsize;
 			len	-= tailsize;
 			copylen	-= tailsize;
-			from	= av->dccpav_buf;
+			from	= av->av_buf;
 		}
 
 		memcpy(to, from, copylen);
@@ -134,19 +132,19 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
 	 *	equal buf_nonce.
 	 */
-	avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	avr->dccpavr_ack_ptr   = av->dccpav_buf_head;
-	avr->dccpavr_ack_ackno = av->dccpav_buf_ackno;
-	avr->dccpavr_ack_nonce = av->dccpav_buf_nonce;
-	avr->dccpavr_sent_len  = av->dccpav_vec_len;
+	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_ptr   = av->av_buf_head;
+	avr->avr_ack_ackno = av->av_buf_ackno;
+	avr->avr_ack_nonce = av->av_buf_nonce;
+	avr->avr_sent_len  = av->av_vec_len;
 
 	dccp_ackvec_insert_avr(av, avr);
 
 	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
 		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->dccpavr_sent_len,
-		      (unsigned long long)avr->dccpavr_ack_seqno,
-		      (unsigned long long)avr->dccpavr_ack_ackno);
+		      dccp_role(sk), avr->avr_sent_len,
+		      (unsigned long long)avr->avr_ack_seqno,
+		      (unsigned long long)avr->avr_ack_ackno);
 	return 0;
 }
 
@@ -155,12 +153,12 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
 
 	if (av != NULL) {
-		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
-		av->dccpav_buf_ackno	= UINT48_MAX + 1;
-		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
-		av->dccpav_time	     = ktime_set(0, 0);
-		av->dccpav_vec_len	= 0;
-		INIT_LIST_HEAD(&av->dccpav_records);
+		av->av_buf_head	 = DCCP_MAX_ACKVEC_LEN - 1;
+		av->av_buf_ackno = UINT48_MAX + 1;
+		av->av_buf_nonce = 0;
+		av->av_time	 = ktime_set(0, 0);
+		av->av_vec_len	 = 0;
+		INIT_LIST_HEAD(&av->av_records);
 	}
 
 	return av;
@@ -171,12 +169,11 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
 	if (unlikely(av == NULL))
 		return;
 
-	if (!list_empty(&av->dccpav_records)) {
+	if (!list_empty(&av->av_records)) {
 		struct dccp_ackvec_record *avr, *next;
 
-		list_for_each_entry_safe(avr, next, &av->dccpav_records,
-					 dccpavr_node) {
-			list_del_init(&avr->dccpavr_node);
+		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
+			list_del_init(&avr->avr_node);
 			dccp_ackvec_record_delete(avr);
 		}
 	}
@@ -187,13 +184,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
 static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
 				   const u32 index)
 {
-	return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
+	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
 }
 
 static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
 				 const u32 index)
 {
-	return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
+	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
 }
 
 /*
@@ -208,29 +205,29 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 	unsigned int gap;
 	long new_head;
 
-	if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
+	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
 		return -ENOBUFS;
 
 	gap	 = packets - 1;
-	new_head = av->dccpav_buf_head - packets;
+	new_head = av->av_buf_head - packets;
 
 	if (new_head < 0) {
 		if (gap > 0) {
-			memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
 			       gap + new_head + 1);
 			gap = -new_head;
 		}
 		new_head += DCCP_MAX_ACKVEC_LEN;
 	}
 
-	av->dccpav_buf_head = new_head;
+	av->av_buf_head = new_head;
 
 	if (gap > 0)
-		memset(av->dccpav_buf + av->dccpav_buf_head + 1,
+		memset(av->av_buf + av->av_buf_head + 1,
 		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
 
-	av->dccpav_buf[av->dccpav_buf_head] = state;
-	av->dccpav_vec_len += packets;
+	av->av_buf[av->av_buf_head] = state;
+	av->av_vec_len += packets;
 	return 0;
 }
 
@@ -243,7 +240,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 	/*
 	 * Check at the right places if the buffer is full, if it is, tell the
 	 * caller to start dropping packets till the HC-Sender acks our ACK
-	 * vectors, when we will free up space in dccpav_buf.
+	 * vectors, when we will free up space in av_buf.
 	 *
 	 * We may well decide to do buffer compression, etc, but for now lets
 	 * just drop.
@@ -263,22 +260,20 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 	 */
 
 	/* See if this is the first ackno being inserted */
-	if (av->dccpav_vec_len == 0) {
-		av->dccpav_buf[av->dccpav_buf_head] = state;
-		av->dccpav_vec_len = 1;
-	} else if (after48(ackno, av->dccpav_buf_ackno)) {
-		const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno,
-						   ackno);
+	if (av->av_vec_len == 0) {
+		av->av_buf[av->av_buf_head] = state;
+		av->av_vec_len = 1;
+	} else if (after48(ackno, av->av_buf_ackno)) {
+		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
 
 		/*
 		 * Look if the state of this packet is the same as the
 		 * previous ackno and if so if we can bump the head len.
 		 */
 		if (delta == 1 &&
-		    dccp_ackvec_state(av, av->dccpav_buf_head) == state &&
-		    (dccp_ackvec_len(av, av->dccpav_buf_head) <
-		     DCCP_ACKVEC_LEN_MASK))
-			av->dccpav_buf[av->dccpav_buf_head]++;
+		    dccp_ackvec_state(av, av->av_buf_head) == state &&
+		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
+			av->av_buf[av->av_buf_head]++;
 		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
 			return -ENOBUFS;
 	} else {
@@ -290,14 +285,14 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 *	the byte corresponding to S. (Indexing structures
 		 *	could reduce the complexity of this scan.)
 		 */
-		u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
-		u32 index = av->dccpav_buf_head;
+		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
+		u32 index = av->av_buf_head;
 
 		while (1) {
 			const u8 len = dccp_ackvec_len(av, index);
 			const u8 state = dccp_ackvec_state(av, index);
 			/*
-			 * valid packets not yet in dccpav_buf have a reserved
+			 * valid packets not yet in av_buf have a reserved
 			 * entry, with a len equal to 0.
 			 */
 			if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
@@ -305,7 +300,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 							 reserved seat! */
 				dccp_pr_debug("Found %llu reserved seat!\n",
 					      (unsigned long long)ackno);
-				av->dccpav_buf[index] = state;
+				av->av_buf[index] = state;
 				goto out;
 			}
 			/* len == 0 means one packet */
@@ -318,8 +313,8 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		}
 	}
 
-	av->dccpav_buf_ackno = ackno;
-	av->dccpav_time = ktime_get_real();
+	av->av_buf_ackno = ackno;
+	av->av_time = ktime_get_real();
 out:
 	return 0;
 
@@ -349,9 +344,9 @@ void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
 
 void dccp_ackvec_print(const struct dccp_ackvec *av)
 {
-	dccp_ackvector_print(av->dccpav_buf_ackno,
-			     av->dccpav_buf + av->dccpav_buf_head,
-			     av->dccpav_vec_len);
+	dccp_ackvector_print(av->av_buf_ackno,
+			     av->av_buf + av->av_buf_head,
+			     av->av_vec_len);
 }
 #endif
 
@@ -361,17 +356,15 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 	struct dccp_ackvec_record *next;
 
 	/* sort out vector length */
-	if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
-		av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
+	if (av->av_buf_head <= avr->avr_ack_ptr)
+		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
 	else
-		av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
-				     - av->dccpav_buf_head
-				     + avr->dccpavr_ack_ptr;
+		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
+				 av->av_buf_head + avr->avr_ack_ptr;
 
 	/* free records */
-	list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
-				      dccpavr_node) {
-		list_del_init(&avr->dccpavr_node);
+	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+		list_del_init(&avr->avr_node);
 		dccp_ackvec_record_delete(avr);
 	}
 }
@@ -386,16 +379,16 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
 	 * windows. We will be receiving ACKs for stuff we sent a while back
 	 * -sorbo.
 	 */
-	list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
-		if (ackno == avr->dccpavr_ack_seqno) {
+	list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
+		if (ackno == avr->avr_ack_seqno) {
 			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
 				      "ack_ackno=%llu, ACKED!\n",
 				      dccp_role(sk), 1,
-				      (unsigned long long)avr->dccpavr_ack_seqno,
-				      (unsigned long long)avr->dccpavr_ack_ackno);
+				      (unsigned long long)avr->avr_ack_seqno,
+				      (unsigned long long)avr->avr_ack_ackno);
 			dccp_ackvec_throw_record(av, avr);
 			break;
-		} else if (avr->dccpavr_ack_seqno > ackno)
+		} else if (avr->avr_ack_seqno > ackno)
 			break; /* old news */
 	}
 }
@@ -409,7 +402,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 	struct dccp_ackvec_record *avr;
 
 	/* Check if we actually sent an ACK vector */
-	if (list_empty(&av->dccpav_records))
+	if (list_empty(&av->av_records))
 		return;
 
 	i = len;
@@ -418,8 +411,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 	 * I think it might be more efficient to work backwards. See comment on
 	 * rcv_ackno. -sorbo.
 	 */
-	avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record,
-			 dccpavr_node);
+	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
 	while (i--) {
 		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
 		u64 ackno_end_rl;
@@ -430,15 +422,14 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 		 * If our AVR sequence number is greater than the ack, go
 		 * forward in the AVR list until it is not so.
 		 */
-		list_for_each_entry_from(avr, &av->dccpav_records,
-					 dccpavr_node) {
-			if (!after48(avr->dccpavr_ack_seqno, *ackno))
+		list_for_each_entry_from(avr, &av->av_records, avr_node) {
+			if (!after48(avr->avr_ack_seqno, *ackno))
 				goto found;
 		}
-		/* End of the dccpav_records list, not found, exit */
+		/* End of the av_records list, not found, exit */
 		break;
 found:
-		if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) {
+		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
 			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
 			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
 				dccp_pr_debug("%s ACK vector 0, len=%d, "
@@ -446,9 +437,9 @@ found:
 					      "ACKED!\n",
 					      dccp_role(sk), len,
 					      (unsigned long long)
-					      avr->dccpavr_ack_seqno,
+					      avr->avr_ack_seqno,
 					      (unsigned long long)
-					      avr->dccpavr_ack_ackno);
+					      avr->avr_ack_ackno);
 				dccp_ackvec_throw_record(av, avr);
 				break;
 			}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 9671ecd17e0..bcb64fb4ace 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -32,54 +32,54 @@
  *
  * This data structure is the one defined in RFC 4340, Appendix A.
  *
- * @dccpav_buf_head - circular buffer head
- * @dccpav_buf_tail - circular buffer tail
- * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
- * 		       buffer (i.e. %dccpav_buf_head)
- * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ * @av_buf_head - circular buffer head
+ * @av_buf_tail - circular buffer tail
+ * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
+ *		       buffer (i.e. %av_buf_head)
+ * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
  * 		       by the buffer with State 0
  *
  * Additionally, the HC-Receiver must keep some information about the
  * Ack Vectors it has recently sent. For each packet sent carrying an
  * Ack Vector, it remembers four variables:
  *
- * @dccpav_records - list of dccp_ackvec_record
- * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * @av_records - list of dccp_ackvec_record
+ * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
  *
- * @dccpav_time		- the time in usecs
- * @dccpav_buf - circular buffer of acknowledgeable packets
+ * @av_time - the time in usecs
+ * @av_buf - circular buffer of acknowledgeable packets
  */
 struct dccp_ackvec {
-	u64		dccpav_buf_ackno;
-	struct list_head dccpav_records;
-	ktime_t		dccpav_time;
-	u16		dccpav_buf_head;
-	u16		dccpav_vec_len;
-	u8		dccpav_buf_nonce;
-	u8		dccpav_ack_nonce;
-	u8		dccpav_buf[DCCP_MAX_ACKVEC_LEN];
+	u64			av_buf_ackno;
+	struct list_head	av_records;
+	ktime_t			av_time;
+	u16			av_buf_head;
+	u16			av_vec_len;
+	u8			av_buf_nonce;
+	u8			av_ack_nonce;
+	u8			av_buf[DCCP_MAX_ACKVEC_LEN];
 };
 
 /** struct dccp_ackvec_record - ack vector record
  *
  * ACK vector record as defined in Appendix A of spec.
  *
- * The list is sorted by dccpavr_ack_seqno
+ * The list is sorted by avr_ack_seqno
  *
- * @dccpavr_node - node in dccpav_records
- * @dccpavr_ack_seqno - sequence number of the packet this record was sent on
- * @dccpavr_ack_ackno - sequence number being acknowledged
- * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts
- * @dccpavr_ack_nonce - dccpav_ack_nonce at the time this record was sent
- * @dccpavr_sent_len - length of the record in dccpav_buf
+ * @avr_node - node in av_records
+ * @avr_ack_seqno - sequence number of the packet this record was sent on
+ * @avr_ack_ackno - sequence number being acknowledged
+ * @avr_ack_ptr - pointer into av_buf where this record starts
+ * @avr_ack_nonce - av_ack_nonce at the time this record was sent
+ * @avr_sent_len - lenght of the record in av_buf
  */
 struct dccp_ackvec_record {
-	struct list_head dccpavr_node;
-	u64		 dccpavr_ack_seqno;
-	u64		 dccpavr_ack_ackno;
-	u16		 dccpavr_ack_ptr;
-	u16		 dccpavr_sent_len;
-	u8		 dccpavr_ack_nonce;
+	struct list_head avr_node;
+	u64		 avr_ack_seqno;
+	u64		 avr_ack_ackno;
+	u16		 avr_ack_ptr;
+	u16		 avr_sent_len;
+	u8		 avr_ack_nonce;
 };
 
 struct sock;
@@ -105,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
 
 static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
-	return av->dccpav_vec_len;
+	return av->av_vec_len;
 }
 #else /* CONFIG_IP_DCCP_ACKVEC */
 static inline int dccp_ackvec_init(void)
-- 
cgit v1.2.3


From 3de5489f47febe0333b142e0eb6389b9924b2634 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 20:37:48 -0200
Subject: [CCID2]: Bug in reading Ack Vectors

In CCID2 the receiver-history is sorted in ascending order of sequence number,
but the processing of received Ack Vectors requires the list traversal in the
opposite direction.

The current code has a bug in this regard: the list traversal is upwards. As a
consequence, only Ack Vectors with a run length of 1 will pass, in all other
Ack Vectors the remaining (acked) sequence numbers are missed, and may later
falsely be identified as lost.

Note: This bug is only visible when Ack Ratio > 1, since otherwise the run
      lengths of Ack Vectors are 0.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index c9c465e8628..7873dc78b6b 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -666,7 +666,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 					done = 1;
 					break;
 				}
-				seqp = seqp->ccid2s_next;
+				seqp = seqp->ccid2s_prev;
 			}
 			if (done)
 				break;
-- 
cgit v1.2.3


From cfbbeabc8864902c4af1c0cadf0972b352930a26 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 20:43:59 -0200
Subject: [CCID2]: Fix sequence number arithmetic/comparisons

This replaces use of normal subtraction with modulo-48 subtraction.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 7873dc78b6b..55522182f2f 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -24,9 +24,6 @@
 
 /*
  * This implementation should follow RFC 4341
- *
- * BUGS:
- * - sequence number wrapping
  */
 
 #include "../ccid.h"
@@ -619,9 +616,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/* go through this ack vector */
 		while (veclen--) {
 			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-			u64 ackno_end_rl;
+			u64 ackno_end_rl = SUB48(ackno, rl);
 
-			dccp_set_seqno(&ackno_end_rl, ackno - rl);
 			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
 				       (unsigned long long)ackno,
 				       (unsigned long long)ackno_end_rl);
@@ -671,8 +667,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			if (done)
 				break;
 
-
-			dccp_set_seqno(&ackno, ackno_end_rl - 1);
+			ackno = SUB48(ackno_end_rl, 1);
 			vector++;
 		}
 		if (done)
-- 
cgit v1.2.3


From df054e1d00fdafa2e2920319df326ddb3f0d0413 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 21:32:53 -0200
Subject: [CCID2]: Don't assign negative values to Ack Ratio

Since it makes not sense to assign negative values to Ack Ratio, this
patch disallows this possibility.

As a consequence, a Bug test for negative Ack Ratio values becomes obsolete.

Furthermore, a check against overflow (as Ack Ratio may not exceed 2 bytes,
due to RFC 4340, 11.3) has been added.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 55522182f2f..f18235e8ce8 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,7 +140,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	return 1; /* XXX CCID should dequeue when ready instead of polling */
 }
 
-static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
+static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	/*
@@ -159,9 +159,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
 		if (val > max)
 			val = max;
 	}
+	if (val > 0xFFFF)		/* RFC 4340, 11.3 */
+		val = 0xFFFF;
 
-	ccid2_pr_debug("changing local ack ratio to %d\n", val);
-	WARN_ON(val <= 0);
+	ccid2_pr_debug("changing local ack ratio to %u\n", val);
 	dp->dccps_l_ack_ratio = val;
 }
 
@@ -572,7 +573,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
 				hctx->ccid2hctx_rpseq = 0;
 
-				ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1);
+				ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
 			}
 		}
 	}
-- 
cgit v1.2.3


From d50ad163e6db2dcc365b8d02b30350220f86df04 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 21:40:24 -0200
Subject: [CCID2]: Deadlock and spurious timeouts when Ack Ratio > cwnd

This patch removes a bug in the current code. I agree with Andrea's comment
that there is a problem here but the way it is treated does not fix it.

The problem is that whenever Ack Ratio > cwnd, starvation/deadlock occurs:
 * the receiver will not send an Ack until (Ack Ratio - cwnd) data packets
   have arrived;
 * the sender will not send any data packet before the receipt of an Ack
   advances the send window.
The only way that the connection then progresses was via RTO timeout. In one
extreme case (bulk transfer), it was observed that this happened for every single
packet; i.e. hundreds of packets, each a RTO timeout of 1..3 seconds apart:
a transfer which normally would take a fraction of a second thus grew to
several minutes.

The solution taken by this approach is to observe the relation

                   "Ack Ratio <= cwnd"

by using the constraint (1) from RFC 4341, 6.1.2; i.e. set

                 Ack Ratio = ceil(cwnd / 2)

and update it whenever either Ack Ratio or cwnd change. This ensures that
the deadlock problem can not arise.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index f18235e8ce8..ef19fb83429 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -143,32 +143,30 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
+
 	/*
-	 * XXX I don't really agree with val != 2.  If cwnd is 1, ack ratio
-	 * should be 1... it shouldn't be allowed to become 2.
-	 * -sorbo.
+	 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
+	 * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
+	 * acceptable since this causes starvation/deadlock whenever cwnd < 2.
+	 * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
 	 */
-	if (val != 2) {
-		const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-		int max = hctx->ccid2hctx_cwnd / 2;
-
-		/* round up */
-		if (hctx->ccid2hctx_cwnd & 1)
-			max++;
-
-		if (val > max)
-			val = max;
+	if (val == 0 || val > max_ratio) {
+		DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
+		val = max_ratio;
 	}
 	if (val > 0xFFFF)		/* RFC 4340, 11.3 */
 		val = 0xFFFF;
 
+	if (val == dp->dccps_l_ack_ratio)
+		return;
+
 	ccid2_pr_debug("changing local ack ratio to %u\n", val);
 	dp->dccps_l_ack_ratio = val;
 }
 
 static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val)
 {
-	/* XXX do we need to change ack ratio? */
 	hctx->ccid2hctx_cwnd = val? : 1;
 	ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd);
 }
@@ -519,9 +517,10 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 		ccid2_hc_tx_kill_rto_timer(sk);
 }
 
-static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
-				   struct ccid2_seq *seqp)
+static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 {
+	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
 	if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
 		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
 		return;
@@ -533,6 +532,10 @@ static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
 	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
 	if (hctx->ccid2hctx_ssthresh < 2)
 		hctx->ccid2hctx_ssthresh = 2;
+
+	/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
+	if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
+		ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
 }
 
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -648,7 +651,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				    !seqp->ccid2s_acked) {
 					if (state ==
 					    DCCP_ACKVEC_STATE_ECN_MARKED) {
-						ccid2_congestion_event(hctx,
+						ccid2_congestion_event(sk,
 								       seqp);
 					} else
 						ccid2_new_ack(sk, seqp,
@@ -713,7 +716,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				 * order to detect multiple congestion events in
 				 * one ack vector.
 				 */
-				ccid2_congestion_event(hctx, seqp);
+				ccid2_congestion_event(sk, seqp);
 				ccid2_hc_tx_dec_pipe(sk);
 			}
 			if (seqp == hctx->ccid2hctx_seqt)
-- 
cgit v1.2.3


From e18d7a9857cb620a8f70622c4e400be477c264cf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 24 Nov 2007 21:42:53 -0200
Subject: [DCCP]: Initialize dccp_sock before calling the ccid constructors

This is because in the next patch CCID2 will assume that dccps_mss_cache is
non-zero.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0aec7359212..d48005f653c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -174,6 +174,19 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 
 	dccp_minisock_init(&dp->dccps_minisock);
 
+	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
+	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
+	sk->sk_state		= DCCP_CLOSED;
+	sk->sk_write_space	= dccp_write_space;
+	icsk->icsk_sync_mss	= dccp_sync_mss;
+	dp->dccps_mss_cache	= 536;
+	dp->dccps_rate_last	= jiffies;
+	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
+	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
+	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
+
+	dccp_init_xmit_timers(sk);
+
 	/*
 	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
 	 * the listening (master) sock get CCID control blocks, which is not
@@ -213,18 +226,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 		INIT_LIST_HEAD(&dmsk->dccpms_conf);
 	}
 
-	dccp_init_xmit_timers(sk);
-	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
-	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
-	sk->sk_state		= DCCP_CLOSED;
-	sk->sk_write_space	= dccp_write_space;
-	icsk->icsk_sync_mss	= dccp_sync_mss;
-	dp->dccps_mss_cache	= 536;
-	dp->dccps_rate_last	= jiffies;
-	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
-	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
-	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From b00d2bbc45a287c9a72374582ce42205f3412419 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 21:44:30 -0200
Subject: [CCID2]: Larger initial windows also for CCID2

RFC 4341, sec. 5 states that "The cwnd parameter is initialized to at most
four packets for new connections, following the rules from [RFC3390]", which
is implemented by this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index ef19fb83429..9c5b6c73f7c 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -741,15 +741,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
+	struct dccp_sock *dp = dccp_sk(sk);
+	u32 max_ratio;
 
-	ccid2_change_cwnd(hctx, 1);
-	/* Initialize ssthresh to infinity.  This means that we will exit the
-	 * initial slow-start after the first packet loss.  This is what we
-	 * want.
-	 */
+	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 	hctx->ccid2hctx_ssthresh  = ~0;
 	hctx->ccid2hctx_numdupack = 3;
 
+	/*
+	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
+	 * packets for new connections, following the rules from [RFC3390]".
+	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
+	 */
+	hctx->ccid2hctx_cwnd = min(4U, max(2U, 4380U / dp->dccps_mss_cache));
+
+	/* Make sure that Ack Ratio is enabled and within bounds. */
+	max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
+	if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
+		dp->dccps_l_ack_ratio = max_ratio;
+
 	/* XXX init ~ to window size... */
 	if (ccid2_hc_tx_alloc_seq(hctx))
 		return -ENOMEM;
-- 
cgit v1.2.3


From 900bfed4718126e6c32244903b6f43e0990d04ad Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 21:58:33 -0200
Subject: [CCID2]: Disable broken Ack Ratio adaptation algorithm

This comments out a problematic section comprising a half-finished algorithm:

 - The variable `ccid2hctx_ackloss' is never initialised to a value different from 0 and
   hence in fact is a read-only constant.
 - The `arsent' variable counts packets other than Acks (it is incremented for every packet),
   and there is no test for Ack Loss.
 - The concept of counting Acks as such leads to a complex calculation, and the calculation
   at the moment is inconsistent with this concept.
   The problem is that the number of Acks - rather than the number of windows - is counted,
   which leads to a complex (cubic/quadratic) expression - this is not even implemented.

In its current state, the commented-out algorithm interfers with normal processing by
changing Ack Ratio incorrectly, and at the wrong times.

A new algorithm is necessary, which will not necessarily use the same variables as used by
the unfinished one; hence the old variables have been removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 23 +++++++++++++++++++++--
 net/dccp/ccids/ccid2.h |  4 ----
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9c5b6c73f7c..a901202ada6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -224,8 +224,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hctx->ccid2hctx_sent	= 0;
 
 	/* clear ack ratio state. */
-	hctx->ccid2hctx_arsent	 = 0;
-	hctx->ccid2hctx_ackloss  = 0;
 	hctx->ccid2hctx_rpseq	 = 0;
 	hctx->ccid2hctx_rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
@@ -289,6 +287,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 
 	hctx->ccid2hctx_sent++;
 
+	/*
+	 * FIXME: The code below is broken and the variables have been removed
+	 * from the socket struct. The `ackloss' variable was always set to 0,
+	 * and with arsent there are several problems:
+	 *  (i) it doesn't just count the number of Acks, but all sent packets;
+	 *  (ii) it is expressed in # of packets, not # of windows, so the
+	 *  comparison below uses the wrong formula: Appendix A of RFC 4341
+	 *  comes up with the number K = cwnd / (R^2 - R) of consecutive windows
+	 *  of data with no lost or marked Ack packets. If arsent were the # of
+	 *  consecutive Acks received without loss, then Ack Ratio needs to be
+	 *  decreased by 1 when
+	 *	      arsent >=  K * cwnd / R  =  cwnd^2 / (R^3 - R^2)
+	 *  where cwnd / R is the number of Acks received per window of data
+	 *  (cf. RFC 4341, App. A). The problems are that
+	 *  - arsent counts other packets as well;
+	 *  - the comparison uses a formula different from RFC 4341;
+	 *  - computing a cubic/quadratic equation each time is too complicated.
+	 *  Hence a different algorithm is needed.
+	 */
+#if 0
 	/* Ack Ratio.  Need to maintain a concept of how many windows we sent */
 	hctx->ccid2hctx_arsent++;
 	/* We had an ack loss in this window... */
@@ -316,6 +334,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 			hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
 		}
 	}
+#endif
 
 	/* setup RTO timer */
 	if (!timer_pending(&hctx->ccid2hctx_rtotimer))
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index d9daa534c9b..ed95f8f0cc4 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -44,8 +44,6 @@ struct ccid2_seq {
  * @ccid2hctx_acks - ACKS recv in AI phase
  * @ccid2hctx_sent - packets sent in this window
  * @ccid2hctx_lastrtt -time RTT was last measured
- * @ccid2hctx_arsent - packets sent [ack ratio]
- * @ccid2hctx_ackloss - ack was lost in this win
  * @ccid2hctx_rpseq - last consecutive seqno
  * @ccid2hctx_rpdupack - dupacks since rpseq
 */
@@ -66,8 +64,6 @@ struct ccid2_hc_tx_sock {
 	int			ccid2hctx_sent;
 	unsigned long		ccid2hctx_lastrtt;
 	struct timer_list	ccid2hctx_rtotimer;
-	unsigned long		ccid2hctx_arsent;
-	int			ccid2hctx_ackloss;
 	u64			ccid2hctx_rpseq;
 	int			ccid2hctx_rpdupack;
 	int			ccid2hctx_sendwait;
-- 
cgit v1.2.3


From 7792cd8885954eb7ac38e781a7a9faae5a80a3d8 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:01:56 -0200
Subject: [CCID2]: Remove unused variable

This removes a variable `ccid2hctx_sent' which is incremented but
never referenced/read (i.e., dead code).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 4 ----
 net/dccp/ccids/ccid2.h | 2 --
 2 files changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index a901202ada6..9fa0eb5f691 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -221,7 +221,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hctx->ccid2hctx_seqt	= hctx->ccid2hctx_seqh;
 	hctx->ccid2hctx_ssacks	= 0;
 	hctx->ccid2hctx_acks	= 0;
-	hctx->ccid2hctx_sent	= 0;
 
 	/* clear ack ratio state. */
 	hctx->ccid2hctx_rpseq	 = 0;
@@ -285,8 +284,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
 		       hctx->ccid2hctx_pipe);
 
-	hctx->ccid2hctx_sent++;
-
 	/*
 	 * FIXME: The code below is broken and the variables have been removed
 	 * from the socket struct. The `ackloss' variable was always set to 0,
@@ -517,7 +514,6 @@ static inline void ccid2_new_ack(struct sock *sk,
 		ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
 			       hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
 			       hctx->ccid2hctx_rto, HZ, r);
-		hctx->ccid2hctx_sent = 0;
 	}
 
 	/* we got a new ack, so re-start RTO timer */
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index ed95f8f0cc4..6d0b4e32b16 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,7 +42,6 @@ struct ccid2_seq {
  *
  * @ccid2hctx_ssacks - ACKs recv in slow start
  * @ccid2hctx_acks - ACKS recv in AI phase
- * @ccid2hctx_sent - packets sent in this window
  * @ccid2hctx_lastrtt -time RTT was last measured
  * @ccid2hctx_rpseq - last consecutive seqno
  * @ccid2hctx_rpdupack - dupacks since rpseq
@@ -61,7 +60,6 @@ struct ccid2_hc_tx_sock {
 	long			ccid2hctx_rto;
 	long			ccid2hctx_srtt;
 	long			ccid2hctx_rttvar;
-	int			ccid2hctx_sent;
 	unsigned long		ccid2hctx_lastrtt;
 	struct timer_list	ccid2hctx_rtotimer;
 	u64			ccid2hctx_rpseq;
-- 
cgit v1.2.3


From 63df18ad7fb91c65dafc89d3cf94a58a486ad416 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:04:35 -0200
Subject: [CCID2]: Replace read-only variable with constant

This replaces the field member `numdupack', which was used as a read-only
constant in the code, with a #define.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 8 +++-----
 net/dccp/ccids/ccid2.h | 3 ++-
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9fa0eb5f691..a3c42cd00b0 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -586,8 +586,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid2hctx_rpdupack++;
 
 			/* check if we got enough dupacks */
-			if (hctx->ccid2hctx_rpdupack >=
-			    hctx->ccid2hctx_numdupack) {
+			if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
 				hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
 				hctx->ccid2hctx_rpseq = 0;
 
@@ -708,7 +707,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	while (1) {
 		if (seqp->ccid2s_acked) {
 			done++;
-			if (done == hctx->ccid2hctx_numdupack)
+			if (done == NUMDUPACK)
 				break;
 		}
 		if (seqp == hctx->ccid2hctx_seqt)
@@ -719,7 +718,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	/* If there are at least 3 acknowledgements, anything unacknowledged
 	 * below the last sequence number is considered lost
 	 */
-	if (done == hctx->ccid2hctx_numdupack) {
+	if (done == NUMDUPACK) {
 		struct ccid2_seq *last_acked = seqp;
 
 		/* check for lost packets */
@@ -761,7 +760,6 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 	hctx->ccid2hctx_ssthresh  = ~0;
-	hctx->ccid2hctx_numdupack = 3;
 
 	/*
 	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6d0b4e32b16..443f08a667a 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -24,6 +24,8 @@
 #include <linux/timer.h>
 #include <linux/types.h>
 #include "../ccid.h"
+/* NUMDUPACK parameter from RFC 4341, p. 6 */
+#define NUMDUPACK	3
 
 struct sock;
 
@@ -52,7 +54,6 @@ struct ccid2_hc_tx_sock {
 	int			ccid2hctx_acks;
 	unsigned int		ccid2hctx_ssthresh;
 	int			ccid2hctx_pipe;
-	int			ccid2hctx_numdupack;
 	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
 	int			ccid2hctx_seqbufc;
 	struct ccid2_seq	*ccid2hctx_seqh;
-- 
cgit v1.2.3


From 3deeadd74bbf916b502d307222833ffcf68db557 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:05:51 -0200
Subject: [CCID2]: Replace cwnd assignment-function with assignment

The current function ccid2_change_cwnd in effect makes only an assignment, as
the test whether cwnd has reached 0 is only required when cwnd is halved.

This patch simplifies the code by replacing the function with the assignment
it performs.

Furthermore, since ssthresh derives from cwnd and appears in many assignments and
comparisons, the type of ssthresh has also been changed to match that of cwnd.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 22 +++++++---------------
 net/dccp/ccids/ccid2.h |  2 +-
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index a3c42cd00b0..747fa1c4e42 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -165,12 +165,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val)
-{
-	hctx->ccid2hctx_cwnd = val? : 1;
-	ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd);
-}
-
 static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
 {
 	ccid2_pr_debug("change SRTT to %ld\n", val);
@@ -212,10 +206,10 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 
 	/* adjust pipe, cwnd etc */
 	ccid2_change_pipe(hctx, 0);
-	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
+	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
 	if (hctx->ccid2hctx_ssthresh < 2)
 		hctx->ccid2hctx_ssthresh = 2;
-	ccid2_change_cwnd(hctx, 1);
+	hctx->ccid2hctx_cwnd	 = 1;
 
 	/* clear state about stuff we sent */
 	hctx->ccid2hctx_seqt	= hctx->ccid2hctx_seqh;
@@ -440,7 +434,7 @@ static inline void ccid2_new_ack(struct sock *sk,
 			/* increase every 2 acks */
 			hctx->ccid2hctx_ssacks++;
 			if (hctx->ccid2hctx_ssacks == 2) {
-				ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
+				hctx->ccid2hctx_cwnd++;
 				hctx->ccid2hctx_ssacks = 0;
 				*maxincr = *maxincr - 1;
 			}
@@ -453,7 +447,7 @@ static inline void ccid2_new_ack(struct sock *sk,
 		hctx->ccid2hctx_acks++;
 
 		if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
-			ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
+			hctx->ccid2hctx_cwnd++;
 			hctx->ccid2hctx_acks = 0;
 		}
 	}
@@ -543,10 +537,8 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 
 	hctx->ccid2hctx_last_cong = jiffies;
 
-	ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);
-	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
-	if (hctx->ccid2hctx_ssthresh < 2)
-		hctx->ccid2hctx_ssthresh = 2;
+	hctx->ccid2hctx_cwnd     = hctx->ccid2hctx_cwnd / 2 ? : 1U;
+	hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
 
 	/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
 	if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
@@ -759,7 +751,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	u32 max_ratio;
 
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
-	hctx->ccid2hctx_ssthresh  = ~0;
+	hctx->ccid2hctx_ssthresh  = ~0U;
 
 	/*
 	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 443f08a667a..b72e9556a15 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -50,9 +50,9 @@ struct ccid2_seq {
 */
 struct ccid2_hc_tx_sock {
 	u32			ccid2hctx_cwnd;
+	u32			ccid2hctx_ssthresh;
 	int			ccid2hctx_ssacks;
 	int			ccid2hctx_acks;
-	unsigned int		ccid2hctx_ssthresh;
 	int			ccid2hctx_pipe;
 	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
 	int			ccid2hctx_seqbufc;
-- 
cgit v1.2.3


From 95b21d7e9d099f1cffca08e40f292d6658a88b3c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:06:52 -0200
Subject: [CCID2]: Replace pipe assignment-function with assignment

The function ccid2_change_pipe only does an assignment. This patch simplifies the code by
replacing the function with the assignment it performs.

Furthermore, the type of pipe is promoted from `signed' to unsigned (increasing the range).
As a result, a BUG_ON test for negative values now becomes obsolete (for safety not removed,
but replaced with a less annoying `DCCP_BUG').

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 16 ++++++----------
 net/dccp/ccids/ccid2.h |  3 ++-
 2 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 747fa1c4e42..237007bb55a 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -171,11 +171,6 @@ static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
 	hctx->ccid2hctx_srtt = val;
 }
 
-static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
-{
-	hctx->ccid2hctx_pipe = val;
-}
-
 static void ccid2_start_rto_timer(struct sock *sk);
 
 static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -205,11 +200,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	ccid2_start_rto_timer(sk);
 
 	/* adjust pipe, cwnd etc */
-	ccid2_change_pipe(hctx, 0);
 	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
 	if (hctx->ccid2hctx_ssthresh < 2)
 		hctx->ccid2hctx_ssthresh = 2;
 	hctx->ccid2hctx_cwnd	 = 1;
+	hctx->ccid2hctx_pipe	 = 0;
 
 	/* clear state about stuff we sent */
 	hctx->ccid2hctx_seqt	= hctx->ccid2hctx_seqh;
@@ -248,8 +243,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 
 	BUG_ON(!hctx->ccid2hctx_sendwait);
 	hctx->ccid2hctx_sendwait = 0;
-	ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
-	BUG_ON(hctx->ccid2hctx_pipe < 0);
+	hctx->ccid2hctx_pipe++;
 
 	/* There is an issue.  What if another packet is sent between
 	 * packet_send() and packet_sent().  Then the sequence number would be
@@ -519,8 +513,10 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1);
-	BUG_ON(hctx->ccid2hctx_pipe < 0);
+	if (hctx->ccid2hctx_pipe == 0)
+		DCCP_BUG("pipe == 0");
+	else
+		hctx->ccid2hctx_pipe--;
 
 	if (hctx->ccid2hctx_pipe == 0)
 		ccid2_hc_tx_kill_rto_timer(sk);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index b72e9556a15..bc659f00cb9 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,6 +42,7 @@ struct ccid2_seq {
 
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
+ * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
  * @ccid2hctx_ssacks - ACKs recv in slow start
  * @ccid2hctx_acks - ACKS recv in AI phase
  * @ccid2hctx_lastrtt -time RTT was last measured
@@ -51,9 +52,9 @@ struct ccid2_seq {
 struct ccid2_hc_tx_sock {
 	u32			ccid2hctx_cwnd;
 	u32			ccid2hctx_ssthresh;
+	u32			ccid2hctx_pipe;
 	int			ccid2hctx_ssacks;
 	int			ccid2hctx_acks;
-	int			ccid2hctx_pipe;
 	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
 	int			ccid2hctx_seqbufc;
 	struct ccid2_seq	*ccid2hctx_seqh;
-- 
cgit v1.2.3


From da98e0b5d4c1f88b7c9e63e8918783cd4905be2b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:08:27 -0200
Subject: [CCID2]: Redundant debugging output

This reduces the amount of redundant debugging messages:

 * pipe/cwnd are printed in both tx_send_packet() and tx_packet_sent().
   Both functions are called immediately after one another, so one occurrence is sufficient.

 * Since tx_packet_sent() prints pipe/cwnd already, the second printk for pipe is redundant.

 * In tx_packet_sent() the check_sanity function is called twice (at the begin and at the end).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 237007bb55a..2d1b7e30a73 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,9 +126,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe,
-		       hctx->ccid2hctx_cwnd);
-
 	if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
 		/* OK we can send... make sure previous packet was sent off */
 		if (!hctx->ccid2hctx_sendwait) {
@@ -239,8 +236,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	struct ccid2_seq *next;
 	u64 seq;
 
-	ccid2_hc_tx_check_sanity(hctx);
-
 	BUG_ON(!hctx->ccid2hctx_sendwait);
 	hctx->ccid2hctx_sendwait = 0;
 	hctx->ccid2hctx_pipe++;
@@ -326,7 +321,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 		ccid2_start_rto_timer(sk);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-	ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
 	ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
 	do {
 		struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
-- 
cgit v1.2.3


From 83399361c30f2ffae20ee348ba9ada9a856d499a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:09:35 -0200
Subject: [CCID2]: Remove redundant synchronisation variable

This removes the synchronisation variable `ccid2hctx_sendwait', which is set to 1
when the CCID2 sender may send a new packet, and which is set to 0 otherwise

The variable is redundant, since it is only used in combination with the hc_tx_send_packet/
hc_tx_packet_sent function pair. Both functions are called under socket lock, so the
following happens when the CCID2 may send a new packet:

 * it sets sendwait = 1 in tx_send_packet and returns 0;
 * the subsequent call to tx_packet_sent clears the sendwait flag;
 * since tx_send_packet returns 0 if and only if sendwait == 1, the BUG_ON condition
   in tx_packet_sent is never satisfied, since that function is never called when
   tx_send_packet returns a value different from 0 (cf. dccp_write_xmit);
 * the call to tx_packet_sent clears the flag so that the condition "!sendwait" is
   true the next time tx_packet_sent is called.

In other words, it is sufficient to just return 0 / not-0 to synchronise tx_send_packet
and tx_packet_sent -- which is what the patch does.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 11 ++---------
 net/dccp/ccids/ccid2.h |  1 -
 2 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 2d1b7e30a73..4f6c35261b6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,13 +126,8 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
-		/* OK we can send... make sure previous packet was sent off */
-		if (!hctx->ccid2hctx_sendwait) {
-			hctx->ccid2hctx_sendwait = 1;
-			return 0;
-		}
-	}
+	if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
+		return 0;
 
 	return 1; /* XXX CCID should dequeue when ready instead of polling */
 }
@@ -236,8 +231,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	struct ccid2_seq *next;
 	u64 seq;
 
-	BUG_ON(!hctx->ccid2hctx_sendwait);
-	hctx->ccid2hctx_sendwait = 0;
 	hctx->ccid2hctx_pipe++;
 
 	/* There is an issue.  What if another packet is sent between
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index bc659f00cb9..2671f8ebe29 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -66,7 +66,6 @@ struct ccid2_hc_tx_sock {
 	struct timer_list	ccid2hctx_rtotimer;
 	u64			ccid2hctx_rpseq;
 	int			ccid2hctx_rpdupack;
-	int			ccid2hctx_sendwait;
 	unsigned long		ccid2hctx_last_cong;
 	u64			ccid2hctx_high_ack;
 };
-- 
cgit v1.2.3


From a302002516a094015e5d004b8d939a8a34559c82 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:10:29 -0200
Subject: [CCID2]: Remove redundant ack-counting variable

The code used two different variables to count Acks, one of them redundant.
This patch reduces the number of Ack counters to one.

The type of the Ack counter has also been changed to u32 (twice the range of int);
and the variable has been renamed into `packets_acked' - for consistency with
RFC 3465 (and similarly named variables are used by TCP and SCTP).

Lastly, a slightly less aggressive `maxincr' increment is used (for even Ack Ratios,
maxincr was Ack Ratio/2 + 1 instead of Ack Ratio/2).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 46 +++++++++++++++-------------------------------
 net/dccp/ccids/ccid2.h |  6 ++----
 2 files changed, 17 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 4f6c35261b6..02d126d9a71 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -199,9 +199,8 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hctx->ccid2hctx_pipe	 = 0;
 
 	/* clear state about stuff we sent */
-	hctx->ccid2hctx_seqt	= hctx->ccid2hctx_seqh;
-	hctx->ccid2hctx_ssacks	= 0;
-	hctx->ccid2hctx_acks	= 0;
+	hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
+	hctx->ccid2hctx_packets_acked = 0;
 
 	/* clear ack ratio state. */
 	hctx->ccid2hctx_rpseq	 = 0;
@@ -406,31 +405,15 @@ static inline void ccid2_new_ack(struct sock *sk,
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	/* slow start */
 	if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
-		hctx->ccid2hctx_acks = 0;
-
-		/* We can increase cwnd at most maxincr [ack_ratio/2] */
-		if (*maxincr) {
-			/* increase every 2 acks */
-			hctx->ccid2hctx_ssacks++;
-			if (hctx->ccid2hctx_ssacks == 2) {
-				hctx->ccid2hctx_cwnd++;
-				hctx->ccid2hctx_ssacks = 0;
-				*maxincr = *maxincr - 1;
-			}
-		} else {
-			/* increased cwnd enough for this single ack */
-			hctx->ccid2hctx_ssacks = 0;
-		}
-	} else {
-		hctx->ccid2hctx_ssacks = 0;
-		hctx->ccid2hctx_acks++;
-
-		if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
-			hctx->ccid2hctx_cwnd++;
-			hctx->ccid2hctx_acks = 0;
+		if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
+			hctx->ccid2hctx_cwnd += 1;
+			*maxincr	     -= 1;
+			hctx->ccid2hctx_packets_acked = 0;
 		}
+	} else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
+			hctx->ccid2hctx_cwnd += 1;
+			hctx->ccid2hctx_packets_acked = 0;
 	}
 
 	/* update RTO */
@@ -596,12 +579,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 
-	/* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
-	 * this single ack.  I round up.
-	 * -sorbo.
+	/*
+	 * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
+	 * packets per acknowledgement. Rounding up avoids that cwnd is not
+	 * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
 	 */
-	maxincr = dp->dccps_l_ack_ratio >> 1;
-	maxincr++;
+	if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
+		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
 	while ((offset = ccid2_ackvector(sk, skb, offset,
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 2671f8ebe29..2c94ca02901 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -43,8 +43,7 @@ struct ccid2_seq {
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
  * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
- * @ccid2hctx_ssacks - ACKs recv in slow start
- * @ccid2hctx_acks - ACKS recv in AI phase
+ * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
  * @ccid2hctx_lastrtt -time RTT was last measured
  * @ccid2hctx_rpseq - last consecutive seqno
  * @ccid2hctx_rpdupack - dupacks since rpseq
@@ -53,8 +52,7 @@ struct ccid2_hc_tx_sock {
 	u32			ccid2hctx_cwnd;
 	u32			ccid2hctx_ssthresh;
 	u32			ccid2hctx_pipe;
-	int			ccid2hctx_ssacks;
-	int			ccid2hctx_acks;
+	u32			ccid2hctx_packets_acked;
 	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
 	int			ccid2hctx_seqbufc;
 	struct ccid2_seq	*ccid2hctx_seqh;
-- 
cgit v1.2.3


From dcfbc7e97a2e3a0d73a2e41e1bddb988dcca701e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:12:06 -0200
Subject: [CCID2]: Remove misleading comment

This removes a comment which identifies an `issue' with dccp_write_xmit() where there is none.
The comment assumes it is possible that a packet is sent between the calls to

	ccid_hc_tx_send_packet(),
	dccp_transmit_skb(),
	ccid_hc_tx_packet_sent()

(in the above order) in dccp_write_xmit().

I think that this is impossible, since dccp_write_xmit() is always called under lock:

 * when called as dccp_write_xmit(sk, 1) from dccp_send_close(), the socket is locked
   (see code comment above dccp_send_close());
 * when called as dccp_write_xmit(sk, 0) from dccp_send_msg(), it is after lock_sock() has been called;
 * when called as dccp_write_xmit(sk, 0) from dccp_write_xmit_timer(), bh_lock_sock() has been called
   and the if/else statement has made sure that sk_lock.owner is not set;
 * there are no other places where dccp_write_xmit() is called.

Furthermore, the debug statement for printing the sequence number of the packet just sent has been
removed, since the entire list is being printed anyway and so the entry of that number appears last.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 02d126d9a71..48eeb4494e8 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -228,18 +228,10 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 	struct ccid2_seq *next;
-	u64 seq;
 
 	hctx->ccid2hctx_pipe++;
 
-	/* There is an issue.  What if another packet is sent between
-	 * packet_send() and packet_sent().  Then the sequence number would be
-	 * wrong.
-	 * -sorbo.
-	 */
-	seq = dp->dccps_gss;
-
-	hctx->ccid2hctx_seqh->ccid2s_seq   = seq;
+	hctx->ccid2hctx_seqh->ccid2s_seq   = dp->dccps_gss;
 	hctx->ccid2hctx_seqh->ccid2s_acked = 0;
 	hctx->ccid2hctx_seqh->ccid2s_sent  = jiffies;
 
@@ -313,7 +305,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 		ccid2_start_rto_timer(sk);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-	ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
 	do {
 		struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
 
-- 
cgit v1.2.3


From d83bd95bf11444993b9c405b255ffa644c32d414 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 16 Dec 2007 16:06:03 -0800
Subject: [DCCP]: Check for unread data on close

This removes one FIXME with regard to close when there is still unread data.
The mechanism is implemented similar to TCP: with regard to DCCP-specifics,
a Reset with Code 2, "Aborted" is sent to the peer.

This corresponds in part to RFC 4340, 8.1.1 and 8.1.5.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d48005f653c..5f47b458ed8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -891,6 +891,7 @@ void dccp_close(struct sock *sk, long timeout)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
+	u32 data_was_unread = 0;
 	int state;
 
 	lock_sock(sk);
@@ -913,12 +914,17 @@ void dccp_close(struct sock *sk, long timeout)
 	 * descriptor close, not protocol-sourced closes, because the
 	  *reader process may not have drained the data yet!
 	 */
-	/* FIXME: check for unread data */
 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		data_was_unread += skb->len;
 		__kfree_skb(skb);
 	}
 
-	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+	if (data_was_unread) {
+		/* Unread data was tossed, send an appropriate Reset Code */
+		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
+		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+		dccp_set_state(sk, DCCP_CLOSED);
+	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
 	} else if (dccp_close_state(sk)) {
-- 
cgit v1.2.3


From ce865a61c810c971b47f57c729ec6e9b2d522d94 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 24 Nov 2007 22:14:15 -0200
Subject: [DCCP]: Add support for abortive release

This continues from the previous patch and adds support for actively aborting
a DCCP connection, using a Reset Code 2, "Aborted" to inform the peer of an
abortive release.

I have tried this in various client/server settings and it works as expected.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5f47b458ed8..73006b74767 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -276,6 +276,12 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
 	return inet_csk_listen_start(sk, backlog);
 }
 
+static inline int dccp_need_reset(int state)
+{
+	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
+	       state != DCCP_REQUESTING;
+}
+
 int dccp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -286,10 +292,15 @@ int dccp_disconnect(struct sock *sk, int flags)
 	if (old_state != DCCP_CLOSED)
 		dccp_set_state(sk, DCCP_CLOSED);
 
-	/* ABORT function of RFC793 */
+	/*
+	 * This corresponds to the ABORT function of RFC793, sec. 3.8
+	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
+	 */
 	if (old_state == DCCP_LISTEN) {
 		inet_csk_listen_stop(sk);
-	/* FIXME: do the active reset thing */
+	} else if (dccp_need_reset(old_state)) {
+		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+		sk->sk_err = ECONNRESET;
 	} else if (old_state == DCCP_REQUESTING)
 		sk->sk_err = ECONNRESET;
 
-- 
cgit v1.2.3


From 8d8ad9d7c4bfe79bc91b7fc419ecfb9dcdfe6a51 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 26 Nov 2007 20:10:50 +0800
Subject: [NET]: Name magic constants in sock_wake_async()

The sock_wake_async() performs a bit different actions
depending on "how" argument. Unfortunately this argument
ony has numerical magic values.

I propose to give names to their constants to help people
reading this function callers understand what's going on
without looking into this function all the time.

I suppose this is 2.6.25 material, but if it's not (or the
naming seems poor/bad/awful), I can rework it against the
current net-2.6 tree.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/common.c     |  2 +-
 net/core/sock.c      |  8 ++++----
 net/core/stream.c    |  2 +-
 net/dccp/input.c     |  8 ++++----
 net/dccp/output.c    |  2 +-
 net/ipv4/tcp_input.c | 12 ++++++------
 net/rxrpc/af_rxrpc.c |  2 +-
 net/sctp/socket.c    |  3 ++-
 net/socket.c         |  9 ++++-----
 net/unix/af_unix.c   |  8 ++++----
 10 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/atm/common.c b/net/atm/common.c
index eba09a04f6b..c865517ba44 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -113,7 +113,7 @@ static void vcc_write_space(struct sock *sk)
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 			wake_up_interruptible(sk->sk_sleep);
 
-		sk_wake_async(sk, 2, POLL_OUT);
+		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
 	read_unlock(&sk->sk_callback_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index eac7aa0721d..118214047ed 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1498,7 +1498,7 @@ static void sock_def_error_report(struct sock *sk)
 	read_lock(&sk->sk_callback_lock);
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible(sk->sk_sleep);
-	sk_wake_async(sk,0,POLL_ERR);
+	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
 	read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1507,7 +1507,7 @@ static void sock_def_readable(struct sock *sk, int len)
 	read_lock(&sk->sk_callback_lock);
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible(sk->sk_sleep);
-	sk_wake_async(sk,1,POLL_IN);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1524,7 +1524,7 @@ static void sock_def_write_space(struct sock *sk)
 
 		/* Should agree with poll, otherwise some programs break */
 		if (sock_writeable(sk))
-			sk_wake_async(sk, 2, POLL_OUT);
+			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
 	read_unlock(&sk->sk_callback_lock);
@@ -1539,7 +1539,7 @@ void sk_send_sigurg(struct sock *sk)
 {
 	if (sk->sk_socket && sk->sk_socket->file)
 		if (send_sigurg(&sk->sk_socket->file->f_owner))
-			sk_wake_async(sk, 3, POLL_PRI);
+			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
 }
 
 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
diff --git a/net/core/stream.c b/net/core/stream.c
index b2fb846f42a..5586879bb9b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk)
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 			wake_up_interruptible(sk->sk_sleep);
 		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
-			sock_wake_async(sock, 2, POLL_OUT);
+			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 }
 
diff --git a/net/dccp/input.c b/net/dccp/input.c
index df0fb2c149a..ef299fbd7c2 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -37,7 +37,7 @@ static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
 	dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
 	dccp_fin(sk, skb);
 	dccp_set_state(sk, DCCP_CLOSED);
-	sk_wake_async(sk, 1, POLL_HUP);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
 }
 
 static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
@@ -90,7 +90,7 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
 	dccp_fin(sk, skb);
 
 	if (err && !sock_flag(sk, SOCK_DEAD))
-		sk_wake_async(sk, 0, POLL_ERR);
+		sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
 	dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
 }
 
@@ -416,7 +416,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 
 		if (!sock_flag(sk, SOCK_DEAD)) {
 			sk->sk_state_change(sk);
-			sk_wake_async(sk, 0, POLL_OUT);
+			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 		}
 
 		if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
@@ -624,7 +624,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		switch (old_state) {
 		case DCCP_PARTOPEN:
 			sk->sk_state_change(sk);
-			sk_wake_async(sk, 0, POLL_OUT);
+			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 			break;
 		}
 	} else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
diff --git a/net/dccp/output.c b/net/dccp/output.c
index f49544618f2..33ce737ef3a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -170,7 +170,7 @@ void dccp_write_space(struct sock *sk)
 		wake_up_interruptible(sk->sk_sleep);
 	/* Should agree with poll, otherwise some programs break */
 	if (sock_writeable(sk))
-		sk_wake_async(sk, 2, POLL_OUT);
+		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 
 	read_unlock(&sk->sk_callback_lock);
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 79996b16b94..97ea3eda206 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3595,9 +3595,9 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 		/* Do not send POLL_HUP for half duplex close. */
 		if (sk->sk_shutdown == SHUTDOWN_MASK ||
 		    sk->sk_state == TCP_CLOSE)
-			sk_wake_async(sk, 1, POLL_HUP);
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
 		else
-			sk_wake_async(sk, 1, POLL_IN);
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	}
 }
 
@@ -4956,7 +4956,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (!sock_flag(sk, SOCK_DEAD)) {
 			sk->sk_state_change(sk);
-			sk_wake_async(sk, 0, POLL_OUT);
+			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 		}
 
 		if (sk->sk_write_pending ||
@@ -5186,9 +5186,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 * are not waked up, because sk->sk_sleep ==
 				 * NULL and sk->sk_socket == NULL.
 				 */
-				if (sk->sk_socket) {
-					sk_wake_async(sk,0,POLL_OUT);
-				}
+				if (sk->sk_socket)
+					sk_wake_async(sk,
+							SOCK_WAKE_IO, POLL_OUT);
 
 				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 				tp->snd_wnd = ntohs(th->window) <<
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d6389450c4b..5e82f1c0afb 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
 	if (rxrpc_writable(sk)) {
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 			wake_up_interruptible(sk->sk_sleep);
-		sk_wake_async(sk, 2, POLL_OUT);
+		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 	read_unlock(&sk->sk_callback_lock);
 }
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ea9649ca0b2..dc2f9221f09 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6008,7 +6008,8 @@ static void __sctp_write_space(struct sctp_association *asoc)
 			 */
 			if (sock->fasync_list &&
 			    !(sk->sk_shutdown & SEND_SHUTDOWN))
-				sock_wake_async(sock, 2, POLL_OUT);
+				sock_wake_async(sock,
+						SOCK_WAKE_SPACE, POLL_OUT);
 		}
 	}
 }
diff --git a/net/socket.c b/net/socket.c
index aeeab388cc3..9ebca5c695d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1070,20 +1070,19 @@ int sock_wake_async(struct socket *sock, int how, int band)
 	if (!sock || !sock->fasync_list)
 		return -1;
 	switch (how) {
-	case 1:
-
+	case SOCK_WAKE_WAITD:
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 			break;
 		goto call_kill;
-	case 2:
+	case SOCK_WAKE_SPACE:
 		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
 			break;
 		/* fall through */
-	case 0:
+	case SOCK_WAKE_IO:
 call_kill:
 		__kill_fasync(sock->fasync_list, SIGIO, band);
 		break;
-	case 3:
+	case SOCK_WAKE_URG:
 		__kill_fasync(sock->fasync_list, SIGURG, band);
 	}
 	return 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0f1ecbf86d0..393197afb19 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -317,7 +317,7 @@ static void unix_write_space(struct sock *sk)
 	if (unix_writable(sk)) {
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 			wake_up_interruptible_sync(sk->sk_sleep);
-		sk_wake_async(sk, 2, POLL_OUT);
+		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 	read_unlock(&sk->sk_callback_lock);
 }
@@ -403,7 +403,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
 			unix_state_unlock(skpair);
 			skpair->sk_state_change(skpair);
 			read_lock(&skpair->sk_callback_lock);
-			sk_wake_async(skpair,1,POLL_HUP);
+			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 			read_unlock(&skpair->sk_callback_lock);
 		}
 		sock_put(skpair); /* It may now die */
@@ -1900,9 +1900,9 @@ static int unix_shutdown(struct socket *sock, int mode)
 			other->sk_state_change(other);
 			read_lock(&other->sk_callback_lock);
 			if (peer_mode == SHUTDOWN_MASK)
-				sk_wake_async(other,1,POLL_HUP);
+				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
 			else if (peer_mode & RCV_SHUTDOWN)
-				sk_wake_async(other,1,POLL_IN);
+				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
 			read_unlock(&other->sk_callback_lock);
 		}
 		if (other)
-- 
cgit v1.2.3


From 82d8a867ffaed7fe58e789103b32c0fc6b79dafd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 26 Nov 2007 20:12:58 +0800
Subject: [NET]: Make macro to specify the ptype_base size

Currently this size is 16, but as the comment says this
is so only because all the chains (except one) has the
length 1. I think, that some day this may change, so
growing this hash will be much easier.

Besides, symbolic names are read better than magic constants.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d0e23d8310f..d0d2675aaed 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -150,8 +150,11 @@
  *		86DD	IPv6
  */
 
+#define PTYPE_HASH_SIZE	(16)
+#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
+
 static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
+static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
 
 #ifdef CONFIG_NET_DMA
@@ -362,7 +365,7 @@ void dev_add_pack(struct packet_type *pt)
 	if (pt->type == htons(ETH_P_ALL))
 		list_add_rcu(&pt->list, &ptype_all);
 	else {
-		hash = ntohs(pt->type) & 15;
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 		list_add_rcu(&pt->list, &ptype_base[hash]);
 	}
 	spin_unlock_bh(&ptype_lock);
@@ -391,7 +394,7 @@ void __dev_remove_pack(struct packet_type *pt)
 	if (pt->type == htons(ETH_P_ALL))
 		head = &ptype_all;
 	else
-		head = &ptype_base[ntohs(pt->type) & 15];
+		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 
 	list_for_each_entry(pt1, head, list) {
 		if (pt == pt1) {
@@ -1420,7 +1423,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
@@ -2077,7 +2081,8 @@ ncls:
 		goto out;
 
 	type = skb->protocol;
-	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type &&
 		    (!ptype->dev || ptype->dev == skb->dev)) {
 			if (pt_prev)
@@ -2525,7 +2530,7 @@ static void *ptype_get_idx(loff_t pos)
 		++i;
 	}
 
-	for (t = 0; t < 16; t++) {
+	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
 			if (i == pos)
 				return pt;
@@ -2559,10 +2564,10 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		hash = 0;
 		nxt = ptype_base[0].next;
 	} else
-		hash = ntohs(pt->type) & 15;
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 
 	while (nxt == &ptype_base[hash]) {
-		if (++hash >= 16)
+		if (++hash >= PTYPE_HASH_SIZE)
 			return NULL;
 		nxt = ptype_base[hash].next;
 	}
@@ -4398,7 +4403,7 @@ static int __init net_dev_init(void)
 		goto out;
 
 	INIT_LIST_HEAD(&ptype_all);
-	for (i = 0; i < 16; i++)
+	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
 	if (register_pernet_subsys(&netdev_net_ops))
-- 
cgit v1.2.3


From 8512430e554a84275669f78f86dce18566d5cf7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 26 Nov 2007 20:17:38 +0800
Subject: [TCP]: Move FRTO checks out from write queue abstraction funcs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Better place exists in update_send_head (other non-queue related
adjustments are done there as well) which is the only caller of
tcp_advance_send_head (now that the bogus call from mtu_probe is
gone).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 030fc69ea21..546f385a405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -80,6 +80,10 @@ static void update_send_head(struct sock *sk, struct sk_buff *skb)
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 	tcp_packets_out_inc(sk, skb);
+
+	/* Don't override Nagle indefinately with F-RTO */
+	if (tp->frto_counter == 2)
+		tp->frto_counter = 3;
 }
 
 /* SND.NXT, if window was not shrunk.
-- 
cgit v1.2.3


From dcaee95a1b396f85cdc28099f39710be328d3a5e Mon Sep 17 00:00:00 2001
From: Joonwoo Park <joonwpark81@gmail.com>
Date: Mon, 26 Nov 2007 23:23:21 +0800
Subject: [IPSEC]: kmalloc + memset conversion to kzalloc

2007/11/26, Patrick McHardy <kaber@trash.net>:
> How about also switching vmalloc/get_free_pages to GFP_ZERO
> and getting rid of the memset entirely while you're at it?
>

xfrm_hash: kmalloc + memset conversion to kzalloc
fix to avoid memset entirely.

Signed-off-by: Joonwoo Park <joonwpark81@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_hash.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 55ab5792af5..a2023ec5232 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -17,17 +17,14 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
 	struct hlist_head *n;
 
 	if (sz <= PAGE_SIZE)
-		n = kmalloc(sz, GFP_KERNEL);
+		n = kzalloc(sz, GFP_KERNEL);
 	else if (hashdist)
-		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
+		n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
 	else
 		n = (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL | __GFP_NOWARN,
+			__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
 					 get_order(sz));
 
-	if (n)
-		memset(n, 0, sz);
-
 	return n;
 }
 
-- 
cgit v1.2.3


From 88f8349164cf4035e63db9e6f484e0ce9cb17e47 Mon Sep 17 00:00:00 2001
From: Joonwoo Park <joonwpark81@gmail.com>
Date: Mon, 26 Nov 2007 23:29:32 +0800
Subject: [IPV4] fib_semantics: kmalloc + memset conversion to kzalloc

fib_semantics: kmalloc + memset conversion to kzalloc
fix to avoid memset entirely.

Signed-off-by: Joonwoo Park <joonwpark81@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 33ec96001d9..ec9b0dde9f9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -605,10 +605,10 @@ static inline unsigned int fib_laddr_hashfn(__be32 val)
 static struct hlist_head *fib_hash_alloc(int bytes)
 {
 	if (bytes <= PAGE_SIZE)
-		return kmalloc(bytes, GFP_KERNEL);
+		return kzalloc(bytes, GFP_KERNEL);
 	else
 		return (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL, get_order(bytes));
+			__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
 }
 
 static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -712,12 +712,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (!new_info_hash || !new_laddrhash) {
 			fib_hash_free(new_info_hash, bytes);
 			fib_hash_free(new_laddrhash, bytes);
-		} else {
-			memset(new_info_hash, 0, bytes);
-			memset(new_laddrhash, 0, bytes);
-
+		} else
 			fib_hash_move(new_info_hash, new_laddrhash, new_size);
-		}
 
 		if (!fib_hash_size)
 			goto failure;
-- 
cgit v1.2.3


From 3015a347dcd470fcc8becf1f84b0502391a3c0e0 Mon Sep 17 00:00:00 2001
From: Joonwoo Park <joonwpark81@gmail.com>
Date: Mon, 26 Nov 2007 23:31:24 +0800
Subject: [IPV4] fib_hash: kmalloc + memset conversion to kzalloc

fib_hash: kmalloc + memset conversion to kzalloc
fix to avoid memset entirely.

Signed-off-by: Joonwoo Park <joonwpark81@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 0dfee27cfbc..eb1aa8eead0 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -102,10 +102,10 @@ static struct hlist_head *fz_hash_alloc(int divisor)
 	unsigned long size = divisor * sizeof(struct hlist_head);
 
 	if (size <= PAGE_SIZE) {
-		return kmalloc(size, GFP_KERNEL);
+		return kzalloc(size, GFP_KERNEL);
 	} else {
 		return (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL, get_order(size));
+			__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
 	}
 }
 
@@ -174,8 +174,6 @@ static void fn_rehash_zone(struct fn_zone *fz)
 	ht = fz_hash_alloc(new_divisor);
 
 	if (ht)	{
-		memset(ht, 0, new_divisor * sizeof(struct hlist_head));
-
 		write_lock_bh(&fib_hash_lock);
 		old_ht = fz->fz_hash;
 		fz->fz_hash = ht;
@@ -219,7 +217,6 @@ fn_new_zone(struct fn_hash *table, int z)
 		kfree(fz);
 		return NULL;
 	}
-	memset(fz->fz_hash, 0, fz->fz_divisor * sizeof(struct hlist_head *));
 	fz->fz_order = z;
 	fz->fz_mask = inet_make_mask(z);
 
-- 
cgit v1.2.3


From f561d0f27d6283c49359bb96048f8ac3728c812c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 29 Nov 2007 20:28:50 +1100
Subject: [NET]: Uninline the sk_stream_alloc_pskb

This function seems too big for inlining. Indeed, it saves
half-a-kilo when uninlined:

add/remove: 1/0 grow/shrink: 0/7 up/down: 195/-719 (-524)
function                                     old     new   delta
sk_stream_alloc_pskb                           -     195    +195
ip_rt_init                                   529     525      -4
__inet_lookup_listener                       284     274     -10
tcp_sendmsg                                 2583    2486     -97
tcp_sendpage                                1449    1305    -144
tso_fragment                                 417     267    -150
tcp_fragment                                1149     992    -157
__tcp_push_pending_frames                   1998    1841    -157

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e055f25876d..6b35ab841db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -629,6 +629,33 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 	return ret;
 }
 
+struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
+		int size, int mem, gfp_t gfp)
+{
+	struct sk_buff *skb;
+
+	/* The TCP header must be at least 32-bit aligned.  */
+	size = ALIGN(size, 4);
+
+	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
+	if (skb) {
+		skb->truesize += mem;
+		if (sk_stream_wmem_schedule(sk, skb->truesize)) {
+			/*
+			 * Make sure that we have exactly size bytes
+			 * available to the caller, no more, no less.
+			 */
+			skb_reserve(skb, skb_tailroom(skb) - size);
+			return skb;
+		}
+		__kfree_skb(skb);
+	} else {
+		sk->sk_prot->enter_memory_pressure();
+		sk_stream_moderate_sndbuf(sk);
+	}
+	return NULL;
+}
+
 static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
 			 size_t psize, int flags)
 {
-- 
cgit v1.2.3


From df97c708d5e6eebdd9ded1fa588eae09acf53793 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 29 Nov 2007 21:22:33 +1100
Subject: [NET]: Eliminate unused argument from sk_stream_alloc_pskb

The 3rd argument is always zero (according to grep :) Eliminate
it and merge the function with sk_stream_alloc_skb.

This saves 44 more bytes, and together with the previous patch
we have:

add/remove: 1/0 grow/shrink: 0/8 up/down: 183/-751 (-568)
function                                     old     new   delta
sk_stream_alloc_skb                            -     183    +183
ip_rt_init                                   529     525      -4
arp_ignore                                   112     107      -5
__inet_lookup_listener                       284     274     -10
tcp_sendmsg                                 2583    2481    -102
tcp_sendpage                                1449    1300    -149
tso_fragment                                 417     258    -159
tcp_fragment                                1149     988    -161
__tcp_push_pending_frames                   1998    1837    -161

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c        | 11 ++++-------
 net/ipv4/tcp_output.c |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6b35ab841db..eb77088f305 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -629,8 +629,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 	return ret;
 }
 
-struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
-		int size, int mem, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 {
 	struct sk_buff *skb;
 
@@ -639,7 +638,6 @@ struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
 
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
 	if (skb) {
-		skb->truesize += mem;
 		if (sk_stream_wmem_schedule(sk, skb->truesize)) {
 			/*
 			 * Make sure that we have exactly size bytes
@@ -692,8 +690,7 @@ new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
-			skb = sk_stream_alloc_pskb(sk, 0, 0,
-						   sk->sk_allocation);
+			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
 
@@ -873,8 +870,8 @@ new_segment:
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_pskb(sk, select_size(sk),
-							   0, sk->sk_allocation);
+				skb = sk_stream_alloc_skb(sk, select_size(sk),
+						sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 546f385a405..98835afb8d8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1180,7 +1180,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now);
 
-	buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From c7dc89c0ac8e7c3796bff91becf58ccdbcaf9f18 Mon Sep 17 00:00:00 2001
From: "Fred L. Templin" <fred.l.templin@boeing.com>
Date: Thu, 29 Nov 2007 22:11:40 +1100
Subject: [IPV6]: Add RFC4214 support

This patch includes support for the Intra-Site Automatic Tunnel
Addressing Protocol (ISATAP) per RFC4214. It uses the SIT
module, and is configured using extensions to the "iproute2"
utility. The diffs are specific to the Linux 2.6.24-rc2 kernel
distribution.

This version includes the diff for ./include/linux/if.h which was
missing in the v2.4 submission and is needed to make the
patch compile. The patch has been installed, compiled and
tested in a clean 2.6.24-rc2 kernel build area.

Signed-off-by: Fred L. Templin <fred.l.templin@boeing.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 22 ++++++++++++++-
 net/ipv6/route.c    |  2 ++
 net/ipv6/sit.c      | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6c8b193474b..f177424c186 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -377,6 +377,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 		       "%s: Disabled Privacy Extensions\n",
 		       dev->name);
 		ndev->cnf.use_tempaddr = -1;
+
+		if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
+			printk(KERN_INFO
+			       "%s: Disabled Multicast RS\n",
+			       dev->name);
+			ndev->cnf.rtr_solicits = 0;
+		}
 	} else {
 		in6_dev_hold(ndev);
 		ipv6_regen_rndid((unsigned long) ndev);
@@ -1409,6 +1416,9 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
 		return addrconf_ifid_arcnet(eui, dev);
 	case ARPHRD_INFINIBAND:
 		return addrconf_ifid_infiniband(eui, dev);
+	case ARPHRD_SIT:
+		if (dev->priv_flags & IFF_ISATAP)
+			return ipv6_isatap_eui64(eui, *(__be32 *)dev->dev_addr);
 	}
 	return -1;
 }
@@ -1444,7 +1454,7 @@ regen:
 	 *
 	 *  - Reserved subnet anycast (RFC 2526)
 	 *	11111101 11....11 1xxxxxxx
-	 *  - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
+	 *  - ISATAP (RFC4214) 6.1
 	 *	00-00-5E-FE-xx-xx-xx-xx
 	 *  - value 0
 	 *  - XXX: already assigned to an address on the device
@@ -2175,6 +2185,16 @@ static void addrconf_sit_config(struct net_device *dev)
 		return;
 	}
 
+	if (dev->priv_flags & IFF_ISATAP) {
+		struct in6_addr addr;
+
+		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+		addrconf_prefix_route(&addr, 64, dev, 0, 0);
+		if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+			addrconf_add_linklocal(idev, &addr);
+		return;
+	}
+
 	sit_add_v4_addrs(idev);
 
 	if (dev->flags&IFF_POINTOPOINT) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d7ec4c9ffc4..e2c980dbe52 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1659,6 +1659,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 	return rt;
 }
 
+EXPORT_SYMBOL(rt6_get_dflt_router);
+
 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 71433d29d88..b3b8513e9cb 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -16,6 +16,7 @@
  *	Changes:
  * Roger Venning <r.venning@telstra.com>:	6to4 support
  * Nate Thompson <nate@thebog.net>:		6to4 support
+ * Fred L. Templin <fltemplin@acm.org>:		isatap support
  */
 
 #include <linux/module.h>
@@ -182,6 +183,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
 	dev->init = ipip6_tunnel_init;
 	nt->parms = *parms;
 
+	if (parms->i_flags & SIT_ISATAP)
+		dev->priv_flags |= IFF_ISATAP;
+
 	if (register_netdevice(dev) < 0) {
 		free_netdev(dev);
 		goto failed;
@@ -364,6 +368,48 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
+/* ISATAP (RFC4214) - check source address */
+static int
+isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev)
+{
+	struct neighbour *neigh;
+	struct dst_entry *dst;
+	struct rt6_info *rt;
+	struct flowi fl;
+	struct in6_addr *addr6;
+	struct in6_addr rtr;
+	struct ipv6hdr *iph6;
+	int ok = 0;
+
+	/* from onlink default router */
+	ipv6_addr_set(&rtr,  htonl(0xFE800000), 0, 0, 0);
+	ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr);
+	if ((rt = rt6_get_dflt_router(&rtr, dev))) {
+		dst_release(&rt->u.dst);
+		return 1;
+	}
+
+	iph6 = ipv6_hdr(skb);
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = iph6->nexthdr;
+	ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr);
+	fl.oif = dev->ifindex;
+	security_skb_classify_flow(skb, &fl);
+
+	dst = ip6_route_output(NULL, &fl);
+	if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) {
+
+		addr6 = (struct in6_addr*)&neigh->primary_key;
+
+		/* from correct previous hop */
+		if (ipv6_addr_is_isatap(addr6) &&
+		    (addr6->s6_addr32[3] == iph->saddr))
+			ok = 1;
+	}
+	dst_release(dst);
+	return ok;
+}
+
 static int ipip6_rcv(struct sk_buff *skb)
 {
 	struct iphdr *iph;
@@ -382,6 +428,14 @@ static int ipip6_rcv(struct sk_buff *skb)
 		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->pkt_type = PACKET_HOST;
+
+		if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
+		    !isatap_srcok(skb, iph, tunnel->dev)) {
+			tunnel->stat.rx_errors++;
+			read_unlock(&ipip6_lock);
+			kfree_skb(skb);
+			return 0;
+		}
 		tunnel->stat.rx_packets++;
 		tunnel->stat.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
@@ -444,6 +498,29 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto tx_error;
 
+	/* ISATAP (RFC4214) - must come before 6to4 */
+	if (dev->priv_flags & IFF_ISATAP) {
+		struct neighbour *neigh = NULL;
+
+		if (skb->dst)
+			neigh = skb->dst->neighbour;
+
+		if (neigh == NULL) {
+			if (net_ratelimit())
+				printk(KERN_DEBUG "sit: nexthop == NULL\n");
+			goto tx_error;
+		}
+
+		addr6 = (struct in6_addr*)&neigh->primary_key;
+		addr_type = ipv6_addr_type(addr6);
+
+		if ((addr_type & IPV6_ADDR_UNICAST) &&
+		     ipv6_addr_is_isatap(addr6))
+			dst = addr6->s6_addr32[3];
+		else
+			goto tx_error;
+	}
+
 	if (!dst)
 		dst = try_6to4(&iph6->daddr);
 
-- 
cgit v1.2.3


From df1b86c53dc56b1a3125b6792356066ac3af4254 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 30 Nov 2007 00:42:42 +1100
Subject: [NET]: Nicer WARN_ON in netstat_show

The

        if (statement)
                WARN_ON(1);

looks much better as

        WARN_ON(statement);

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 61ead1d1113..e41f4b9d2e7 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -247,9 +247,8 @@ static ssize_t netstat_show(const struct device *d,
 	struct net_device_stats *stats;
 	ssize_t ret = -EINVAL;
 
-	if (offset > sizeof(struct net_device_stats) ||
-	    offset % sizeof(unsigned long) != 0)
-		WARN_ON(1);
+	WARN_ON(offset > sizeof(struct net_device_stats) ||
+			offset % sizeof(unsigned long) != 0);
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev) && dev->get_stats &&
-- 
cgit v1.2.3


From ea4f76ae13b4240dac304ed50636391d6b22e9c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 30 Nov 2007 00:59:07 +1100
Subject: [TCP]: Two fixes to new sacktag code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1) Skip condition used to be wrong way around which made SACK
processing very broken, missed many blocks because of that.

2) Use highest_sack advancement only if some skbs are already
sacked because otherwise tcp_write_queue_next may move things
too far (occurs mainly with GSO). The other similar advancement
is not problem because highest_sack was previosly put to point
a sacked skb.

These problems were located because of problem report from Matt
Mathis <mathis@psc.edu>.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 97ea3eda206..33d284e3f7f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1384,7 +1384,7 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
+		if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
 			break;
 	}
 	return skb;
@@ -1575,7 +1575,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			continue;
 		}
 
-		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+		if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) {
 			skb = tcp_write_queue_next(sk, tp->highest_sack);
 			fack_count = tp->fackets_out;
 		}
-- 
cgit v1.2.3


From 276f2edc52e309b38a216245952e05880e182c83 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 28 Nov 2007 11:15:40 -0200
Subject: [TFRC]: Migrate TX history to singly-linked lis

This patch was based on another made by Gerrit Renker, his changelog was:

    ------------------------------------------------------
The patch set migrates TFRC TX history to a singly-linked list.

The details are:
 * use of a consistent naming scheme (all TFRC functions now begin with `tfrc_');
 * allocation and cleanup are taken care of internally;
 * provision of a lookup function, which is used by the CCID TX infrastructure
   to determine the time a packet was sent (in turn used for RTT sampling);
 * integration of the new interface with the present use in CCID3.
    ------------------------------------------------------

Simplifications I did:

. removing the tfrc_tx_hist_head that had a pointer to the list head and
  another for the slabcache.
. No need for creating a slabcache for each CCID that wants to use the TFRC
  tx history routines, create a single slabcache when the dccp_tfrc_lib module
  init routine is called.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              |  57 +++++----------
 net/dccp/ccids/ccid3.h              |   3 +-
 net/dccp/ccids/lib/loss_interval.c  |  12 ++--
 net/dccp/ccids/lib/packet_history.c | 138 ++++++++++++++++--------------------
 net/dccp/ccids/lib/packet_history.h |  79 ++++-----------------
 5 files changed, 102 insertions(+), 187 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 42893b1dfa0..f73542ab9d0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,7 +49,6 @@ static int ccid3_debug;
 #define ccid3_pr_debug(format, a...)
 #endif
 
-static struct dccp_tx_hist *ccid3_tx_hist;
 static struct dccp_rx_hist *ccid3_rx_hist;
 
 /*
@@ -389,28 +388,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
 				    unsigned int len)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct dccp_tx_hist_entry *packet;
 
 	ccid3_hc_tx_update_s(hctx, len);
 
-	packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
-	if (unlikely(packet == NULL)) {
+	if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss))
 		DCCP_CRIT("packet history - out of memory!");
-		return;
-	}
-	dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);
-
-	packet->dccphtx_tstamp = ktime_get_real();
-	packet->dccphtx_seqno  = dccp_sk(sk)->dccps_gss;
-	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
-	packet->dccphtx_sent   = 1;
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv;
-	struct dccp_tx_hist_entry *packet;
+	struct tfrc_tx_hist_entry *packet;
 	ktime_t now;
 	unsigned long t_nfb;
 	u32 pinv, r_sample;
@@ -425,16 +414,19 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
-		/* get packet from history to look up t_recvdata */
-		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
-					      DCCP_SKB_CB(skb)->dccpd_ack_seq);
-		if (unlikely(packet == NULL)) {
-			DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
-				  "in history!\n",  dccp_role(sk), sk,
-			    (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
-				dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+		/* estimate RTT from history if ACK number is valid */
+		packet = tfrc_tx_hist_find_entry(hctx->ccid3hctx_hist,
+						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		if (packet == NULL) {
+			DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
+				  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
+				  (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
 			return;
 		}
+		/*
+		 * Garbage-collect older (irrelevant) entries
+		 */
+		tfrc_tx_hist_purge(&packet->next);
 
 		/* Update receive rate in units of 64 * bytes/second */
 		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
@@ -451,7 +443,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/*
 		 * Calculate new RTT sample and update moving average
 		 */
-		r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp));
+		r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->stamp));
 		hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
 
 		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
@@ -493,9 +485,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/* unschedule no feedback timer */
 		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
 
-		/* remove all packets older than the one acked from history */
-		dccp_tx_hist_purge_older(ccid3_tx_hist,
-					 &hctx->ccid3hctx_hist, packet);
 		/*
 		 * As we have calculated new ipi, delta, t_nom it is possible
 		 * that we now can send a packet, so wake up dccp_wait_for_ccid
@@ -598,7 +587,7 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
 
 	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
-	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
+	hctx->ccid3hctx_hist = NULL;
 	setup_timer(&hctx->ccid3hctx_no_feedback_timer,
 			ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
 
@@ -612,8 +601,7 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
 	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
 
-	/* Empty packet history */
-	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
+	tfrc_tx_hist_purge(&hctx->ccid3hctx_hist);
 }
 
 static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
@@ -1036,19 +1024,12 @@ static __init int ccid3_module_init(void)
 	if (ccid3_rx_hist == NULL)
 		goto out;
 
-	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
-	if (ccid3_tx_hist == NULL)
-		goto out_free_rx;
-
 	rc = ccid_register(&ccid3);
 	if (rc != 0)
-		goto out_free_tx;
+		goto out_free_rx;
 out:
 	return rc;
 
-out_free_tx:
-	dccp_tx_hist_delete(ccid3_tx_hist);
-	ccid3_tx_hist = NULL;
 out_free_rx:
 	dccp_rx_hist_delete(ccid3_rx_hist);
 	ccid3_rx_hist = NULL;
@@ -1060,10 +1041,6 @@ static __exit void ccid3_module_exit(void)
 {
 	ccid_unregister(&ccid3);
 
-	if (ccid3_tx_hist != NULL) {
-		dccp_tx_hist_delete(ccid3_tx_hist);
-		ccid3_tx_hist = NULL;
-	}
 	if (ccid3_rx_hist != NULL) {
 		dccp_rx_hist_delete(ccid3_rx_hist);
 		ccid3_rx_hist = NULL;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 36eca34228f..b842a7dd99d 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/tfrc.h>
+#include "lib/packet_history.h"
 #include "../ccid.h"
 
 /* Two seconds as per RFC 3448 4.2 */
@@ -111,7 +112,7 @@ struct ccid3_hc_tx_sock {
 	ktime_t				ccid3hctx_t_ld;
 	ktime_t				ccid3hctx_t_nom;
 	u32				ccid3hctx_delta;
-	struct list_head		ccid3hctx_hist;
+	struct tfrc_tx_hist_entry	*ccid3hctx_hist;
 	struct ccid3_options_received	ccid3hctx_options_received;
 };
 
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index d26b88dbbb4..f2ca4eb74dd 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -277,7 +277,7 @@ void dccp_li_update_li(struct sock *sk,
 
 EXPORT_SYMBOL_GPL(dccp_li_update_li);
 
-static __init int dccp_li_init(void)
+int __init dccp_li_init(void)
 {
 	dccp_li_cachep = kmem_cache_create("dccp_li_hist",
 					   sizeof(struct dccp_li_hist_entry),
@@ -285,10 +285,10 @@ static __init int dccp_li_init(void)
 	return dccp_li_cachep == NULL ? -ENOBUFS : 0;
 }
 
-static __exit void dccp_li_exit(void)
+void dccp_li_exit(void)
 {
-	kmem_cache_destroy(dccp_li_cachep);
+	if (dccp_li_cachep != NULL) {
+		kmem_cache_destroy(dccp_li_cachep);
+		dccp_li_cachep = NULL;
+	}
 }
-
-module_init(dccp_li_init);
-module_exit(dccp_li_exit);
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 34c4f604772..13973606471 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,7 +1,8 @@
 /*
  *  net/dccp/packet_history.c
  *
- *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
+ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
@@ -39,93 +40,48 @@
 #include "packet_history.h"
 
 /*
- * 	Transmitter History Routines
+ * Transmitter History Routines
  */
-struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
-{
-	struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
-	static const char dccp_tx_hist_mask[] = "tx_hist_%s";
-	char *slab_name;
+static struct kmem_cache *tfrc_tx_hist;
 
-	if (hist == NULL)
-		goto out;
-
-	slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
-			    GFP_ATOMIC);
-	if (slab_name == NULL)
-		goto out_free_hist;
-
-	sprintf(slab_name, dccp_tx_hist_mask, name);
-	hist->dccptxh_slab = kmem_cache_create(slab_name,
-					     sizeof(struct dccp_tx_hist_entry),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL);
-	if (hist->dccptxh_slab == NULL)
-		goto out_free_slab_name;
-out:
-	return hist;
-out_free_slab_name:
-	kfree(slab_name);
-out_free_hist:
-	kfree(hist);
-	hist = NULL;
-	goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
-
-void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
+struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
 {
-	const char* name = kmem_cache_name(hist->dccptxh_slab);
+	while (head != NULL && head->seqno != seqno)
+		head = head->next;
 
-	kmem_cache_destroy(hist->dccptxh_slab);
-	kfree(name);
-	kfree(hist);
+	return head;
 }
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_find_entry);
 
-EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
-
-struct dccp_tx_hist_entry *
-	dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
+int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
-	struct dccp_tx_hist_entry *packet = NULL, *entry;
-
-	list_for_each_entry(entry, list, dccphtx_node)
-		if (entry->dccphtx_seqno == seq) {
-			packet = entry;
-			break;
-		}
-
-	return packet;
+	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist, gfp_any());
+
+	if (entry == NULL)
+		return -ENOBUFS;
+	entry->seqno = seqno;
+	entry->stamp = ktime_get_real();
+	entry->next  = *headp;
+	*headp	     = entry;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_add);
 
-EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
-
-void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
+void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 {
-	struct dccp_tx_hist_entry *entry, *next;
+	struct tfrc_tx_hist_entry *head = *headp;
 
-	list_for_each_entry_safe(entry, next, list, dccphtx_node) {
-		list_del_init(&entry->dccphtx_node);
-		dccp_tx_hist_entry_delete(hist, entry);
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
+	while (head != NULL) {
+		struct tfrc_tx_hist_entry *next = head->next;
 
-void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
-			      struct list_head *list,
-			      struct dccp_tx_hist_entry *packet)
-{
-	struct dccp_tx_hist_entry *next;
-
-	list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
-		list_del_init(&packet->dccphtx_node);
-		dccp_tx_hist_entry_delete(hist, packet);
+		kmem_cache_free(tfrc_tx_hist, head);
+		head = next;
 	}
-}
 
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
+	*headp = NULL;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
 /*
  * 	Receiver History Routines
@@ -147,8 +103,7 @@ struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
 	sprintf(slab_name, dccp_rx_hist_mask, name);
 	hist->dccprxh_slab = kmem_cache_create(slab_name,
 					     sizeof(struct dccp_rx_hist_entry),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL);
+					     0, SLAB_HWCACHE_ALIGN, NULL);
 	if (hist->dccprxh_slab == NULL)
 		goto out_free_slab_name;
 out:
@@ -293,6 +248,37 @@ void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
 
+extern int __init dccp_li_init(void);
+extern void dccp_li_exit(void);
+
+static __init int packet_history_init(void)
+{
+	if (dccp_li_init() != 0)
+		goto out;
+
+	tfrc_tx_hist = kmem_cache_create("tfrc_tx_hist",
+					 sizeof(struct tfrc_tx_hist_entry), 0,
+					 SLAB_HWCACHE_ALIGN, NULL);
+	if (tfrc_tx_hist == NULL)
+		goto out_li_exit;
+
+	return 0;
+out_li_exit:
+	dccp_li_exit();
+out:
+	return -ENOBUFS;
+}
+module_init(packet_history_init);
+
+static __exit void packet_history_exit(void)
+{
+	if (tfrc_tx_hist != NULL) {
+		kmem_cache_destroy(tfrc_tx_hist);
+		tfrc_tx_hist = NULL;
+	}
+	dccp_li_exit();
+}
+module_exit(packet_history_exit);
 
 MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
 	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 032bb61c6e3..5c07182dd65 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,10 +1,9 @@
 /*
- *  net/dccp/packet_history.h
+ *  Packet RX/TX history data structures and routines for TFRC-based protocols.
  *
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
- *  An implementation of the DCCP protocol
- *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
  *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
@@ -49,71 +48,23 @@
 #define TFRC_WIN_COUNT_PER_RTT	 4
 #define TFRC_WIN_COUNT_LIMIT	16
 
-/*
- * 	Transmitter History data structures and declarations
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
  */
-struct dccp_tx_hist_entry {
-	struct list_head dccphtx_node;
-	u64		 dccphtx_seqno:48,
-			 dccphtx_sent:1;
-	u32		 dccphtx_rtt;
-	ktime_t		 dccphtx_tstamp;
-};
-
-struct dccp_tx_hist {
-	struct kmem_cache *dccptxh_slab;
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
 };
 
-extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
-extern void 		    dccp_tx_hist_delete(struct dccp_tx_hist *hist);
-
-static inline struct dccp_tx_hist_entry *
-			dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
-					       const gfp_t prio)
-{
-	struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
-							    prio);
-
-	if (entry != NULL)
-		entry->dccphtx_sent = 0;
-
-	return entry;
-}
-
-static inline struct dccp_tx_hist_entry *
-			dccp_tx_hist_head(struct list_head *list)
-{
-	struct dccp_tx_hist_entry *head = NULL;
-
-	if (!list_empty(list))
-		head = list_entry(list->next, struct dccp_tx_hist_entry,
-				  dccphtx_node);
-	return head;
-}
-
-extern struct dccp_tx_hist_entry *
-			dccp_tx_hist_find_entry(const struct list_head *list,
-						const u64 seq);
-
-static inline void dccp_tx_hist_add_entry(struct list_head *list,
-					  struct dccp_tx_hist_entry *entry)
-{
-	list_add(&entry->dccphtx_node, list);
-}
-
-static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
-					     struct dccp_tx_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(hist->dccptxh_slab, entry);
-}
-
-extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
-			       struct list_head *list);
+extern int  tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
+extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
 
-extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
-				     struct list_head *list,
-				     struct dccp_tx_hist_entry *next);
+extern struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 ackno);
 
 /*
  * 	Receiver History data structures and declarations
-- 
cgit v1.2.3


From f53dc67c5e7babafe239b93a11678b0e05bead51 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 28 Nov 2007 08:35:08 +0000
Subject: [DCCP]: Use AF-independent rebuild_header routine

This fixes a nasty bug: dccp_send_reset() is called by both DCCPv4 and DCCPv6, but uses
inet_sk_rebuild_header() in each case. This leads to unpredictable and weird behaviour:
under some conditions, DCCPv6 Resets were sent, in other not.

The fix is to use the AF-independent rebuild_header routine.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 33ce737ef3a..7caa7f57bb7 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -391,7 +391,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
 	 * FIXME: what if rebuild_header fails?
 	 * Should we be doing a rebuild_header here?
 	 */
-	int err = inet_sk_rebuild_header(sk);
+	int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk);
 
 	if (err != 0)
 		return err;
-- 
cgit v1.2.3


From f11135a3442996d78dad99933bfdb90d1f6588d3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 28 Nov 2007 11:34:53 -0200
Subject: [DCCP]: Dedicated auxiliary states to support passive-close

This adds two auxiliary states to deal with passive closes:
  * PASSIVE_CLOSE    (reached from OPEN via reception of Close)    and
  * PASSIVE_CLOSEREQ (reached from OPEN via reception of CloseReq)
as internal intermediate states.

These states are used to allow a receiver to process unread data before
acknowledging the received connection-termination-request (the Close/CloseReq).

Without such support, it will happen that passively-closed sockets enter CLOSED
state while there is still unprocessed data in the queue; leading to unexpected
and erratic API behaviour.

PASSIVE_CLOSE has been mapped into TCPF_CLOSE_WAIT, so that the code will
seamlessly work with inet_accept() (which tests for this state).

The state names are thanks to Arnaldo, who suggested this naming scheme
following an earlier revision of this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 73006b74767..3489d3f21f5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -60,8 +60,7 @@ void dccp_set_state(struct sock *sk, const int state)
 {
 	const int oldstate = sk->sk_state;
 
-	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
-		      dccp_role(sk), sk,
+	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
 		      dccp_state_name(oldstate), dccp_state_name(state));
 	WARN_ON(state == oldstate);
 
@@ -134,14 +133,17 @@ EXPORT_SYMBOL_GPL(dccp_packet_name);
 const char *dccp_state_name(const int state)
 {
 	static char *dccp_state_names[] = {
-	[DCCP_OPEN]	  = "OPEN",
-	[DCCP_REQUESTING] = "REQUESTING",
-	[DCCP_PARTOPEN]	  = "PARTOPEN",
-	[DCCP_LISTEN]	  = "LISTEN",
-	[DCCP_RESPOND]	  = "RESPOND",
-	[DCCP_CLOSING]	  = "CLOSING",
-	[DCCP_TIME_WAIT]  = "TIME_WAIT",
-	[DCCP_CLOSED]	  = "CLOSED",
+	[DCCP_OPEN]		= "OPEN",
+	[DCCP_REQUESTING]	= "REQUESTING",
+	[DCCP_PARTOPEN]		= "PARTOPEN",
+	[DCCP_LISTEN]		= "LISTEN",
+	[DCCP_RESPOND]		= "RESPOND",
+	[DCCP_CLOSING]		= "CLOSING",
+	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
+	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
+	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
+	[DCCP_TIME_WAIT]	= "TIME_WAIT",
+	[DCCP_CLOSED]		= "CLOSED",
 	};
 
 	if (state >= DCCP_MAX_STATES)
-- 
cgit v1.2.3


From 0c869620762fea4b3acf6502d9e80840b27ec642 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 28 Nov 2007 11:59:48 -0200
Subject: [DCCP]: Integrate state transitions for passive-close

This adds the necessary state transitions for the two forms of passive-close

 * PASSIVE_CLOSE    - which is entered when a host   receives a Close;
 * PASSIVE_CLOSEREQ - which is entered when a client receives a CloseReq.

Here is a detailed account of what the patch does in each state.

1) Receiving CloseReq

  The pseudo-code in 8.5 says:

     Step 13: Process CloseReq
          If P.type == CloseReq and S.state < CLOSEREQ,
              Generate Close
              S.state := CLOSING
              Set CLOSING timer.

  This means we need to address what to do in CLOSED, LISTEN, REQUEST, RESPOND, PARTOPEN, and OPEN.

   * CLOSED:         silently ignore - it may be a late or duplicate CloseReq;
   * LISTEN/RESPOND: will not appear, since Step 7 is performed first (we know we are the client);
   * REQUEST:        perform Step 13 directly (no need to enqueue packet);
   * OPEN/PARTOPEN:  enter PASSIVE_CLOSEREQ so that the application has a chance to process unread data.

  When already in PASSIVE_CLOSEREQ, no second CloseReq is enqueued. In any other state, the CloseReq is ignored.
  I think that this offers some robustness against rare and pathological cases: e.g. a simultaneous close where
  the client sends a Close and the server a CloseReq. The client will then be retransmitting its Close until it
  gets the Reset, so ignoring the CloseReq while in state CLOSING is sane.

2) Receiving Close

  The code below from 8.5 is unconditional.

     Step 14: Process Close
          If P.type == Close,
              Generate Reset(Closed)
              Tear down connection
              Drop packet and return

  Thus we need to consider all states:
   * CLOSED:           silently ignore, since this can happen when a retransmitted or late Close arrives;
   * LISTEN:           dccp_rcv_state_process() will generate a Reset ("No Connection");
   * REQUEST:          perform Step 14 directly (no need to enqueue packet);
   * RESPOND:          dccp_check_req() will generate a Reset ("Packet Error") -- left it at that;
   * OPEN/PARTOPEN:    enter PASSIVE_CLOSE so that application has a chance to process unread data;
   * CLOSEREQ:         server performed active-close -- perform Step 14;
   * CLOSING:          simultaneous-close: use a tie-breaker to avoid message ping-pong (see comment);
   * PASSIVE_CLOSEREQ: ignore - the peer has a bug (sending first a CloseReq and now a Close);
   * TIMEWAIT:         packet is ignored.

   Note that the condition of receiving a packet in state CLOSED here is different from the condition "there
   is no socket for such a connection": the socket still exists, but its state indicates it is unusable.

   Last, dccp_finish_passive_close sets either DCCP_CLOSED or DCCP_CLOSING = TCP_CLOSING, so that
   sk_stream_wait_close() will wait for the final Reset (which will trigger CLOSING => CLOSED).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++----------
 net/dccp/proto.c | 88 +++++++++++++++++++++++++++++++++++++-------------------
 2 files changed, 131 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index ef299fbd7c2..fe4b0fbfa50 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -32,16 +32,56 @@ static void dccp_fin(struct sock *sk, struct sk_buff *skb)
 	sk->sk_data_ready(sk, 0);
 }
 
-static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
+static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
 {
-	dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
-	dccp_fin(sk, skb);
-	dccp_set_state(sk, DCCP_CLOSED);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+	int queued = 0;
+
+	switch (sk->sk_state) {
+	/*
+	 * We ignore Close when received in one of the following states:
+	 *  - CLOSED		(may be a late or duplicate packet)
+	 *  - PASSIVE_CLOSEREQ	(the peer has sent a CloseReq earlier)
+	 *  - RESPOND		(already handled by dccp_check_req)
+	 */
+	case DCCP_CLOSING:
+		/*
+		 * Simultaneous-close: receiving a Close after sending one. This
+		 * can happen if both client and server perform active-close and
+		 * will result in an endless ping-pong of crossing and retrans-
+		 * mitted Close packets, which only terminates when one of the
+		 * nodes times out (min. 64 seconds). Quicker convergence can be
+		 * achieved when one of the nodes acts as tie-breaker.
+		 * This is ok as both ends are done with data transfer and each
+		 * end is just waiting for the other to acknowledge termination.
+		 */
+		if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT)
+			break;
+		/* fall through */
+	case DCCP_REQUESTING:
+	case DCCP_ACTIVE_CLOSEREQ:
+		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+		dccp_done(sk);
+		break;
+	case DCCP_OPEN:
+	case DCCP_PARTOPEN:
+		/* Give waiting application a chance to read pending data */
+		queued = 1;
+		dccp_fin(sk, skb);
+		dccp_set_state(sk, DCCP_PASSIVE_CLOSE);
+		/* fall through */
+	case DCCP_PASSIVE_CLOSE:
+		/*
+		 * Retransmitted Close: we have already enqueued the first one.
+		 */
+		sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+	}
+	return queued;
 }
 
-static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
+static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
 {
+	int queued = 0;
+
 	/*
 	 *   Step 7: Check for unexpected packet types
 	 *      If (S.is_server and P.type == CloseReq)
@@ -50,12 +90,26 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
 	 */
 	if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
 		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
-		return;
+		return queued;
 	}
 
-	if (sk->sk_state != DCCP_CLOSING)
+	/* Step 13: process relevant Client states < CLOSEREQ */
+	switch (sk->sk_state) {
+	case DCCP_REQUESTING:
+		dccp_send_close(sk, 0);
 		dccp_set_state(sk, DCCP_CLOSING);
-	dccp_send_close(sk, 0);
+		break;
+	case DCCP_OPEN:
+	case DCCP_PARTOPEN:
+		/* Give waiting application a chance to read pending data */
+		queued = 1;
+		dccp_fin(sk, skb);
+		dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ);
+		/* fall through */
+	case DCCP_PASSIVE_CLOSEREQ:
+		sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+	}
+	return queued;
 }
 
 static u8 dccp_reset_code_convert(const u8 code)
@@ -247,11 +301,13 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		dccp_rcv_reset(sk, skb);
 		return 0;
 	case DCCP_PKT_CLOSEREQ:
-		dccp_rcv_closereq(sk, skb);
+		if (dccp_rcv_closereq(sk, skb))
+			return 0;
 		goto discard;
 	case DCCP_PKT_CLOSE:
-		dccp_rcv_close(sk, skb);
-		return 0;
+		if (dccp_rcv_close(sk, skb))
+			return 0;
+		goto discard;
 	case DCCP_PKT_REQUEST:
 		/* Step 7
 		 *   or (S.is_server and P.type == Response)
@@ -590,11 +646,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
 		goto discard;
 	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
-		dccp_rcv_closereq(sk, skb);
+		if (dccp_rcv_closereq(sk, skb))
+			return 0;
 		goto discard;
 	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
-		dccp_rcv_close(sk, skb);
-		return 0;
+		if (dccp_rcv_close(sk, skb))
+			return 0;
+		goto discard;
 	}
 
 	switch (sk->sk_state) {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 3489d3f21f5..60f40ec72ff 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -71,7 +71,8 @@ void dccp_set_state(struct sock *sk, const int state)
 		break;
 
 	case DCCP_CLOSED:
-		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
+		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
+		    oldstate == DCCP_CLOSING)
 			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
 
 		sk->sk_prot->unhash(sk);
@@ -92,6 +93,24 @@ void dccp_set_state(struct sock *sk, const int state)
 
 EXPORT_SYMBOL_GPL(dccp_set_state);
 
+static void dccp_finish_passive_close(struct sock *sk)
+{
+	switch (sk->sk_state) {
+	case DCCP_PASSIVE_CLOSE:
+		/* Node (client or server) has received Close packet. */
+		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+		dccp_set_state(sk, DCCP_CLOSED);
+		break;
+	case DCCP_PASSIVE_CLOSEREQ:
+		/*
+		 * Client received CloseReq. We set the `active' flag so that
+		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
+		 */
+		dccp_send_close(sk, 1);
+		dccp_set_state(sk, DCCP_CLOSING);
+	}
+}
+
 void dccp_done(struct sock *sk)
 {
 	dccp_set_state(sk, DCCP_CLOSED);
@@ -762,19 +781,26 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 		dh = dccp_hdr(skb);
 
-		if (dh->dccph_type == DCCP_PKT_DATA ||
-		    dh->dccph_type == DCCP_PKT_DATAACK)
+		switch (dh->dccph_type) {
+		case DCCP_PKT_DATA:
+		case DCCP_PKT_DATAACK:
 			goto found_ok_skb;
 
-		if (dh->dccph_type == DCCP_PKT_RESET ||
-		    dh->dccph_type == DCCP_PKT_CLOSE) {
-			dccp_pr_debug("found fin ok!\n");
+		case DCCP_PKT_CLOSE:
+		case DCCP_PKT_CLOSEREQ:
+			if (!(flags & MSG_PEEK))
+				dccp_finish_passive_close(sk);
+			/* fall through */
+		case DCCP_PKT_RESET:
+			dccp_pr_debug("found fin (%s) ok!\n",
+				      dccp_packet_name(dh->dccph_type));
 			len = 0;
 			goto found_fin_ok;
+		default:
+			dccp_pr_debug("packet_type=%s\n",
+				      dccp_packet_name(dh->dccph_type));
+			sk_eat_skb(sk, skb, 0);
 		}
-		dccp_pr_debug("packet_type=%s\n",
-			      dccp_packet_name(dh->dccph_type));
-		sk_eat_skb(sk, skb, 0);
 verify_sock_status:
 		if (sock_flag(sk, SOCK_DONE)) {
 			len = 0;
@@ -876,28 +902,30 @@ out:
 
 EXPORT_SYMBOL_GPL(inet_dccp_listen);
 
-static const unsigned char dccp_new_state[] = {
-	/* current state:   new state:      action:	*/
-	[0]		  = DCCP_CLOSED,
-	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
-	[DCCP_REQUESTING] = DCCP_CLOSED,
-	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
-	[DCCP_LISTEN]	  = DCCP_CLOSED,
-	[DCCP_RESPOND]	  = DCCP_CLOSED,
-	[DCCP_CLOSING]	  = DCCP_CLOSED,
-	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
-	[DCCP_CLOSED]	  = DCCP_CLOSED,
-};
-
-static int dccp_close_state(struct sock *sk)
+static void dccp_terminate_connection(struct sock *sk)
 {
-	const int next = dccp_new_state[sk->sk_state];
-	const int ns = next & DCCP_STATE_MASK;
+	u8 next_state = DCCP_CLOSED;
 
-	if (ns != sk->sk_state)
-		dccp_set_state(sk, ns);
+	switch (sk->sk_state) {
+	case DCCP_PASSIVE_CLOSE:
+	case DCCP_PASSIVE_CLOSEREQ:
+		dccp_finish_passive_close(sk);
+		break;
+	case DCCP_PARTOPEN:
+		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
+		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+		/* fall through */
+	case DCCP_OPEN:
+		dccp_send_close(sk, 1);
 
-	return next & DCCP_ACTION_FIN;
+		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
+			next_state = DCCP_ACTIVE_CLOSEREQ;
+		else
+			next_state = DCCP_CLOSING;
+		/* fall through */
+	default:
+		dccp_set_state(sk, next_state);
+	}
 }
 
 void dccp_close(struct sock *sk, long timeout)
@@ -940,8 +968,8 @@ void dccp_close(struct sock *sk, long timeout)
 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
-	} else if (dccp_close_state(sk)) {
-		dccp_send_close(sk, 1);
+	} else if (sk->sk_state != DCCP_CLOSED) {
+		dccp_terminate_connection(sk);
 	}
 
 	sk_stream_wait_close(sk, timeout);
-- 
cgit v1.2.3


From 3159afe0d2c1994392c269cf9dee2797830d016b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 28 Nov 2007 12:06:04 -0200
Subject: [DCCP]: Remove duplicate test for CloseReq

This removes a redundant test for unexpected packet types. In dccp_rcv_state_process
it is tested twice whether a DCCP-server has received a CloseReq (Step 7):

 * first in the combined if-statement,
 * then in the call to dccp_rcv_closereq().

The latter is necesssary since dccp_rcv_closereq() is also called from
__dccp_rcv_established().

This patch removes the duplicate test.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index fe4b0fbfa50..decf2f21149 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -629,16 +629,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 0;
 		/*
 		 *   Step 7: Check for unexpected packet types
-		 *      If (S.is_server and P.type == CloseReq)
-		 *	    or (S.is_server and P.type == Response)
+		 *      If (S.is_server and P.type == Response)
 		 *	    or (S.is_client and P.type == Request)
 		 *	    or (S.state == RESPOND and P.type == Data),
 		 *	  Send Sync packet acknowledging P.seqno
 		 *	  Drop packet and return
 		 */
 	} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
-		    (dh->dccph_type == DCCP_PKT_RESPONSE ||
-		     dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
+		    dh->dccph_type == DCCP_PKT_RESPONSE) ||
 		    (dp->dccps_role == DCCP_ROLE_CLIENT &&
 		     dh->dccph_type == DCCP_PKT_REQUEST) ||
 		    (sk->sk_state == DCCP_RESPOND &&
-- 
cgit v1.2.3


From a99a00cf1adef2d3dce745c93c9cc8b0a1612c50 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 30 Nov 2007 01:14:30 +1100
Subject: [NET]: Move netfilter checksum helpers to net/core/utils.c

This allows to get rid of the CONFIG_NETFILTER dependency of NET_ACT_NAT.
This patch redefines the old names to keep the noise low, the next patch
converts all users.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/utils.c     | 16 ++++++++++++++++
 net/netfilter/core.c | 16 ----------------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/core/utils.c b/net/core/utils.c
index 0bf17da40d5..34459c4c740 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -293,3 +293,19 @@ out:
 }
 
 EXPORT_SYMBOL(in6_pton);
+
+void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
+			      __be32 from, __be32 to, int pseudohdr)
+{
+	__be32 diff[] = { ~from, to };
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		*sum = csum_fold(csum_partial(diff, sizeof(diff),
+				~csum_unfold(*sum)));
+		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+			skb->csum = ~csum_partial(diff, sizeof(diff),
+						~skb->csum);
+	} else if (pseudohdr)
+		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+				csum_unfold(*sum)));
+}
+EXPORT_SYMBOL(inet_proto_csum_replace4);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index bed9ba01e8e..631d2694831 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -217,22 +217,6 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
 }
 EXPORT_SYMBOL(skb_make_writable);
 
-void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
-			    __be32 from, __be32 to, int pseudohdr)
-{
-	__be32 diff[] = { ~from, to };
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		*sum = csum_fold(csum_partial(diff, sizeof(diff),
-				~csum_unfold(*sum)));
-		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
-			skb->csum = ~csum_partial(diff, sizeof(diff),
-						~skb->csum);
-	} else if (pseudohdr)
-		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
-				csum_unfold(*sum)));
-}
-EXPORT_SYMBOL(nf_proto_csum_replace4);
-
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
-- 
cgit v1.2.3


From be0ea7d5da3d99140bde7e5cea328eb111731700 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 30 Nov 2007 01:17:11 +1100
Subject: [NETFILTER]: Convert old checksum helper names

Kill the defines again, convert to the new checksum helper names and
remove the dependency of NET_ACT_NAT on NETFILTER.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_ECN.c           |  6 +++---
 net/ipv4/netfilter/ipt_TOS.c           |  2 +-
 net/ipv4/netfilter/ipt_TTL.c           |  4 ++--
 net/ipv4/netfilter/nf_nat_core.c       |  4 ++--
 net/ipv4/netfilter/nf_nat_helper.c     | 20 ++++++++++----------
 net/ipv4/netfilter/nf_nat_proto_icmp.c |  4 ++--
 net/ipv4/netfilter/nf_nat_proto_tcp.c  |  4 ++--
 net/ipv4/netfilter/nf_nat_proto_udp.c  |  6 +++---
 net/netfilter/xt_TCPMSS.c              | 17 +++++++++--------
 net/sched/Kconfig                      |  1 -
 net/sched/act_nat.c                    | 12 ++++++------
 11 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index add110060a2..e8d5f686797 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -38,7 +38,7 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
-		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
+		csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
 	}
 	return true;
 }
@@ -71,8 +71,8 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
 
-	nf_proto_csum_replace2(&tcph->check, skb,
-				oldval, ((__be16 *)tcph)[6], 0);
+	inet_proto_csum_replace2(&tcph->check, skb,
+				 oldval, ((__be16 *)tcph)[6], 0);
 	return true;
 }
 
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index d4573baa7f2..7b4a6cac399 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -38,7 +38,7 @@ target(struct sk_buff *skb,
 		iph = ip_hdr(skb);
 		oldtos = iph->tos;
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
-		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
+		csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
 	}
 	return XT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index c620a052766..00ddfbea279 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -54,8 +54,8 @@ ipt_ttl_target(struct sk_buff *skb,
 	}
 
 	if (new_ttl != iph->ttl) {
-		nf_csum_replace2(&iph->check, htons(iph->ttl << 8),
-					      htons(new_ttl << 8));
+		csum_replace2(&iph->check, htons(iph->ttl << 8),
+					   htons(new_ttl << 8));
 		iph->ttl = new_ttl;
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index d237511cf46..746c2efddcd 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -372,10 +372,10 @@ manip_pkt(u_int16_t proto,
 	iph = (void *)skb->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
-		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 		iph->saddr = target->src.u3.ip;
 	} else {
-		nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
 		iph->daddr = target->dst.u3.ip;
 	}
 	return 1;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index d00b8b2891f..53f79a310b4 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -180,8 +180,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 								datalen, 0));
 		}
 	} else
-		nf_proto_csum_replace2(&tcph->check, skb,
-				       htons(oldlen), htons(datalen), 1);
+		inet_proto_csum_replace2(&tcph->check, skb,
+					 htons(oldlen), htons(datalen), 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -270,8 +270,8 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 				udph->check = CSUM_MANGLED_0;
 		}
 	} else
-		nf_proto_csum_replace2(&udph->check, skb,
-				       htons(oldlen), htons(datalen), 1);
+		inet_proto_csum_replace2(&udph->check, skb,
+					 htons(oldlen), htons(datalen), 1);
 
 	return 1;
 }
@@ -310,10 +310,10 @@ sack_adjust(struct sk_buff *skb,
 			 ntohl(sack->start_seq), new_start_seq,
 			 ntohl(sack->end_seq), new_end_seq);
 
-		nf_proto_csum_replace4(&tcph->check, skb,
-				       sack->start_seq, new_start_seq, 0);
-		nf_proto_csum_replace4(&tcph->check, skb,
-				       sack->end_seq, new_end_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -397,8 +397,8 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 	else
 		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
 
-	nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
-	nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 
 	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index b9fc724388f..088bb141c15 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -65,8 +65,8 @@ icmp_manip_pkt(struct sk_buff *skb,
 		return 0;
 
 	hdr = (struct icmphdr *)(skb->data + hdroff);
-	nf_proto_csum_replace2(&hdr->checksum, skb,
-			       hdr->un.echo.id, tuple->src.u.icmp.id, 0);
+	inet_proto_csum_replace2(&hdr->checksum, skb,
+				 hdr->un.echo.id, tuple->src.u.icmp.id, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 6bab2e18445..633c53ff3d3 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff *skb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-	nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
+	inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index cbf1a61e290..9c6519cd15f 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -117,9 +117,9 @@ udp_manip_pkt(struct sk_buff *skb,
 		portptr = &hdr->dest;
 	}
 	if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-		nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-		nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
-				       0);
+		inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
+					 0);
 		if (!hdr->check)
 			hdr->check = CSUM_MANGLED_0;
 	}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index f183c8fa47a..bf6249e4406 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -95,8 +95,9 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = newmss & 0x00ff;
 
-			nf_proto_csum_replace2(&tcph->check, skb,
-					       htons(oldmss), htons(newmss), 0);
+			inet_proto_csum_replace2(&tcph->check, skb,
+						 htons(oldmss), htons(newmss),
+						 0);
 			return 0;
 		}
 	}
@@ -117,19 +118,19 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	nf_proto_csum_replace2(&tcph->check, skb,
-			       htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
+	inet_proto_csum_replace2(&tcph->check, skb,
+				 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = newmss & 0x00ff;
 
-	nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
+	inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
 
 	oldval = ((__be16 *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	nf_proto_csum_replace2(&tcph->check, skb,
-				oldval, ((__be16 *)tcph)[6], 0);
+	inet_proto_csum_replace2(&tcph->check, skb,
+				 oldval, ((__be16 *)tcph)[6], 0);
 	return TCPOLEN_MSS;
 }
 
@@ -152,7 +153,7 @@ xt_tcpmss_target4(struct sk_buff *skb,
 	if (ret > 0) {
 		iph = ip_hdr(skb);
 		newlen = htons(ntohs(iph->tot_len) + ret);
-		nf_csum_replace2(&iph->check, iph->tot_len, newlen);
+		csum_replace2(&iph->check, iph->tot_len, newlen);
 		iph->tot_len = newlen;
 	}
 	return XT_CONTINUE;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9c15c4888d1..f5ab54b1edd 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -445,7 +445,6 @@ config NET_ACT_IPT
 config NET_ACT_NAT
         tristate "Stateless NAT"
         depends on NET_CLS_ACT
-        select NETFILTER
         ---help---
 	  Say Y here to do stateless NAT on IPv4 packets.  You should use
 	  netfilter for NAT unless you know what you are doing.
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c96273bcaf9..da5c1eae422 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -151,7 +151,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 		else
 			iph->daddr = new_addr;
 
-		nf_csum_replace4(&iph->check, addr, new_addr);
+		csum_replace4(&iph->check, addr, new_addr);
 	}
 
 	ihl = iph->ihl * 4;
@@ -169,7 +169,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 			goto drop;
 
 		tcph = (void *)(skb_network_header(skb) + ihl);
-		nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
 		break;
 	}
 	case IPPROTO_UDP:
@@ -184,8 +184,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 
 		udph = (void *)(skb_network_header(skb) + ihl);
 		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-			nf_proto_csum_replace4(&udph->check, skb, addr,
-					       new_addr, 1);
+			inet_proto_csum_replace4(&udph->check, skb, addr,
+						 new_addr, 1);
 			if (!udph->check)
 				udph->check = CSUM_MANGLED_0;
 		}
@@ -232,8 +232,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 		else
 			iph->saddr = new_addr;
 
-		nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
-				       1);
+		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
+					 1);
 		break;
 	}
 	default:
-- 
cgit v1.2.3


From 95bdfccb2bf4ea21c0065772c6a2c75cbaf6ad0d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Nov 2007 23:55:42 +1100
Subject: [NET]: Implement the per network namespace sysctl infrastructure

The user interface is: register_net_sysctl_table and
unregister_net_sysctl_table.  Very much like the current
interface except there is a network namespace parameter.

With this any sysctl registered with register_net_sysctl_table
will only show up to tasks in the same network namespace.

All other sysctls continue to be globally visible.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Daniel Lezcano <dlezcano@fr.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sysctl_net.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'net')

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index cd4eafbab1b..c50c793aa7f 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -14,6 +14,7 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include <linux/nsproxy.h>
 
 #include <net/sock.h>
 
@@ -54,3 +55,59 @@ struct ctl_table net_table[] = {
 #endif
 	{ 0 },
 };
+
+static struct list_head *
+net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
+{
+	return &namespaces->net_ns->sysctl_table_headers;
+}
+
+static struct ctl_table_root net_sysctl_root = {
+	.lookup = net_ctl_header_lookup,
+};
+
+static int sysctl_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->sysctl_table_headers);
+	return 0;
+}
+
+static void sysctl_net_exit(struct net *net)
+{
+	WARN_ON(!list_empty(&net->sysctl_table_headers));
+	return;
+}
+
+static struct pernet_operations sysctl_pernet_ops = {
+	.init = sysctl_net_init,
+	.exit = sysctl_net_exit,
+};
+
+static __init int sysctl_init(void)
+{
+	int ret;
+	ret = register_pernet_subsys(&sysctl_pernet_ops);
+	if (ret)
+		goto out;
+	register_sysctl_root(&net_sysctl_root);
+out:
+	return ret;
+}
+subsys_initcall(sysctl_init);
+
+struct ctl_table_header *register_net_sysctl_table(struct net *net,
+	const struct ctl_path *path, struct ctl_table *table)
+{
+	struct nsproxy namespaces;
+	namespaces = *current->nsproxy;
+	namespaces.net_ns = net;
+	return __register_sysctl_paths(&net_sysctl_root,
+					&namespaces, path, table);
+}
+EXPORT_SYMBOL_GPL(register_net_sysctl_table);
+
+void unregister_net_sysctl_table(struct ctl_table_header *header)
+{
+	return unregister_sysctl_table(header);
+}
+EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
-- 
cgit v1.2.3


From 9108d5f4b2cd82f55ad178caa0be66a866a06dcc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 29 Nov 2007 22:47:15 -0200
Subject: [TFRC]: Hide tx history details from the CCIDs

Based on a previous patch by Gerrit Renker.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              | 19 +++++++------------
 net/dccp/ccids/lib/packet_history.c | 33 +++++++++++++++++++++++++++++++--
 net/dccp/ccids/lib/packet_history.h | 17 +++--------------
 3 files changed, 41 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f73542ab9d0..49338370eb0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -399,7 +399,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv;
-	struct tfrc_tx_hist_entry *packet;
 	ktime_t now;
 	unsigned long t_nfb;
 	u32 pinv, r_sample;
@@ -414,19 +413,17 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
+		now = ktime_get_real();
+
 		/* estimate RTT from history if ACK number is valid */
-		packet = tfrc_tx_hist_find_entry(hctx->ccid3hctx_hist,
-						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
-		if (packet == NULL) {
+		r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
+					    DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
+		if (r_sample == 0) {
 			DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
 				  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
 				  (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
 			return;
 		}
-		/*
-		 * Garbage-collect older (irrelevant) entries
-		 */
-		tfrc_tx_hist_purge(&packet->next);
 
 		/* Update receive rate in units of 64 * bytes/second */
 		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
@@ -438,12 +435,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid3hctx_p = 0;
 		else				       /* can not exceed 100% */
 			hctx->ccid3hctx_p = 1000000 / pinv;
-
-		now = ktime_get_real();
 		/*
-		 * Calculate new RTT sample and update moving average
+		 * Validate new RTT sample and update moving average
 		 */
-		r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->stamp));
+		r_sample = dccp_sample_rtt(sk, r_sample);
 		hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
 
 		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 13973606471..4805de99656 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -39,12 +39,24 @@
 #include <linux/string.h>
 #include "packet_history.h"
 
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
+};
+
 /*
  * Transmitter History Routines
  */
 static struct kmem_cache *tfrc_tx_hist;
 
-struct tfrc_tx_hist_entry *
+static struct tfrc_tx_hist_entry *
 	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
 {
 	while (head != NULL && head->seqno != seqno)
@@ -52,7 +64,6 @@ struct tfrc_tx_hist_entry *
 
 	return head;
 }
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_find_entry);
 
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
@@ -83,6 +94,24 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
+u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
+		     const ktime_t now)
+{
+	u32 rtt = 0;
+	struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
+
+	if (packet != NULL) {
+		rtt = ktime_us_delta(now, packet->stamp);
+		/*
+		 * Garbage-collect older (irrelevant) entries:
+		 */
+		tfrc_tx_hist_purge(&packet->next);
+	}
+
+	return rtt;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
 /*
  * 	Receiver History Routines
  */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 5c07182dd65..0670f46dd53 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -48,23 +48,12 @@
 #define TFRC_WIN_COUNT_PER_RTT	 4
 #define TFRC_WIN_COUNT_LIMIT	16
 
-/**
- *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
- *  @next:  next oldest entry (LIFO order)
- *  @seqno: sequence number of this entry
- *  @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
-	struct tfrc_tx_hist_entry *next;
-	u64			  seqno;
-	ktime_t			  stamp;
-};
+struct tfrc_tx_hist_entry;
 
 extern int  tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
 extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
-
-extern struct tfrc_tx_hist_entry *
-	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 ackno);
+extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
+			     const u64 seqno, const ktime_t now);
 
 /*
  * 	Receiver History data structures and declarations
-- 
cgit v1.2.3


From 875179fa60ffe2eba1daaefb0af1be97ff5eda6a Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Sat, 1 Dec 2007 23:27:18 +1100
Subject: [IPSEC]: SPD auditing fix to include the netmask/prefix-length

Currently the netmask/prefix-length of an IPsec SPD entry is not included in
any of the SPD related audit messages.  This can cause a problem when the
audit log is examined as the netmask/prefix-length is vital in determining
what network traffic is affected by a particular SPD entry.  This patch fixes
this problem by adding two additional fields, "src_prefixlen" and
"dst_prefixlen", to the SPD audit messages to indicate the source and
destination netmasks.  These new fields are only included in the audit message
when the netmask/prefix-length is less than the address length, i.e. the SPD
entry applies to a network address and not a host address.

Example audit message:

 type=UNKNOWN[1415] msg=audit(1196105849.752:25): auid=0 \
   subj=root:system_r:unconfined_t:s0-s0:c0.c1023 op=SPD-add res=1 \
   src=192.168.0.0 src_prefixlen=24 dst=192.168.1.0 dst_prefixlen=24

In addition, this patch also fixes a few other things in the
xfrm_audit_common_policyinfo() function.  The IPv4 string formatting was
converted to use the standard NIPQUAD_FMT constant, the memcpy() was removed
from the IPv6 code path and replaced with a typecast (the memcpy() was acting
as a slow, implicit typecast anyway), and two local variables were created to
make referencing the XFRM security context and selector information cleaner.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b153f748205..a76280a14e7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2266,29 +2266,37 @@ void __init xfrm_init(void)
 static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
 						struct audit_buffer *audit_buf)
 {
-	if (xp->security)
+	struct xfrm_sec_ctx *ctx = xp->security;
+	struct xfrm_selector *sel = &xp->selector;
+
+	if (ctx)
 		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
-				 xp->security->ctx_alg, xp->security->ctx_doi,
-				 xp->security->ctx_str);
+				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
 
-	switch(xp->selector.family) {
+	switch(sel->family) {
 	case AF_INET:
-		audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
-				 NIPQUAD(xp->selector.saddr.a4),
-				 NIPQUAD(xp->selector.daddr.a4));
+		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
+				 NIPQUAD(sel->saddr.a4));
+		if (sel->prefixlen_s != 32)
+			audit_log_format(audit_buf, " src_prefixlen=%d",
+					 sel->prefixlen_s);
+		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
+				 NIPQUAD(sel->daddr.a4));
+		if (sel->prefixlen_d != 32)
+			audit_log_format(audit_buf, " dst_prefixlen=%d",
+					 sel->prefixlen_d);
 		break;
 	case AF_INET6:
-		{
-			struct in6_addr saddr6, daddr6;
-
-			memcpy(&saddr6, xp->selector.saddr.a6,
-				sizeof(struct in6_addr));
-			memcpy(&daddr6, xp->selector.daddr.a6,
-				sizeof(struct in6_addr));
-			audit_log_format(audit_buf,
-				" src=" NIP6_FMT " dst=" NIP6_FMT,
-				NIP6(saddr6), NIP6(daddr6));
-		}
+		audit_log_format(audit_buf, " src=" NIP6_FMT,
+				 NIP6(*(struct in6_addr *)sel->saddr.a6));
+		if (sel->prefixlen_s != 128)
+			audit_log_format(audit_buf, " src_prefixlen=%d",
+					 sel->prefixlen_s);
+		audit_log_format(audit_buf, " dst=" NIP6_FMT,
+				 NIP6(*(struct in6_addr *)sel->daddr.a6));
+		if (sel->prefixlen_d != 128)
+			audit_log_format(audit_buf, " dst_prefixlen=%d",
+					 sel->prefixlen_d);
 		break;
 	}
 }
-- 
cgit v1.2.3


From dd88590995de7c7ce108718a9ad52b3832e77814 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 1 Dec 2007 23:31:02 +1100
Subject: [DECNET]: Remove extra memset from dn_fib_check_nh

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_fib.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 5413e1b75b5..4aa9a423e60 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -203,8 +203,6 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
 		struct flowi fl;
 		struct dn_fib_res res;
 
-		memset(&fl, 0, sizeof(fl));
-
 		if (nh->nh_flags&RTNH_F_ONLINK) {
 			struct net_device *dev;
 
-- 
cgit v1.2.3


From 97577e38284f48ca773392e2d401af2f6c95dc08 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 1 Dec 2007 23:40:40 +1100
Subject: [UNIX]: Extend unix_sysctl_(un)register prototypes

Add the struct net * argument to both of them to use in
the future. Also make the register one return an error code.

It is useless right now, but will make the future patches
much simpler.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c         | 4 ++--
 net/unix/sysctl_net_unix.c | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 393197afb19..a0aa6d3c2c0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2175,7 +2175,7 @@ static int __init af_unix_init(void)
 
 	sock_register(&unix_family_ops);
 	register_pernet_subsys(&unix_net_ops);
-	unix_sysctl_register();
+	unix_sysctl_register(&init_net);
 out:
 	return rc;
 }
@@ -2183,7 +2183,7 @@ out:
 static void __exit af_unix_exit(void)
 {
 	sock_unregister(PF_UNIX);
-	unix_sysctl_unregister();
+	unix_sysctl_unregister(&init_net);
 	proto_unregister(&unix_proto);
 	unregister_pernet_subsys(&unix_net_ops);
 }
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index eb0bd57ebad..b2e0407e1b7 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -48,12 +48,13 @@ static ctl_table unix_root_table[] = {
 
 static struct ctl_table_header * unix_sysctl_header;
 
-void unix_sysctl_register(void)
+int unix_sysctl_register(struct net *net)
 {
 	unix_sysctl_header = register_sysctl_table(unix_root_table);
+	return unix_sysctl_header == NULL ? -ENOMEM : 0;
 }
 
-void unix_sysctl_unregister(void)
+void unix_sysctl_unregister(struct net *net)
 {
 	unregister_sysctl_table(unix_sysctl_header);
 }
-- 
cgit v1.2.3


From d392e49756a99ba51577d265d6db83e3f01c6ac8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 1 Dec 2007 23:44:15 +1100
Subject: [UNIX]: Move the sysctl_unix_max_dgram_qlen

This will make all the sub-namespaces always use the
default value (10) and leave the tuning via sysctl
to the init namespace only.

Per-namespace tuning is coming.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c         | 6 +++---
 net/unix/sysctl_net_unix.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a0aa6d3c2c0..73620d61762 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,8 +117,6 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
-int sysctl_unix_max_dgram_qlen __read_mostly = 10;
-
 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 static DEFINE_SPINLOCK(unix_table_lock);
 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
@@ -594,7 +592,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
 				&af_unix_sk_receive_queue_lock_key);
 
 	sk->sk_write_space	= unix_write_space;
-	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
+	sk->sk_max_ack_backlog	= net->sysctl_unix_max_dgram_qlen;
 	sk->sk_destruct		= unix_sock_destructor;
 	u	  = unix_sk(sk);
 	u->dentry = NULL;
@@ -2140,6 +2138,8 @@ static int unix_net_init(struct net *net)
 {
 	int error = -ENOMEM;
 
+	net->sysctl_unix_max_dgram_qlen = 10;
+
 #ifdef CONFIG_PROC_FS
 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops))
 		goto out;
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index b2e0407e1b7..c46cec04d23 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -18,7 +18,7 @@ static ctl_table unix_table[] = {
 	{
 		.ctl_name	= NET_UNIX_MAX_DGRAM_QLEN,
 		.procname	= "max_dgram_qlen",
-		.data		= &sysctl_unix_max_dgram_qlen,
+		.data		= &init_net.sysctl_unix_max_dgram_qlen,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
-- 
cgit v1.2.3


From 1d430b913c2e6293a80a4a422406bc9489a476ad Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 1 Dec 2007 23:45:41 +1100
Subject: [UNIX]: Use ctl paths to register unix ctl tables

Unlike previous ones, this patch is useful by its own,
as it decreases the vmlinux size :)

But it will be used later, when the per-namespace sysctl
is added.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/sysctl_net_unix.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index c46cec04d23..43dd356730f 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -26,31 +26,17 @@ static ctl_table unix_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table unix_net_table[] = {
-	{
-		.ctl_name	= NET_UNIX,
-		.procname	= "unix",
-		.mode		= 0555,
-		.child		= unix_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table unix_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= unix_net_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path unix_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "unix", .ctl_name = NET_UNIX, },
+	{ },
 };
 
 static struct ctl_table_header * unix_sysctl_header;
 
 int unix_sysctl_register(struct net *net)
 {
-	unix_sysctl_header = register_sysctl_table(unix_root_table);
+	unix_sysctl_header = register_sysctl_paths(unix_path, unix_table);
 	return unix_sysctl_header == NULL ? -ENOMEM : 0;
 }
 
-- 
cgit v1.2.3


From 1597fbc0faf88c42edb3fe42e92e53b83c3f120e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 1 Dec 2007 23:51:01 +1100
Subject: [UNIX]: Make the unix sysctl tables per-namespace

This is the core.

 * add the ctl_table_header on the struct net;
 * make the unix_sysctl_register and _unregister clone the table;
 * moves calls to them into per-net init and exit callbacks;
 * move the .data pointer in the proper place.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c         |  9 ++++++---
 net/unix/sysctl_net_unix.c | 27 ++++++++++++++++++++++-----
 2 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 73620d61762..b8a2189fb5c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2139,10 +2139,14 @@ static int unix_net_init(struct net *net)
 	int error = -ENOMEM;
 
 	net->sysctl_unix_max_dgram_qlen = 10;
+	if (unix_sysctl_register(net))
+		goto out;
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops))
+	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
+		unix_sysctl_unregister(net);
 		goto out;
+	}
 #endif
 	error = 0;
 out:
@@ -2151,6 +2155,7 @@ out:
 
 static void unix_net_exit(struct net *net)
 {
+	unix_sysctl_unregister(net);
 	proc_net_remove(net, "unix");
 }
 
@@ -2175,7 +2180,6 @@ static int __init af_unix_init(void)
 
 	sock_register(&unix_family_ops);
 	register_pernet_subsys(&unix_net_ops);
-	unix_sysctl_register(&init_net);
 out:
 	return rc;
 }
@@ -2183,7 +2187,6 @@ out:
 static void __exit af_unix_exit(void)
 {
 	sock_unregister(PF_UNIX);
-	unix_sysctl_unregister(&init_net);
 	proto_unregister(&unix_proto);
 	unregister_pernet_subsys(&unix_net_ops);
 }
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 43dd356730f..553ef6a487d 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -32,16 +32,33 @@ static struct ctl_path unix_path[] = {
 	{ },
 };
 
-static struct ctl_table_header * unix_sysctl_header;
-
 int unix_sysctl_register(struct net *net)
 {
-	unix_sysctl_header = register_sysctl_paths(unix_path, unix_table);
-	return unix_sysctl_header == NULL ? -ENOMEM : 0;
+	struct ctl_table *table;
+
+	table = kmemdup(unix_table, sizeof(unix_table), GFP_KERNEL);
+	if (table == NULL)
+		goto err_alloc;
+
+	table[0].data = &net->sysctl_unix_max_dgram_qlen;
+	net->unix_ctl = register_net_sysctl_table(net, unix_path, table);
+	if (net->unix_ctl == NULL)
+		goto err_reg;
+
+	return 0;
+
+err_reg:
+	kfree(table);
+err_alloc:
+	return -ENOMEM;
 }
 
 void unix_sysctl_unregister(struct net *net)
 {
-	unregister_sysctl_table(unix_sysctl_header);
+	struct ctl_table *table;
+
+	table = net->unix_ctl->ctl_table_arg;
+	unregister_sysctl_table(net->unix_ctl);
+	kfree(table);
 }
 
-- 
cgit v1.2.3


From 3c607bbb472814f01b077af01ae326944ff6b8b3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:06:34 +1100
Subject: [NEIGH]: Cleanup the neigh_sysctl_register

This mainly removes the err variable, as this call always
return the same error code (-ENOBUFS).

Besides, I moved the call to kmalloc() from the *t declaration
into the code (this is confusing when a variable is initialized
with the result of some call) and removed unneeded comment near
the error path.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a8b72c1c7c8..5dbe26f460d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2652,14 +2652,14 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 			  int p_id, int pdev_id, char *p_name,
 			  proc_handler *handler, ctl_handler *strategy)
 {
-	struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
-					       sizeof(*t), GFP_KERNEL);
+	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
 	char *dev_name = NULL;
-	int err = 0;
 
+	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return -ENOBUFS;
+		goto err;
+
 	t->neigh_vars[0].data  = &p->mcast_probes;
 	t->neigh_vars[1].data  = &p->ucast_probes;
 	t->neigh_vars[2].data  = &p->app_probes;
@@ -2717,10 +2717,8 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 	}
 
 	dev_name = kstrdup(dev_name_source, GFP_KERNEL);
-	if (!dev_name) {
-		err = -ENOBUFS;
+	if (!dev_name)
 		goto free;
-	}
 
 	t->neigh_dev[0].procname = dev_name;
 
@@ -2735,20 +2733,18 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 	t->neigh_root_dir[0].child     = t->neigh_proto_dir;
 
 	t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
-	if (!t->sysctl_header) {
-		err = -ENOBUFS;
+	if (!t->sysctl_header)
 		goto free_procname;
-	}
+
 	p->sysctl_table = t;
 	return 0;
 
-	/* error path */
- free_procname:
+free_procname:
 	kfree(dev_name);
- free:
+free:
 	kfree(t);
-
-	return err;
+err:
+	return -ENOBUFS;
 }
 
 void neigh_sysctl_unregister(struct neigh_parms *p)
-- 
cgit v1.2.3


From c3bac5a71b24f6ed892b250d4f7511cedc33d34c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:08:16 +1100
Subject: [NEIGH]: Use the ctl paths to create neighbours sysctls

The appropriate path is prepared right inside this function. It
is prepared similar to how the ctl tables were.

Since the path is modified, it is put on the stack, to avoid
possible races with multiple calls to neigh_sysctl_register() : it
is called by protocols and I didn't find any protection in this
case. Did I overlooked the rtnl lock?.

The stack growth of the neigh_sysctl_register() is 40 bytes. I
believe this is OK, since this is not that much and this function
is not called with the deep stack (device/protocols register).

The device's name is stored on the template to free it later.

This will help with the net namespaces, as each namespace should
have its own set of these ctls.

Besides, this saves ~350 bytes from the neigh template :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 77 ++++++++++++++++++----------------------------------
 1 file changed, 27 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5dbe26f460d..4b6dd1e66f1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2484,11 +2484,8 @@ void neigh_app_ns(struct neighbour *n)
 
 static struct neigh_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	ctl_table		neigh_vars[__NET_NEIGH_MAX];
-	ctl_table		neigh_dev[2];
-	ctl_table		neigh_neigh_dir[2];
-	ctl_table		neigh_proto_dir[2];
-	ctl_table		neigh_root_dir[2];
+	struct ctl_table neigh_vars[__NET_NEIGH_MAX];
+	char *dev_name;
 } neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
 		{
@@ -2619,32 +2616,7 @@ static struct neigh_sysctl_table {
 			.mode		= 0644,
 			.proc_handler	= &proc_dointvec,
 		},
-		{}
-	},
-	.neigh_dev = {
-		{
-			.ctl_name	= NET_PROTO_CONF_DEFAULT,
-			.procname	= "default",
-			.mode		= 0555,
-		},
-	},
-	.neigh_neigh_dir = {
-		{
-			.procname	= "neigh",
-			.mode		= 0555,
-		},
-	},
-	.neigh_proto_dir = {
-		{
-			.mode		= 0555,
-		},
-	},
-	.neigh_root_dir = {
-		{
-			.ctl_name	= CTL_NET,
-			.procname	= "net",
-			.mode		= 0555,
-		},
+		{},
 	},
 };
 
@@ -2654,7 +2626,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 {
 	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
-	char *dev_name = NULL;
+
+#define NEIGH_CTL_PATH_ROOT	0
+#define NEIGH_CTL_PATH_PROTO	1
+#define NEIGH_CTL_PATH_NEIGH	2
+#define NEIGH_CTL_PATH_DEV	3
+
+	struct ctl_path neigh_path[] = {
+		{ .procname = "net",	 .ctl_name = CTL_NET, },
+		{ .procname = "proto",	 .ctl_name = 0, },
+		{ .procname = "neigh",	 .ctl_name = 0, },
+		{ .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
+		{ },
+	};
 
 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
 	if (!t)
@@ -2677,11 +2661,11 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 
 	if (dev) {
 		dev_name_source = dev->name;
-		t->neigh_dev[0].ctl_name = dev->ifindex;
+		neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
 		/* Terminate the table early */
 		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
 	} else {
-		dev_name_source = t->neigh_dev[0].procname;
+		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
 		t->neigh_vars[14].data = (int *)(p + 1);
 		t->neigh_vars[15].data = (int *)(p + 1) + 1;
 		t->neigh_vars[16].data = (int *)(p + 1) + 2;
@@ -2716,23 +2700,16 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 			t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
 	}
 
-	dev_name = kstrdup(dev_name_source, GFP_KERNEL);
-	if (!dev_name)
+	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
+	if (!t->dev_name)
 		goto free;
 
-	t->neigh_dev[0].procname = dev_name;
-
-	t->neigh_neigh_dir[0].ctl_name = pdev_id;
-
-	t->neigh_proto_dir[0].procname = p_name;
-	t->neigh_proto_dir[0].ctl_name = p_id;
-
-	t->neigh_dev[0].child	       = t->neigh_vars;
-	t->neigh_neigh_dir[0].child    = t->neigh_dev;
-	t->neigh_proto_dir[0].child    = t->neigh_neigh_dir;
-	t->neigh_root_dir[0].child     = t->neigh_proto_dir;
+	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
+	neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
+	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
+	neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
 
-	t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
+	t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars);
 	if (!t->sysctl_header)
 		goto free_procname;
 
@@ -2740,7 +2717,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 	return 0;
 
 free_procname:
-	kfree(dev_name);
+	kfree(t->dev_name);
 free:
 	kfree(t);
 err:
@@ -2753,7 +2730,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
 		struct neigh_sysctl_table *t = p->sysctl_table;
 		p->sysctl_table = NULL;
 		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->neigh_dev[0].procname);
+		kfree(t->dev_name);
 		kfree(t);
 	}
 }
-- 
cgit v1.2.3


From 9fa896429905eccc263ff0d5e592ecaf651af12d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:17:46 +1100
Subject: [IPV4]: Cleanup the devinet_sysctl_register

I moved the call to kmalloc() from the *t declaration into
the code (this is confusing when a variable is initialized
with the result of some call) and removed unneeded comment
near the error path. Just like I did with the neigh ctl-s.

Besides, I fixed the goto's and the labels - they were indented
with spaces :(

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6e75c884e1a..72dd0ecb108 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1506,12 +1506,13 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 {
 	int i;
 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
-	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
-						 GFP_KERNEL);
+	struct devinet_sysctl_table *t;
 	char *dev_name = NULL;
 
+	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return;
+		goto out;
+
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].extra1 = p;
@@ -1532,7 +1533,7 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 	 */
 	dev_name = kstrdup(dev_name, GFP_KERNEL);
 	if (!dev_name)
-	    goto free;
+		goto free;
 
 	t->devinet_dev[0].procname    = dev_name;
 	t->devinet_dev[0].child	      = t->devinet_vars;
@@ -1542,16 +1543,16 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 
 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
 	if (!t->sysctl_header)
-	    goto free_procname;
+		goto free_procname;
 
 	p->sysctl = t;
 	return;
 
-	/* error path */
- free_procname:
+free_procname:
 	kfree(dev_name);
- free:
+free:
 	kfree(t);
+out:
 	return;
 }
 
-- 
cgit v1.2.3


From f68635e627f9b21db05102e2d8fcd2894493d6bc Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:21:52 +1100
Subject: [IPV6]: Cleanup the addconf_sysctl_register

This only includes fixing the space-indented lines and
removing one unneeded else after the goto.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f177424c186..2d2886a0b66 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4127,7 +4127,8 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 
 	t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
-		return;
+		goto out;
+
 	for (i=0; t->addrconf_vars[i].data; i++) {
 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
@@ -4147,7 +4148,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 	 */
 	dev_name = kstrdup(dev_name, GFP_KERNEL);
 	if (!dev_name)
-	    goto free;
+		goto free;
 
 	t->addrconf_dev[0].procname = dev_name;
 
@@ -4159,16 +4160,15 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
-	else
-		p->sysctl = t;
+
+	p->sysctl = t;
 	return;
 
-	/* error path */
- free_procname:
+free_procname:
 	kfree(dev_name);
- free:
+free:
 	kfree(t);
-
+out:
 	return;
 }
 
-- 
cgit v1.2.3


From ece8edddf067d21c4e5abfe3f1205da1588edbb2 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 22 Nov 2007 10:53:21 +0800
Subject: mac80211: hardware scan rework

The scan code in mac80211 makes the software scan assumption in various
places. For example, we stop the Tx queue during a software scan so that
all the Tx packets will be queued by the stack. We also drop frames not
related to scan in the software scan process. But these are not true for
hardware scan.

Some wireless hardwares (for example iwl3945/4965) has the ability to
perform the whole scan process by hardware and/or firmware. The hardware
scan is relative powerful in that it tries to maintain normal network
traffic while doing a scan in the background. Some drivers (i.e iwlwifi)
do provide a way to tune the hardware scan parameters (for example if the
STA is associated, what's the max time could the STA leave from the
associated channel, how long the scans get suspended after returning to
the service channel, etc). But basically this is transparent to the
stack. mac80211 should not stop Tx queues or drop Rx packets during a
hardware scan.

This patch resolves the above problem by spliting the current scan
indicator local->sta_scanning into local->sta_sw_scanning and
local->sta_hw_scanning. It then changes the scan related code to be aware
of hardware scan or software scan in various places. With this patch,
iwlwifi performs much better in the scan-while-associated condition and
disable_hw_scan=1 should never be required.

Cc: Mohamed Abbas <mohamed.abbas@intel.com>
Cc: Ben Cahill <ben.m.cahill@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211.c       | 13 +++++----
 net/mac80211/ieee80211_i.h     |  6 ++--
 net/mac80211/ieee80211_ioctl.c |  6 ++--
 net/mac80211/ieee80211_sta.c   | 66 ++++++++++++++++++++++++++++--------------
 net/mac80211/rx.c              | 12 ++++++--
 net/mac80211/tx.c              |  2 +-
 6 files changed, 70 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 6378850d858..4f8b6653e36 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -350,11 +350,14 @@ static int ieee80211_stop(struct net_device *dev)
 		synchronize_rcu();
 		skb_queue_purge(&sdata->u.sta.skb_queue);
 
-		if (!local->ops->hw_scan &&
-		    local->scan_dev == sdata->dev) {
-			local->sta_scanning = 0;
-			cancel_delayed_work(&local->scan_work);
+		if (local->scan_dev == sdata->dev) {
+			if (!local->ops->hw_scan) {
+				local->sta_sw_scanning = 0;
+				cancel_delayed_work(&local->scan_work);
+			} else
+				local->sta_hw_scanning = 0;
 		}
+
 		flush_workqueue(local->hw.workqueue);
 
 		sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
@@ -526,7 +529,7 @@ int ieee80211_hw_config(struct ieee80211_local *local)
 	struct ieee80211_channel *chan;
 	int ret = 0;
 
-	if (local->sta_scanning) {
+	if (local->sta_sw_scanning) {
 		chan = local->scan_channel;
 		mode = local->scan_hw_mode;
 	} else {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2be7fcebac4..e9109443cb1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -470,7 +470,8 @@ struct ieee80211_local {
 
 	struct list_head interfaces;
 
-	int sta_scanning;
+	bool sta_sw_scanning;
+	bool sta_hw_scanning;
 	int scan_channel_idx;
 	enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
 	unsigned long last_scan_completed;
@@ -745,7 +746,8 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len);
 void ieee80211_sta_req_auth(struct net_device *dev,
 			    struct ieee80211_if_sta *ifsta);
 int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len);
-void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
+ieee80211_txrx_result ieee80211_sta_rx_scan(struct net_device *dev,
+					    struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status);
 void ieee80211_rx_bss_list_init(struct net_device *dev);
 void ieee80211_rx_bss_list_deinit(struct net_device *dev);
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index bbd9bc572a5..dc03bd796fe 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -315,7 +315,7 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq)
 	}
 
 	if (set) {
-		if (local->sta_scanning)
+		if (local->sta_sw_scanning)
 			ret = 0;
 		else
 			ret = ieee80211_hw_config(local);
@@ -545,8 +545,10 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
 {
 	int res;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	if (local->sta_scanning)
+
+	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return -EAGAIN;
+
 	res = ieee80211_sta_scan_results(dev, extra, data->length);
 	if (res >= 0) {
 		data->length = res;
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 4f9be2fb2bf..231a663ad91 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1483,8 +1483,18 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		u32 supp_rates, prev_rates;
 		int i, j;
 
-		mode = local->sta_scanning ?
+		mode = local->sta_sw_scanning ?
 		       local->scan_hw_mode : local->oper_hw_mode;
+
+		if (local->sta_hw_scanning) {
+			/* search for the correct mode matches the beacon */
+			list_for_each_entry(mode, &local->modes_list, list)
+				if (mode->mode == rx_status->phymode)
+					break;
+
+			if (mode == NULL)
+				mode = local->oper_hw_mode;
+		}
 		rates = mode->rates;
 		num_rates = mode->num_rates;
 
@@ -1867,31 +1877,39 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
 }
 
 
-void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
-			   struct ieee80211_rx_status *rx_status)
+ieee80211_txrx_result
+ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
+		      struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_mgmt *mgmt;
 	u16 fc;
 
-	if (skb->len < 24) {
-		dev_kfree_skb(skb);
-		return;
-	}
+	if (skb->len < 2)
+		return TXRX_DROP;
 
 	mgmt = (struct ieee80211_mgmt *) skb->data;
 	fc = le16_to_cpu(mgmt->frame_control);
 
+	if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL)
+		return TXRX_CONTINUE;
+
+	if (skb->len < 24)
+		return TXRX_DROP;
+
 	if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) {
 		if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) {
 			ieee80211_rx_mgmt_probe_resp(dev, mgmt,
 						     skb->len, rx_status);
+			dev_kfree_skb(skb);
+			return TXRX_QUEUED;
 		} else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) {
 			ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len,
 						 rx_status);
+			dev_kfree_skb(skb);
+			return TXRX_QUEUED;
 		}
 	}
-
-	dev_kfree_skb(skb);
+	return TXRX_CONTINUE;
 }
 
 
@@ -1981,7 +1999,7 @@ void ieee80211_sta_work(struct work_struct *work)
 	if (!netif_running(dev))
 		return;
 
-	if (local->sta_scanning)
+	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return;
 
 	if (sdata->type != IEEE80211_IF_TYPE_STA &&
@@ -2639,9 +2657,15 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	union iwreq_data wrqu;
 
 	local->last_scan_completed = jiffies;
-	wmb();
-	local->sta_scanning = 0;
+	memset(&wrqu, 0, sizeof(wrqu));
+	wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
 
+	if (local->sta_hw_scanning) {
+		local->sta_hw_scanning = 0;
+		goto done;
+	}
+
+	local->sta_sw_scanning = 0;
 	if (ieee80211_hw_config(local))
 		printk(KERN_DEBUG "%s: failed to restore operational "
 		       "channel after scan\n", dev->name);
@@ -2657,9 +2681,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 
 	netif_tx_unlock_bh(local->mdev);
 
-	memset(&wrqu, 0, sizeof(wrqu));
-	wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 
@@ -2677,6 +2698,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	}
 	rcu_read_unlock();
 
+done:
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (sdata->type == IEEE80211_IF_TYPE_IBSS) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -2699,7 +2721,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 	int skip;
 	unsigned long next_delay = 0;
 
-	if (!local->sta_scanning)
+	if (!local->sta_sw_scanning)
 		return;
 
 	switch (local->scan_state) {
@@ -2762,7 +2784,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 		break;
 	}
 
-	if (local->sta_scanning)
+	if (local->sta_sw_scanning)
 		queue_delayed_work(local->hw.workqueue, &local->scan_work,
 				   next_delay);
 }
@@ -2794,7 +2816,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 	  * ResultCode: SUCCESS, INVALID_PARAMETERS
 	 */
 
-	if (local->sta_scanning) {
+	if (local->sta_sw_scanning || local->sta_hw_scanning) {
 		if (local->scan_dev == dev)
 			return 0;
 		return -EBUSY;
@@ -2802,15 +2824,15 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 
 	if (local->ops->hw_scan) {
 		int rc = local->ops->hw_scan(local_to_hw(local),
-					    ssid, ssid_len);
+					     ssid, ssid_len);
 		if (!rc) {
-			local->sta_scanning = 1;
+			local->sta_hw_scanning = 1;
 			local->scan_dev = dev;
 		}
 		return rc;
 	}
 
-	local->sta_scanning = 1;
+	local->sta_sw_scanning = 1;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -2865,7 +2887,7 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
 	if (sdata->type != IEEE80211_IF_TYPE_STA)
 		return ieee80211_sta_start_scan(dev, ssid, ssid_len);
 
-	if (local->sta_scanning) {
+	if (local->sta_sw_scanning || local->sta_hw_scanning) {
 		if (local->scan_dev == dev)
 			return 0;
 		return -EBUSY;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a7263fc476b..a26aa7f5049 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -338,8 +338,14 @@ ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx)
 	struct ieee80211_local *local = rx->local;
 	struct sk_buff *skb = rx->skb;
 
-	if (unlikely(local->sta_scanning != 0)) {
-		ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status);
+	if (unlikely(local->sta_hw_scanning))
+		return ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status);
+
+	if (unlikely(local->sta_sw_scanning)) {
+		/* drop all the other packets during a software scan anyway */
+		if (ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status)
+		    != TXRX_QUEUED)
+			dev_kfree_skb(skb);
 		return TXRX_QUEUED;
 	}
 
@@ -1499,7 +1505,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		goto end;
 	}
 
-	if (unlikely(local->sta_scanning))
+	if (unlikely(local->sta_sw_scanning || local->sta_hw_scanning))
 		rx.flags |= IEEE80211_TXRXD_RXIN_SCAN;
 
 	if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9ccf4b5a9aa..50ab4b2de1e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -225,7 +225,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
 	if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED))
 		return TXRX_CONTINUE;
 
-	if (unlikely(tx->local->sta_scanning != 0) &&
+	if (unlikely(tx->local->sta_sw_scanning) &&
 	    ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
 	     (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ))
 		return TXRX_DROP;
-- 
cgit v1.2.3


From 76ee65bfaa1435320a72989a6413467ce446ae23 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Thu, 22 Nov 2007 19:49:12 +0200
Subject: mac80211: restructuring data Rx handlers

This patch restructures the Rx handlers chain by incorporating previously
handlers ieee80211_rx_h_802_1x_pae and ieee80211_rx_h_drop_unencrypted
into ieee80211_rx_h_data, already in 802.3 form. this scheme follows more
precisely after the IEEE802.11 data plane archituecture, and will prevent
code duplication to IEEE8021.11n A-MSDU handler.

added function:
 - ieee80211_data_to_8023: transfering 802.11 data frames to 802.3 frame
 - ieee80211_deliver_skb: delivering the 802.3 frames to upper stack
eliminated handlers:
 - ieee80211_rx_h_drop_unencrypted: now function ieee80211_drop_unencrypted
 - ieee80211_rx_h_802_1x_pae: now function ieee80211_802_1x_pae
changed handlers:
 - ieee80211_rx_h_data: now contains calls to four above function

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_i.h |   2 +-
 net/mac80211/rx.c          | 124 ++++++++++++++++++++++++++++-----------------
 net/mac80211/tx.c          |  12 +++--
 net/mac80211/util.c        |  14 +----
 4 files changed, 86 insertions(+), 66 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e9109443cb1..e2aed0b2d8b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -792,7 +792,7 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */
 extern const unsigned char rfc1042_header[6];
 extern const unsigned char bridge_tunnel_header[6];
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len);
-int ieee80211_is_eapol(const struct sk_buff *skb);
+int ieee80211_is_eapol(const struct sk_buff *skb, int hdrlen);
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
 void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a26aa7f5049..478d85563bd 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -962,68 +962,64 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx)
 	return TXRX_CONTINUE;
 }
 
-static ieee80211_txrx_result
-ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx)
+static int
+ieee80211_drop_802_1x_pae(struct ieee80211_txrx_data *rx, int hdrlen)
 {
-	if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) &&
+	if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb, hdrlen) &&
 	    rx->sdata->type != IEEE80211_IF_TYPE_STA &&
 	    (rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
-		return TXRX_CONTINUE;
+		return 0;
 
 	if (unlikely(rx->sdata->ieee802_1x &&
 		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
 		     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
 		     (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) &&
-		     !ieee80211_is_eapol(rx->skb))) {
+		     !ieee80211_is_eapol(rx->skb, hdrlen))) {
 #ifdef CONFIG_MAC80211_DEBUG
-		struct ieee80211_hdr *hdr =
-			(struct ieee80211_hdr *) rx->skb->data;
-		DECLARE_MAC_BUF(mac);
-		printk(KERN_DEBUG "%s: dropped frame from %s"
-		       " (unauthorized port)\n", rx->dev->name,
-		       print_mac(mac, hdr->addr2));
+		printk(KERN_DEBUG "%s: dropped frame "
+		       "(unauthorized port)\n", rx->dev->name);
 #endif /* CONFIG_MAC80211_DEBUG */
-		return TXRX_DROP;
+		return -EACCES;
 	}
 
-	return TXRX_CONTINUE;
+	return 0;
 }
 
-static ieee80211_txrx_result
-ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx)
+static int
+ieee80211_drop_unencrypted(struct ieee80211_txrx_data *rx, int hdrlen)
 {
 	/*
 	 * Pass through unencrypted frames if the hardware has
 	 * decrypted them already.
 	 */
 	if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED)
-		return TXRX_CONTINUE;
+		return 0;
 
 	/* Drop unencrypted frames if key is set. */
 	if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) &&
 		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
 		     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
 		     (rx->key || rx->sdata->drop_unencrypted) &&
-		     (rx->sdata->eapol == 0 || !ieee80211_is_eapol(rx->skb)))) {
+		     (rx->sdata->eapol == 0 ||
+		      !ieee80211_is_eapol(rx->skb, hdrlen)))) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "%s: RX non-WEP frame, but expected "
 			       "encryption\n", rx->dev->name);
-		return TXRX_DROP;
+		return -EACCES;
 	}
-	return TXRX_CONTINUE;
+	return 0;
 }
 
-static ieee80211_txrx_result
-ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
+static int
+ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 {
 	struct net_device *dev = rx->dev;
-	struct ieee80211_local *local = rx->local;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
 	u16 fc, hdrlen, ethertype;
 	u8 *payload;
 	u8 dst[ETH_ALEN];
 	u8 src[ETH_ALEN];
-	struct sk_buff *skb = rx->skb, *skb2;
+	struct sk_buff *skb = rx->skb;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
@@ -1031,11 +1027,9 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 	DECLARE_MAC_BUF(mac4);
 
 	fc = rx->fc;
-	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
-		return TXRX_CONTINUE;
 
 	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
-		return TXRX_DROP;
+		return -1;
 
 	hdrlen = ieee80211_get_hdrlen(fc);
 
@@ -1064,7 +1058,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 				       print_mac(mac, hdr->addr1),
 				       print_mac(mac2, hdr->addr2),
 				       print_mac(mac3, hdr->addr3));
-			return TXRX_DROP;
+			return -1;
 		}
 		break;
 	case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -1081,7 +1075,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 				       print_mac(mac2, hdr->addr2),
 				       print_mac(mac3, hdr->addr3),
 				       print_mac(mac4, hdr->addr4));
-			return TXRX_DROP;
+			return -1;
 		}
 		break;
 	case IEEE80211_FCTL_FROMDS:
@@ -1092,7 +1086,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 		if (sdata->type != IEEE80211_IF_TYPE_STA ||
 		    (is_multicast_ether_addr(dst) &&
 		     !compare_ether_addr(src, dev->dev_addr)))
-			return TXRX_DROP;
+			return -1;
 		break;
 	case 0:
 		/* DA SA BSSID */
@@ -1108,21 +1102,20 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 				       print_mac(mac2, hdr->addr2),
 				       print_mac(mac3, hdr->addr3));
 			}
-			return TXRX_DROP;
+			return -1;
 		}
 		break;
 	}
 
-	payload = skb->data + hdrlen;
-
 	if (unlikely(skb->len - hdrlen < 8)) {
 		if (net_ratelimit()) {
 			printk(KERN_DEBUG "%s: RX too short data frame "
 			       "payload\n", dev->name);
 		}
-		return TXRX_DROP;
+		return -1;
 	}
 
+	payload = skb->data + hdrlen;
 	ethertype = (payload[6] << 8) | payload[7];
 
 	if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
@@ -1143,12 +1136,19 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 		memcpy(ehdr->h_source, src, ETH_ALEN);
 		ehdr->h_proto = len;
 	}
-	skb->dev = dev;
+	return 0;
+}
 
-	skb2 = NULL;
+static void
+ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
+{
+	struct net_device *dev = rx->dev;
+	struct ieee80211_local *local = rx->local;
+	struct sk_buff *skb, *xmit_skb;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	dev->stats.rx_packets++;
-	dev->stats.rx_bytes += skb->len;
+	skb = rx->skb;
+	xmit_skb = NULL;
 
 	if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP
 	    || sdata->type == IEEE80211_IF_TYPE_VLAN) &&
@@ -1156,8 +1156,8 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 		if (is_multicast_ether_addr(skb->data)) {
 			/* send multicast frames both to higher layers in
 			 * local net stack and back to the wireless media */
-			skb2 = skb_copy(skb, GFP_ATOMIC);
-			if (!skb2 && net_ratelimit())
+			xmit_skb = skb_copy(skb, GFP_ATOMIC);
+			if (!xmit_skb && net_ratelimit())
 				printk(KERN_DEBUG "%s: failed to clone "
 				       "multicast frame\n", dev->name);
 		} else {
@@ -1172,7 +1172,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 				 * AP, so send the frame directly to it and
 				 * do not pass the frame to local net stack.
 				 */
-				skb2 = skb;
+				xmit_skb = skb;
 				skb = NULL;
 			}
 			if (dsta)
@@ -1187,13 +1187,45 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 		netif_rx(skb);
 	}
 
-	if (skb2) {
+	if (xmit_skb) {
 		/* send to wireless media */
-		skb2->protocol = __constant_htons(ETH_P_802_3);
-		skb_set_network_header(skb2, 0);
-		skb_set_mac_header(skb2, 0);
-		dev_queue_xmit(skb2);
+		xmit_skb->protocol = __constant_htons(ETH_P_802_3);
+		skb_set_network_header(xmit_skb, 0);
+		skb_set_mac_header(xmit_skb, 0);
+		dev_queue_xmit(xmit_skb);
 	}
+}
+
+static ieee80211_txrx_result
+ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
+{
+	struct net_device *dev = rx->dev;
+	u16 fc;
+	int err, hdrlen;
+
+	fc = rx->fc;
+	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
+		return TXRX_CONTINUE;
+
+	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+		return TXRX_DROP;
+
+	hdrlen = ieee80211_get_hdrlen(fc);
+
+	if ((ieee80211_drop_802_1x_pae(rx, hdrlen)) ||
+	    (ieee80211_drop_unencrypted(rx, hdrlen)))
+		return TXRX_DROP;
+
+	err = ieee80211_data_to_8023(rx);
+	if (unlikely(err))
+		return TXRX_DROP;
+
+	rx->skb->dev = dev;
+
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += rx->skb->len;
+
+	ieee80211_deliver_skb(rx);
 
 	return TXRX_QUEUED;
 }
@@ -1347,8 +1379,6 @@ ieee80211_rx_handler ieee80211_rx_handlers[] =
 	 * are not passed to user space by these functions
 	 */
 	ieee80211_rx_h_remove_qos_control,
-	ieee80211_rx_h_802_1x_pae,
-	ieee80211_rx_h_drop_unencrypted,
 	ieee80211_rx_h_data,
 	ieee80211_rx_h_mgmt,
 	NULL
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 50ab4b2de1e..12c15588af6 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -420,7 +420,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
 	return TXRX_CONTINUE;
 }
 
-
 static ieee80211_txrx_result
 ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
 {
@@ -433,13 +432,15 @@ ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
 		return ieee80211_tx_h_multicast_ps_buf(tx);
 }
 
-
-
-
 static ieee80211_txrx_result
 ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
 {
 	struct ieee80211_key *key;
+	const struct ieee80211_hdr *hdr;
+	u16 fc;
+
+	hdr = (const struct ieee80211_hdr *) tx->skb->data;
+	fc = le16_to_cpu(hdr->frame_control);
 
 	if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
 		tx->key = NULL;
@@ -448,7 +449,8 @@ ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) {
+		 !(tx->sdata->eapol &&
+		   ieee80211_is_eapol(tx->skb, ieee80211_get_hdrlen(fc)))) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TXRX_DROP;
 	} else {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 88f262baaa5..5227c16f113 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -218,23 +218,11 @@ int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
 
-int ieee80211_is_eapol(const struct sk_buff *skb)
+int ieee80211_is_eapol(const struct sk_buff *skb, int hdrlen)
 {
-	const struct ieee80211_hdr *hdr;
-	u16 fc;
-	int hdrlen;
-
 	if (unlikely(skb->len < 10))
 		return 0;
 
-	hdr = (const struct ieee80211_hdr *) skb->data;
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
-		return 0;
-
-	hdrlen = ieee80211_get_hdrlen(fc);
-
 	if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) &&
 		     memcmp(skb->data + hdrlen, eapol_header,
 			    sizeof(eapol_header)) == 0))
-- 
cgit v1.2.3


From e38bad4766a110b61fa6038f10be16ced8c6cc38 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 28 Nov 2007 10:55:32 +0100
Subject: mac80211: make ieee80211_iterate_active_interfaces not need rtnl

Interface iteration in mac80211 can be done without holding any
locks because I converted it to RCU. Initially, I thought this
wouldn't be needed for ieee80211_iterate_active_interfaces but
it's turning out that multi-BSS AP support can be much simpler
in a driver if ieee80211_iterate_active_interfaces can be called
without holding locks. This converts it to use RCU, it adds a
requirement that the callback it invokes cannot sleep.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/util.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5227c16f113..7b278e9aa1a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -482,10 +482,9 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 
-	ASSERT_RTNL();
+	rcu_read_lock();
 
-	/* we hold the RTNL here so can safely walk the list */
-	list_for_each_entry(sdata, &local->interfaces, list) {
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		switch (sdata->type) {
 		case IEEE80211_IF_TYPE_INVALID:
 		case IEEE80211_IF_TYPE_MNTR:
@@ -503,5 +502,7 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
 			iterator(data, sdata->dev->dev_addr,
 				 sdata->dev->ifindex);
 	}
+
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
-- 
cgit v1.2.3


From b1357a81a9ba1cf173747a0e50c24795640327f5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 28 Nov 2007 11:04:21 +0100
Subject: mac80211: allow setting drop_unencrypted with wext

This patch allows wpa_supplicant to set the drop_unencrypted setting in
mac80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_ioctl.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index dc03bd796fe..161f3bdec41 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -930,6 +930,9 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 	case IW_AUTH_RX_UNENCRYPTED_EAPOL:
 	case IW_AUTH_KEY_MGMT:
 		break;
+	case IW_AUTH_DROP_UNENCRYPTED:
+		sdata->drop_unencrypted = !!data->value;
+		break;
 	case IW_AUTH_PRIVACY_INVOKED:
 		if (sdata->type != IEEE80211_IF_TYPE_STA)
 			ret = -EINVAL;
-- 
cgit v1.2.3


From 82b3cad942ebf1f64798e6ec3c46c277822e1fce Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Sun, 16 Dec 2007 16:09:26 -0800
Subject: mac80211: adding MAC80211_HT_DEBUG config variable

This patch adds MAC80211_HT_DEBUG config variable
to separate HT debug features

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Kconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index ce176e691af..09711b06280 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -51,6 +51,16 @@ config MAC80211_DEBUG
 	  If you are not trying to debug or develop the ieee80211
 	  subsystem, you most likely want to say N here.
 
+config MAC80211_HT_DEBUG
+       bool "Enable HT debugging output"
+       depends on MAC80211_DEBUG
+       ---help---
+       This option enables 802.11n High Throughput features
+       debug tracing output.
+
+       If you are not trying to debug of develop the ieee80211
+       subsystem, you most likely want to say N here.
+
 config MAC80211_VERBOSE_DEBUG
 	bool "Verbose debugging output"
 	depends on MAC80211_DEBUG
-- 
cgit v1.2.3


From 10816d40f2e9500057cb46d7608a362a1d10bb9b Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 26 Nov 2007 16:14:30 +0200
Subject: mac80211: adding 802.11n HT framework definitions

New structures:
 - ieee80211_ht_info: describing STA's HT capabilities
 - ieee80211_ht_bss_info: describing BSS's HT characteristics
Changed structures:
 - ieee80211_hw_mode: now also holds PHY HT capabilities for each HW mode
 - ieee80211_conf: ht_conf holds current self HT configuration
                   ht_bss_conf holds current BSS HT configuration
 - flag IEEE80211_CONF_SUPPORT_HT_MODE added to indicate if HT use is
   desired
 - sta_info: now also holds Peer's HT capabilities

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/sta_info.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 8f7ebe41c02..e1a4ac1d48d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -99,6 +99,9 @@ struct sta_info {
 
 	u16 listen_interval;
 
+	struct ieee80211_ht_info ht_info; /* 802.11n HT capabilities
+					     of this STA */
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct sta_info_debugfsdentries {
 		struct dentry *dir;
-- 
cgit v1.2.3


From c715350828b12ce3b29e655fec7a7d6b22245d00 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 26 Nov 2007 16:14:31 +0200
Subject: mac80211: adding 802.11n IEs handling

This patch presents the ability to parse and compose HT IEs, and to put
the IE relevant data inside the mac80211's internal HT structures

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_i.h   |  8 +++-
 net/mac80211/ieee80211_sta.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e2aed0b2d8b..982f9967299 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -89,6 +89,8 @@ struct ieee80211_sta_bss {
 	size_t rsn_ie_len;
 	u8 *wmm_ie;
 	size_t wmm_ie_len;
+	u8 *ht_ie;
+	size_t ht_ie_len;
 #define IEEE80211_MAX_SUPP_RATES 32
 	u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
 	size_t supp_rates_len;
@@ -759,7 +761,11 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
 int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
 void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes);
 void ieee80211_reset_erp_info(struct net_device *dev);
-
+int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
+				   struct ieee80211_ht_info *ht_info);
+int ieee80211_ht_addt_info_ie_to_ht_bss_info(
+			struct ieee80211_ht_addt_info *ht_add_info_ie,
+			struct ieee80211_ht_bss_info *bss_info);
 /* ieee80211_iface.c */
 int ieee80211_if_add(struct net_device *dev, const char *name,
 		     struct net_device **new_dev, int type);
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 231a663ad91..1f47afeb925 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -90,7 +90,8 @@ struct ieee802_11_elems {
 	u8 *ext_supp_rates;
 	u8 *wmm_info;
 	u8 *wmm_param;
-
+	u8 *ht_cap_elem;
+	u8 *ht_info_elem;
 	/* length of them, respectively */
 	u8 ssid_len;
 	u8 supp_rates_len;
@@ -106,6 +107,8 @@ struct ieee802_11_elems {
 	u8 ext_supp_rates_len;
 	u8 wmm_info_len;
 	u8 wmm_param_len;
+	u8 ht_cap_elem_len;
+	u8 ht_info_elem_len;
 };
 
 static void ieee802_11_parse_elems(u8 *start, size_t len,
@@ -190,6 +193,14 @@ static void ieee802_11_parse_elems(u8 *start, size_t len,
 			elems->ext_supp_rates = pos;
 			elems->ext_supp_rates_len = elen;
 			break;
+		case WLAN_EID_HT_CAPABILITY:
+			elems->ht_cap_elem = pos;
+			elems->ht_cap_elem_len = elen;
+			break;
+		case WLAN_EID_HT_EXTRA_INFO:
+			elems->ht_info_elem = pos;
+			elems->ht_info_elem_len = elen;
+			break;
 		default:
 			break;
 		}
@@ -332,6 +343,51 @@ static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value)
 		ieee80211_erp_info_change_notify(dev, changes);
 }
 
+int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
+				   struct ieee80211_ht_info *ht_info)
+{
+
+	if (ht_info == NULL)
+		return -EINVAL;
+
+	memset(ht_info, 0, sizeof(*ht_info));
+
+	if (ht_cap_ie) {
+		u8 ampdu_info = ht_cap_ie->ampdu_params_info;
+
+		ht_info->ht_supported = 1;
+		ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info);
+		ht_info->ampdu_factor =
+			ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR;
+		ht_info->ampdu_density =
+			(ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2;
+		memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16);
+	} else
+		ht_info->ht_supported = 0;
+
+	return 0;
+}
+
+int ieee80211_ht_addt_info_ie_to_ht_bss_info(
+			struct ieee80211_ht_addt_info *ht_add_info_ie,
+			struct ieee80211_ht_bss_info *bss_info)
+{
+	if (bss_info == NULL)
+		return -EINVAL;
+
+	memset(bss_info, 0, sizeof(*bss_info));
+
+	if (ht_add_info_ie) {
+		u16 op_mode;
+		op_mode = le16_to_cpu(ht_add_info_ie->operation_mode);
+
+		bss_info->primary_channel = ht_add_info_ie->control_chan;
+		bss_info->bss_cap = ht_add_info_ie->ht_param;
+		bss_info->bss_op_mode = (u8)(op_mode & 0xff);
+	}
+
+	return 0;
+}
 
 static void ieee80211_sta_send_associnfo(struct net_device *dev,
 					 struct ieee80211_if_sta *ifsta)
@@ -630,6 +686,19 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		*pos++ = 1; /* WME ver */
 		*pos++ = 0;
 	}
+	/* wmm support is a must to HT */
+	if (wmm && mode->ht_info.ht_supported) {
+		__le16 tmp = cpu_to_le16(mode->ht_info.cap);
+		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
+		*pos++ = WLAN_EID_HT_CAPABILITY;
+		*pos++ = sizeof(struct ieee80211_ht_cap);
+		memset(pos, 0, sizeof(struct ieee80211_ht_cap));
+		memcpy(pos, &tmp, sizeof(u16));
+		pos += sizeof(u16);
+		*pos++ = (mode->ht_info.ampdu_factor |
+				(mode->ht_info.ampdu_density << 2));
+		memcpy(pos, mode->ht_info.supp_mcs_set, 16);
+	}
 
 	kfree(ifsta->assocreq_ies);
 	ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
@@ -1380,6 +1449,7 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
 	kfree(bss->wpa_ie);
 	kfree(bss->rsn_ie);
 	kfree(bss->wmm_ie);
+	kfree(bss->ht_ie);
 	kfree(bss);
 }
 
@@ -1637,7 +1707,22 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		bss->wmm_ie = NULL;
 		bss->wmm_ie_len = 0;
 	}
-
+	if (elems.ht_cap_elem &&
+	    (!bss->ht_ie || bss->ht_ie_len != elems.ht_cap_elem_len ||
+	     memcmp(bss->ht_ie, elems.ht_cap_elem, elems.ht_cap_elem_len))) {
+		kfree(bss->ht_ie);
+		bss->ht_ie = kmalloc(elems.ht_cap_elem_len + 2, GFP_ATOMIC);
+		if (bss->ht_ie) {
+			memcpy(bss->ht_ie, elems.ht_cap_elem - 2,
+			       elems.ht_cap_elem_len + 2);
+			bss->ht_ie_len = elems.ht_cap_elem_len + 2;
+		} else
+			bss->ht_ie_len = 0;
+	} else if (!elems.ht_cap_elem && bss->ht_ie) {
+		kfree(bss->ht_ie);
+		bss->ht_ie = NULL;
+		bss->ht_ie_len = 0;
+	}
 
 	bss->hw_mode = rx_status->phymode;
 	bss->freq = rx_status->freq;
-- 
cgit v1.2.3


From 9f985b0eee4070e494b9d62313da982cfef9b5e3 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 26 Nov 2007 16:14:32 +0200
Subject: mac80211: adding 802.11n essential A-MPDU addBA capability

This patch adds the capability to identify and answer an add block ACK
request.
As this series of patches only adds HT handling with no aggregations,
(A-MPDU aggregations acceptance is not obligatory according to 802.11n
draft) we are currently sending back a refusal upon this request.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_sta.c | 124 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 1f47afeb925..1d553d78b22 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -57,6 +57,13 @@
 
 #define ERP_INFO_USE_PROTECTION BIT(1)
 
+/* mgmt header + 1 byte action code */
+#define IEEE80211_MIN_ACTION_SIZE (24 + 1)
+
+#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
+#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
+#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
+
 static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 				     u8 *ssid, size_t ssid_len);
 static struct ieee80211_sta_bss *
@@ -987,6 +994,91 @@ static void ieee80211_auth_challenge(struct net_device *dev,
 			    elems.challenge_len + 2, 1);
 }
 
+static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
+					u8 dialog_token, u16 status, u16 policy,
+					u16 buf_size, u16 timeout)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u16 capab;
+
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer "
+		       "for addba resp frame\n", dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, da, ETH_ALEN);
+	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	if (sdata->type == IEEE80211_IF_TYPE_AP)
+		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+					   IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
+	mgmt->u.action.category = WLAN_CATEGORY_BACK;
+	mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
+	mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
+
+	capab = (u16)(policy << 1);	/* bit 1 aggregation policy */
+	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
+	capab |= (u16)(buf_size << 6);	/* bit 15:6 max size of aggregation */
+
+	mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
+	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
+	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
+
+	ieee80211_sta_tx(dev, skb, 0);
+
+	return;
+}
+
+static void ieee80211_sta_process_addba_request(struct net_device *dev,
+						struct ieee80211_mgmt *mgmt,
+						size_t len)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	u16 capab, tid, timeout, ba_policy, buf_size, status;
+	u8 dialog_token;
+
+	sta = sta_info_get(local, mgmt->sa);
+	if (!sta)
+		return;
+
+	/* extract session parameters from addba request frame */
+	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
+	timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
+
+	capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+	ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
+	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
+	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+
+	/* TODO - currently aggregation is declined (A-MPDU add BA request
+	* acceptance is not obligatory by 802.11n draft), but here is
+	* the entry point for dealing with it */
+#ifdef MAC80211_HT_DEBUG
+	if (net_ratelimit())
+		printk(KERN_DEBUG "Add Block Ack request arrived,"
+				   " currently denying it\n");
+#endif /* MAC80211_HT_DEBUG */
+
+	status = WLAN_STATUS_REQUEST_DECLINED;
+
+	ieee80211_send_addba_resp(sta->dev, sta->addr, tid, dialog_token,
+				status, 1, buf_size, timeout);
+	sta_info_put(sta);
+}
 
 static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 				   struct ieee80211_if_sta *ifsta,
@@ -1870,6 +1962,34 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 	ieee80211_sta_tx(dev, skb, 0);
 }
 
+void ieee80211_rx_mgmt_action(struct net_device *dev,
+			     struct ieee80211_if_sta *ifsta,
+			     struct ieee80211_mgmt *mgmt,
+			     size_t len)
+{
+	if (len < IEEE80211_MIN_ACTION_SIZE)
+		return;
+
+	switch (mgmt->u.action.category) {
+	case WLAN_CATEGORY_BACK:
+		switch (mgmt->u.action.u.addba_req.action_code) {
+		case WLAN_ACTION_ADDBA_REQ:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.addba_req)))
+				break;
+			ieee80211_sta_process_addba_request(dev, mgmt, len);
+			break;
+		default:
+			if (net_ratelimit())
+			   printk(KERN_DEBUG "%s: received unsupported BACK\n",
+					dev->name);
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+}
 
 void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status)
@@ -1899,6 +2019,7 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
 	case IEEE80211_STYPE_REASSOC_RESP:
 	case IEEE80211_STYPE_DEAUTH:
 	case IEEE80211_STYPE_DISASSOC:
+	case IEEE80211_STYPE_ACTION:
 		skb_queue_tail(&ifsta->skb_queue, skb);
 		queue_work(local->hw.workqueue, &ifsta->work);
 		return;
@@ -1956,6 +2077,9 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
 	case IEEE80211_STYPE_DISASSOC:
 		ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len);
 		break;
+	case IEEE80211_STYPE_ACTION:
+		ieee80211_rx_mgmt_action(dev, ifsta, mgmt, skb->len);
+		break;
 	}
 
 	kfree_skb(skb);
-- 
cgit v1.2.3


From fd4c7f2fce1737105208c564e1458c885918982d Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 26 Nov 2007 16:14:33 +0200
Subject: mac80211: adding 802.11n essential A-MSDU Rx capability

This patch adds the ability to receive and handle A-MSDU frames.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_i.h |   1 +
 net/mac80211/rx.c          | 124 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 982f9967299..66b6cf3a62a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -155,6 +155,7 @@ struct ieee80211_txrx_data {
 			int load;
 			u32 tkip_iv32;
 			u16 tkip_iv16;
+			u8  amsdu_frame;
 		} rx;
 	} u;
 };
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 478d85563bd..4525d7332c8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -243,6 +243,10 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
 		u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN;
 		/* frame has qos control */
 		tid = qc[0] & QOS_CONTROL_TID_MASK;
+		if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
+			rx->u.rx.amsdu_frame = 1;
+		else
+			rx->u.rx.amsdu_frame = 0;
 	} else {
 		if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) {
 			/* Separate TID for management frames */
@@ -1196,6 +1200,125 @@ ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
 	}
 }
 
+static ieee80211_txrx_result
+ieee80211_rx_h_amsdu(struct ieee80211_txrx_data *rx)
+{
+	struct net_device *dev = rx->dev;
+	struct ieee80211_local *local = rx->local;
+	u16 fc, ethertype;
+	u8 *payload;
+	struct sk_buff *skb = rx->skb, *frame = NULL;
+	const struct ethhdr *eth;
+	int remaining, err;
+	u8 dst[ETH_ALEN];
+	u8 src[ETH_ALEN];
+	DECLARE_MAC_BUF(mac);
+
+	fc = rx->fc;
+	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
+		return TXRX_CONTINUE;
+
+	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+		return TXRX_DROP;
+
+	if (!rx->u.rx.amsdu_frame)
+		return TXRX_CONTINUE;
+
+	err = ieee80211_data_to_8023(rx);
+	if (unlikely(err))
+		return TXRX_DROP;
+
+	skb->dev = dev;
+
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += skb->len;
+
+	/* skip the wrapping header */
+	eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr));
+	if (!eth)
+		return TXRX_DROP;
+
+	while (skb != frame) {
+		u8 padding;
+		__be16 len = eth->h_proto;
+		unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len);
+
+		remaining = skb->len;
+		memcpy(dst, eth->h_dest, ETH_ALEN);
+		memcpy(src, eth->h_source, ETH_ALEN);
+
+		padding = ((4 - subframe_len) & 0x3);
+		/* the last MSDU has no padding */
+		if (subframe_len > remaining) {
+			printk(KERN_DEBUG "%s: wrong buffer size", dev->name);
+			return TXRX_DROP;
+		}
+
+		skb_pull(skb, sizeof(struct ethhdr));
+		/* if last subframe reuse skb */
+		if (remaining <= subframe_len + padding)
+			frame = skb;
+		else {
+			frame = dev_alloc_skb(local->hw.extra_tx_headroom +
+					      subframe_len);
+
+			if (frame == NULL)
+				return TXRX_DROP;
+
+			skb_reserve(frame, local->hw.extra_tx_headroom +
+				    sizeof(struct ethhdr));
+			memcpy(skb_put(frame, ntohs(len)), skb->data,
+				ntohs(len));
+
+			eth = (struct ethhdr *) skb_pull(skb, ntohs(len) +
+							padding);
+			if (!eth) {
+				printk(KERN_DEBUG "%s: wrong buffer size ",
+				       dev->name);
+				dev_kfree_skb(frame);
+				return TXRX_DROP;
+			}
+		}
+
+		skb_set_network_header(frame, 0);
+		frame->dev = dev;
+		frame->priority = skb->priority;
+		rx->skb = frame;
+
+		if ((ieee80211_drop_802_1x_pae(rx, 0)) ||
+		    (ieee80211_drop_unencrypted(rx, 0))) {
+			if (skb == frame) /* last frame */
+				return TXRX_DROP;
+			dev_kfree_skb(frame);
+			continue;
+		}
+
+		payload = frame->data;
+		ethertype = (payload[6] << 8) | payload[7];
+
+		if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
+			ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
+			compare_ether_addr(payload,
+					   bridge_tunnel_header) == 0)) {
+			/* remove RFC1042 or Bridge-Tunnel
+			 * encapsulation and replace EtherType */
+			skb_pull(frame, 6);
+			memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
+			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
+		} else {
+			memcpy(skb_push(frame, sizeof(__be16)), &len,
+				sizeof(__be16));
+			memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
+			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
+		}
+
+
+		ieee80211_deliver_skb(rx);
+	}
+
+	return TXRX_QUEUED;
+}
+
 static ieee80211_txrx_result
 ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 {
@@ -1379,6 +1502,7 @@ ieee80211_rx_handler ieee80211_rx_handlers[] =
 	 * are not passed to user space by these functions
 	 */
 	ieee80211_rx_h_remove_qos_control,
+	ieee80211_rx_h_amsdu,
 	ieee80211_rx_h_data,
 	ieee80211_rx_h_mgmt,
 	NULL
-- 
cgit v1.2.3


From d3c990fb26b78f60614885d9ecaf7b7686b7b098 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 26 Nov 2007 16:14:34 +0200
Subject: mac80211: adding 802.11n configuration flows

This patch configures the 802.11n mode of operation
internally in ieee80211_conf structure and in the low-level
driver as well (through op conf_ht).
It does not include AP configuration flows.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211.c     | 51 ++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h   |  3 +++
 net/mac80211/ieee80211_sta.c | 32 +++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 4f8b6653e36..d6a97a68a62 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -34,6 +34,8 @@
 #include "debugfs.h"
 #include "debugfs_netdev.h"
 
+#define SUPP_MCS_SET_LEN 16
+
 /*
  * For seeing transmitted packets on monitor interfaces
  * we have a radiotap header too.
@@ -563,6 +565,55 @@ int ieee80211_hw_config(struct ieee80211_local *local)
 	return ret;
 }
 
+/**
+ * ieee80211_hw_config_ht should be used only after legacy configuration
+ * has been determined, as ht configuration depends upon the hardware's
+ * HT abilities for a _specific_ band.
+ */
+int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
+			   struct ieee80211_ht_info *req_ht_cap,
+			   struct ieee80211_ht_bss_info *req_bss_cap)
+{
+	struct ieee80211_conf *conf = &local->hw.conf;
+	struct ieee80211_hw_mode *mode = conf->mode;
+	int i;
+
+	/* HT is not supported */
+	if (!mode->ht_info.ht_supported) {
+		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
+		return -EOPNOTSUPP;
+	}
+
+	/* disable HT */
+	if (!enable_ht) {
+		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
+	} else {
+		conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
+		conf->ht_conf.cap = req_ht_cap->cap & mode->ht_info.cap;
+		conf->ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS);
+		conf->ht_conf.cap |=
+			mode->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS;
+		conf->ht_bss_conf.primary_channel =
+			req_bss_cap->primary_channel;
+		conf->ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
+		conf->ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
+		for (i = 0; i < SUPP_MCS_SET_LEN; i++)
+			conf->ht_conf.supp_mcs_set[i] =
+				mode->ht_info.supp_mcs_set[i] &
+				  req_ht_cap->supp_mcs_set[i];
+
+		/* In STA mode, this gives us indication
+		 * to the AP's mode of operation */
+		conf->ht_conf.ht_supported = 1;
+		conf->ht_conf.ampdu_factor = req_ht_cap->ampdu_factor;
+		conf->ht_conf.ampdu_density = req_ht_cap->ampdu_density;
+	}
+
+	local->ops->conf_ht(local_to_hw(local), &local->hw.conf);
+
+	return 0;
+}
+
 void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 66b6cf3a62a..2dcb9425fe8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -708,6 +708,9 @@ int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr);
 void ieee80211_if_setup(struct net_device *dev);
 struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local,
 					  int phymode, int hwrate);
+int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
+			   struct ieee80211_ht_info *req_ht_cap,
+			   struct ieee80211_ht_bss_info *req_bss_cap);
 
 /* ieee80211_ioctl.c */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 1d553d78b22..87830c04a1c 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1437,6 +1437,19 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 	}
 	sta->supp_rates = rates;
 
+	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
+	    local->ops->conf_ht) {
+		struct ieee80211_ht_bss_info bss_info;
+
+		ieee80211_ht_cap_ie_to_ht_info(
+				(struct ieee80211_ht_cap *)
+				elems.ht_cap_elem, &sta->ht_info);
+		ieee80211_ht_addt_info_ie_to_ht_bss_info(
+				(struct ieee80211_ht_addt_info *)
+				elems.ht_info_elem, &bss_info);
+		ieee80211_hw_config_ht(local, 1, &sta->ht_info, &bss_info);
+	}
+
 	rate_control_rate_init(sta, local);
 
 	if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
@@ -1859,6 +1872,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	struct ieee80211_if_sta *ifsta;
 	size_t baselen;
 	struct ieee802_11_elems elems;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_conf *conf = &local->hw.conf;
 
 	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
 
@@ -1881,6 +1896,23 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	if (elems.erp_info && elems.erp_info_len >= 1)
 		ieee80211_handle_erp_ie(dev, elems.erp_info[0]);
 
+	if (elems.ht_cap_elem && elems.ht_info_elem &&
+	    elems.wmm_param && local->ops->conf_ht &&
+	    conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
+		struct ieee80211_ht_bss_info bss_info;
+
+		ieee80211_ht_addt_info_ie_to_ht_bss_info(
+				(struct ieee80211_ht_addt_info *)
+				elems.ht_info_elem, &bss_info);
+		/* check if AP changed bss inforamation */
+		if ((conf->ht_bss_conf.primary_channel !=
+		     bss_info.primary_channel) ||
+		    (conf->ht_bss_conf.bss_cap != bss_info.bss_cap) ||
+		    (conf->ht_bss_conf.bss_op_mode != bss_info.bss_op_mode))
+			ieee80211_hw_config_ht(local, 1, &conf->ht_conf,
+						&bss_info);
+	}
+
 	if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
 		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
 					 elems.wmm_param_len);
-- 
cgit v1.2.3


From 64bd4b693f2d0b59673086e9c510068d5ad47cee Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Thu, 29 Nov 2007 10:35:53 +0200
Subject: mac80211: move A-MSDU identifier to flags

This patch moves u8 amsdu_frame in ieee80211_txrx_data to the flags
section as IEEE80211_TXRXD_RX_AMSDU

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_i.h | 2 +-
 net/mac80211/rx.c          | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2dcb9425fe8..b54ed5fe4aa 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -123,6 +123,7 @@ typedef enum {
 /* frame is destined to interface currently processed (incl. multicast frames) */
 #define IEEE80211_TXRXD_RXRA_MATCH		BIT(5)
 #define IEEE80211_TXRXD_TX_INJECTED		BIT(6)
+#define IEEE80211_TXRXD_RX_AMSDU		BIT(7)
 struct ieee80211_txrx_data {
 	struct sk_buff *skb;
 	struct net_device *dev;
@@ -155,7 +156,6 @@ struct ieee80211_txrx_data {
 			int load;
 			u32 tkip_iv32;
 			u16 tkip_iv16;
-			u8  amsdu_frame;
 		} rx;
 	} u;
 };
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4525d7332c8..0020e409e57 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -244,9 +244,9 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
 		/* frame has qos control */
 		tid = qc[0] & QOS_CONTROL_TID_MASK;
 		if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
-			rx->u.rx.amsdu_frame = 1;
+			rx->flags |= IEEE80211_TXRXD_RX_AMSDU;
 		else
-			rx->u.rx.amsdu_frame = 0;
+			rx->flags &= ~IEEE80211_TXRXD_RX_AMSDU;
 	} else {
 		if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) {
 			/* Separate TID for management frames */
@@ -1221,7 +1221,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_txrx_data *rx)
 	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
 		return TXRX_DROP;
 
-	if (!rx->u.rx.amsdu_frame)
+	if (!(rx->flags & IEEE80211_TXRXD_RX_AMSDU))
 		return TXRX_CONTINUE;
 
 	err = ieee80211_data_to_8023(rx);
-- 
cgit v1.2.3


From 88fecd092e2e7677f8e3f5ab32c16d801bc85fd6 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 21 Nov 2007 11:54:22 -0500
Subject: mac80211: remove "bcn_int" and "capab" scan results info

These bits were dead code before "mac80211: Remove local->scan_flags"
(commit 6681dd3fd0e4d36a4547415853e83411baa7b705) and probably should
have been removed as part of that commit.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_sta.c | 25 -------------------------
 1 file changed, 25 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 87830c04a1c..3c552fed2af 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -3260,31 +3260,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
 		}
 	}
 
-	do {
-		char *buf;
-
-		buf = kmalloc(100, GFP_ATOMIC);
-		if (!buf)
-			break;
-
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVCUSTOM;
-		sprintf(buf, "bcn_int=%d", bss->beacon_int);
-		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  buf);
-
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVCUSTOM;
-		sprintf(buf, "capab=0x%04x", bss->capability);
-		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  buf);
-
-		kfree(buf);
-		break;
-	} while (0);
-
 	return current_ev;
 }
 
-- 
cgit v1.2.3


From edae58ead57319b0bcf16e48a32365145c7bf0b7 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 21 Nov 2007 15:24:35 -0500
Subject: softmac: mark as obsolete and schedule for removal

Schedule softmac for for removal in the 2.6.26 development window.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee80211/Kconfig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 1438adedbc8..bd501046c9c 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -1,8 +1,9 @@
 config IEEE80211
-	tristate "Generic IEEE 802.11 Networking Stack"
+	tristate "Generic IEEE 802.11 Networking Stack (DEPRECATED)"
 	---help---
 	This option enables the hardware independent IEEE 802.11
-	networking stack.
+	networking stack.  This component is deprecated in favor of the
+	mac80211 component.
 
 config IEEE80211_DEBUG
 	bool "Enable full debugging output"
-- 
cgit v1.2.3


From 66f27a52037c89183e83689b0531412577be0101 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:55:54 +1100
Subject: [IPV4]: Unify and cleanup calls to devinet_sysctl_register

Currently this call is used to register sysctls for devices
and for the "default" confs. The "all" sysctls are registered
separately.

Besides, the inet_device is passed to this function, but it is
not needed there at all - just the device name and ifindex are
required.

Thanks to Herbert, who noticed, that this call doesn't even
require the devconf pointer (the last argument) - all we need
we can take from the in_device itself.

The fix is to make a __devinet_sysctl_register(), which registers
sysctls for all "devices" we need, including "default" and "all" :)

The original devinet_sysctl_register() works with struct net_device,
not the inet_device, and calls the introduced function, passing
the device name and ifindex (to be used as procname and ctl_name)
into it.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 72dd0ecb108..a3a7d301736 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -98,8 +98,7 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p);
+static void devinet_sysctl_register(struct in_device *idev);
 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
 #endif
 
@@ -173,7 +172,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	in_dev_hold(in_dev);
 
 #ifdef CONFIG_SYSCTL
-	devinet_sysctl_register(in_dev, &in_dev->cnf);
+	devinet_sysctl_register(in_dev);
 #endif
 	ip_mc_init_dev(in_dev);
 	if (dev->flags & IFF_UP)
@@ -1119,7 +1118,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		neigh_sysctl_unregister(in_dev->arp_parms);
 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
-		devinet_sysctl_register(in_dev, &in_dev->cnf);
+		devinet_sysctl_register(in_dev);
 #endif
 		break;
 	}
@@ -1501,13 +1500,11 @@ static struct devinet_sysctl_table {
 	},
 };
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p)
+static void __devinet_sysctl_register(char *dev_name, int ctl_name,
+		struct ipv4_devconf *p)
 {
 	int i;
-	struct net_device *dev = in_dev ? in_dev->dev : NULL;
 	struct devinet_sysctl_table *t;
-	char *dev_name = NULL;
 
 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
 	if (!t)
@@ -1518,13 +1515,7 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 		t->devinet_vars[i].extra1 = p;
 	}
 
-	if (dev) {
-		dev_name = dev->name;
-		t->devinet_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
+	t->devinet_dev[0].ctl_name = ctl_name;
 
 	/*
 	 * Make a copy of dev_name, because '.procname' is regarded as const
@@ -1556,6 +1547,12 @@ out:
 	return;
 }
 
+static void devinet_sysctl_register(struct in_device *idev)
+{
+	return __devinet_sysctl_register(idev->dev->name, idev->dev->ifindex,
+			&idev->cnf);
+}
+
 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
 {
 	if (p->sysctl) {
@@ -1577,9 +1574,10 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
 #ifdef CONFIG_SYSCTL
-	devinet_sysctl.sysctl_header =
-		register_sysctl_table(devinet_sysctl.devinet_root_dir);
-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+	__devinet_sysctl_register("all", NET_PROTO_CONF_ALL,
+			&ipv4_devconf);
+	__devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+			&ipv4_devconf_dflt);
 #endif
 }
 
-- 
cgit v1.2.3


From bfada697bd534d2c16fd07fbef3a4924c4d4e014 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:57:08 +1100
Subject: [IPV4]: Use ctl paths to register devinet sysctls

This looks very much like the patch for neighbors.

The path is also located on the stack and is prepared
inside the function. This time, the call to the registering
function is guarded with the RTNL lock, but I decided
to keep it on the stack not to litter the devinet.c file
with unneeded names and to make it look similar to the
neighbors code.

This is also intended to help us with the net namespaces
and saves the vmlinux size as well - this time by more
than 670 bytes.

The difference from the first version is just the patch
offsets, that changed due to changes in the patch #2.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 69 ++++++++++++++++--------------------------------------
 1 file changed, 20 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a3a7d301736..9e2747aab25 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1430,11 +1430,8 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
-	ctl_table		devinet_dev[2];
-	ctl_table		devinet_conf_dir[2];
-	ctl_table		devinet_proto_dir[2];
-	ctl_table		devinet_root_dir[2];
+	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
+	char *dev_name;
 } devinet_sysctl = {
 	.devinet_vars = {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -1466,38 +1463,6 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
 					      "promote_secondaries"),
 	},
-	.devinet_dev = {
-		{
-			.ctl_name	= NET_PROTO_CONF_ALL,
-			.procname	= "all",
-			.mode		= 0555,
-			.child		= devinet_sysctl.devinet_vars,
-		},
-	},
-	.devinet_conf_dir = {
-		{
-			.ctl_name	= NET_IPV4_CONF,
-			.procname	= "conf",
-			.mode		= 0555,
-			.child		= devinet_sysctl.devinet_dev,
-		},
-	},
-	.devinet_proto_dir = {
-		{
-			.ctl_name	= NET_IPV4,
-			.procname	= "ipv4",
-			.mode		= 0555,
-			.child 		= devinet_sysctl.devinet_conf_dir,
-		},
-	},
-	.devinet_root_dir = {
-		{
-			.ctl_name	= CTL_NET,
-			.procname 	= "net",
-			.mode		= 0555,
-			.child		= devinet_sysctl.devinet_proto_dir,
-		},
-	},
 };
 
 static void __devinet_sysctl_register(char *dev_name, int ctl_name,
@@ -1506,6 +1471,16 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name,
 	int i;
 	struct devinet_sysctl_table *t;
 
+#define DEVINET_CTL_PATH_DEV	3
+
+	struct ctl_path devinet_ctl_path[] = {
+		{ .procname = "net", .ctl_name = CTL_NET, },
+		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
+		{ /* to be set */ },
+		{ },
+	};
+
 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
 	if (!t)
 		goto out;
@@ -1515,24 +1490,20 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name,
 		t->devinet_vars[i].extra1 = p;
 	}
 
-	t->devinet_dev[0].ctl_name = ctl_name;
-
 	/*
 	 * Make a copy of dev_name, because '.procname' is regarded as const
 	 * by sysctl and we wouldn't want anyone to change it under our feet
 	 * (see SIOCSIFNAME).
 	 */
-	dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!dev_name)
+	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
+	if (!t->dev_name)
 		goto free;
 
-	t->devinet_dev[0].procname    = dev_name;
-	t->devinet_dev[0].child	      = t->devinet_vars;
-	t->devinet_conf_dir[0].child  = t->devinet_dev;
-	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
-	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
+	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
+	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
 
-	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
+	t->sysctl_header = register_sysctl_paths(devinet_ctl_path,
+			t->devinet_vars);
 	if (!t->sysctl_header)
 		goto free_procname;
 
@@ -1540,7 +1511,7 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name,
 	return;
 
 free_procname:
-	kfree(dev_name);
+	kfree(t->dev_name);
 free:
 	kfree(t);
 out:
@@ -1559,7 +1530,7 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p)
 		struct devinet_sysctl_table *t = p->sysctl;
 		p->sysctl = NULL;
 		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->devinet_dev[0].procname);
+		kfree(t->dev_name);
 		kfree(t);
 	}
 }
-- 
cgit v1.2.3


From f52295a9c55ccb4d9b3580ce889f958ac740a44b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:58:37 +1100
Subject: [IPV6]: Unify and cleanup calls to addrconf_sysctl_register

Currently this call is (ab)used similar to devinet one - it
registers sysctls for devices and for the "default" confs, while
the "all" sysctls are registered separately. But unlike its
devinet brother, the passed inet6_device is needed.

The fix is to make a __addrconf_sysctl_register(), which registers
sysctls for all "devices" we need, including "default" and "all" :)

The original addrconf_sysctl_register() calls the introduced
function, passing the inet6_device, device name and ifindex (to
be used as procname and ctl_name) into it.

Thanks to Herbert again for pointing out, that we can shrink the
argument list to 1 :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2d2886a0b66..ea1673d2907 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -101,7 +101,7 @@
 #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
 
 #ifdef CONFIG_SYSCTL
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
+static void addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
 #endif
 
@@ -400,7 +400,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 			      NET_IPV6_NEIGH, "ipv6",
 			      &ndisc_ifinfo_sysctl_change,
 			      NULL);
-	addrconf_sysctl_register(ndev, &ndev->cnf);
+	addrconf_sysctl_register(ndev);
 #endif
 	/* protected by rtnl_lock */
 	rcu_assign_pointer(dev->ip6_ptr, ndev);
@@ -2386,7 +2386,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					      NET_IPV6, NET_IPV6_NEIGH, "ipv6",
 					      &ndisc_ifinfo_sysctl_change,
 					      NULL);
-			addrconf_sysctl_register(idev, &idev->cnf);
+			addrconf_sysctl_register(idev);
 #endif
 			err = snmp6_register_dev(idev);
 			if (err)
@@ -4118,12 +4118,11 @@ static struct addrconf_sysctl_table
 	},
 };
 
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
+		struct inet6_dev *idev, struct ipv6_devconf *p)
 {
 	int i;
-	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
-	char *dev_name = NULL;
 
 	t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
@@ -4133,13 +4132,6 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
 	}
-	if (dev) {
-		dev_name = dev->name;
-		t->addrconf_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
 
 	/*
 	 * Make a copy of dev_name, because '.procname' is regarded as const
@@ -4150,6 +4142,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 	if (!dev_name)
 		goto free;
 
+	t->addrconf_dev[0].ctl_name = ctl_name;
 	t->addrconf_dev[0].procname = dev_name;
 
 	t->addrconf_dev[0].child = t->addrconf_vars;
@@ -4172,6 +4165,12 @@ out:
 	return;
 }
 
+static void addrconf_sysctl_register(struct inet6_dev *idev)
+{
+	__addrconf_sysctl_register(idev->dev->name, idev->dev->ifindex,
+			idev, &idev->cnf);
+}
+
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
 {
 	if (p->sysctl) {
@@ -4270,9 +4269,10 @@ int __init addrconf_init(void)
 	ipv6_addr_label_rtnl_register();
 
 #ifdef CONFIG_SYSCTL
-	addrconf_sysctl.sysctl_header =
-		register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
-	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+	__addrconf_sysctl_register("all", NET_PROTO_CONF_ALL,
+			NULL, &ipv6_devconf);
+	__addrconf_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+			NULL, &ipv6_devconf_dflt);
 #endif
 
 	return 0;
-- 
cgit v1.2.3


From 1dab62226dd9e0c0051229e7868363663546c772 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 2 Dec 2007 00:59:38 +1100
Subject: [IPV6]: Use ctl paths to register addrconf sysctls

This looks very much like the patch for ipv4's devinet.

This is also intended to help us with the net namespaces
and saves the ipv6.ko size by ~320 bytes.

The difference from the first version is just the patch
offsets, that changed due to changes in the patch #2.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 80 ++++++++++++++---------------------------------------
 1 file changed, 20 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ea1673d2907..dbff389b700 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3848,10 +3848,7 @@ static struct addrconf_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
 	ctl_table addrconf_vars[__NET_IPV6_MAX];
-	ctl_table addrconf_dev[2];
-	ctl_table addrconf_conf_dir[2];
-	ctl_table addrconf_proto_dir[2];
-	ctl_table addrconf_root_dir[2];
+	char *dev_name;
 } addrconf_sysctl __read_mostly = {
 	.sysctl_header = NULL,
 	.addrconf_vars = {
@@ -4072,50 +4069,6 @@ static struct addrconf_sysctl_table
 			.ctl_name	=	0,	/* sentinel */
 		}
 	},
-	.addrconf_dev = {
-		{
-			.ctl_name	=	NET_PROTO_CONF_ALL,
-			.procname	=	"all",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_vars,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_conf_dir = {
-		{
-			.ctl_name	=	NET_IPV6_CONF,
-			.procname	=	"conf",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_dev,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_proto_dir = {
-		{
-			.ctl_name	=	NET_IPV6,
-			.procname	=	"ipv6",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_conf_dir,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_root_dir = {
-		{
-			.ctl_name	=	CTL_NET,
-			.procname	=	"net",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_proto_dir,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
 };
 
 static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
@@ -4124,6 +4077,17 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
 	int i;
 	struct addrconf_sysctl_table *t;
 
+#define ADDRCONF_CTL_PATH_DEV	3
+
+	struct ctl_path addrconf_ctl_path[] = {
+		{ .procname = "net", .ctl_name = CTL_NET, },
+		{ .procname = "ipv6", .ctl_name = NET_IPV6, },
+		{ .procname = "conf", .ctl_name = NET_IPV6_CONF, },
+		{ /* to be set */ },
+		{ },
+	};
+
+
 	t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
 		goto out;
@@ -4138,19 +4102,15 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
 	 * by sysctl and we wouldn't want anyone to change it under our feet
 	 * (see SIOCSIFNAME).
 	 */
-	dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!dev_name)
+	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
+	if (!t->dev_name)
 		goto free;
 
-	t->addrconf_dev[0].ctl_name = ctl_name;
-	t->addrconf_dev[0].procname = dev_name;
-
-	t->addrconf_dev[0].child = t->addrconf_vars;
-	t->addrconf_conf_dir[0].child = t->addrconf_dev;
-	t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
-	t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
+	addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
+	addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name;
 
-	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
+	t->sysctl_header = register_sysctl_paths(addrconf_ctl_path,
+			t->addrconf_vars);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
 
@@ -4158,7 +4118,7 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
 	return;
 
 free_procname:
-	kfree(dev_name);
+	kfree(t->dev_name);
 free:
 	kfree(t);
 out:
@@ -4177,7 +4137,7 @@ static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
 		struct addrconf_sysctl_table *t = p->sysctl;
 		p->sysctl = NULL;
 		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->addrconf_dev[0].procname);
+		kfree(t->dev_name);
 		kfree(t);
 	}
 }
-- 
cgit v1.2.3


From bce392f3b02755a8c615d4ced3d3b9cb1d9e3648 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:47:56 +0200
Subject: [TCP]: Move LOSTRETRANS MIB outside !(L|S) check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Usually those skbs will have L set, not counting them as lost
retransmissions is misleading.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 33d284e3f7f..6ca77f8bcee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1160,8 +1160,8 @@ static int tcp_mark_lost_retrans(struct sock *sk)
 				tp->lost_out += tcp_skb_pcount(skb);
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				flag |= FLAG_DATA_SACKED;
-				NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 			}
+			NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 		} else {
 			if (before(ack_seq, new_low_seq))
 				new_low_seq = ack_seq;
-- 
cgit v1.2.3


From 407ef1de03e87225d75a9bed271f35ea6880f5f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:47:57 +0200
Subject: [TCP]: Remove superflucious FLAG_DATA_SACKED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To get there, highest_sack must have advanced. When it advances,
a new skb is SACKed, which already sets that FLAG. Besides, the
original purpose of it has puzzled me, never understood why
LOST bit setting of retransmitted skb is marked with
FLAG_DATA_SACKED.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6ca77f8bcee..87111084280 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1118,12 +1118,11 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
  * highest SACK block). Also calculate the lowest snd_nxt among the remaining
  * retransmitted skbs to avoid some costly processing per ACKs.
  */
-static int tcp_mark_lost_retrans(struct sock *sk)
+static void tcp_mark_lost_retrans(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int flag = 0;
 	int cnt = 0;
 	u32 new_low_seq = tp->snd_nxt;
 	u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq;
@@ -1131,7 +1130,7 @@ static int tcp_mark_lost_retrans(struct sock *sk)
 	if (!tcp_is_fack(tp) || !tp->retrans_out ||
 	    !after(received_upto, tp->lost_retrans_low) ||
 	    icsk->icsk_ca_state != TCP_CA_Recovery)
-		return flag;
+		return;
 
 	tcp_for_write_queue(skb, sk) {
 		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1159,7 +1158,6 @@ static int tcp_mark_lost_retrans(struct sock *sk)
 			if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 				tp->lost_out += tcp_skb_pcount(skb);
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				flag |= FLAG_DATA_SACKED;
 			}
 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 		} else {
@@ -1171,8 +1169,6 @@ static int tcp_mark_lost_retrans(struct sock *sk)
 
 	if (tp->retrans_out)
 		tp->lost_retrans_low = new_low_seq;
-
-	return flag;
 }
 
 static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
@@ -1603,7 +1599,7 @@ advance_sp:
 	for (j = 0; j < used_sacks; j++)
 		tp->recv_sack_cache[i++] = sp[j];
 
-	flag |= tcp_mark_lost_retrans(sk);
+	tcp_mark_lost_retrans(sk);
 
 	tcp_verify_left_out(tp);
 
-- 
cgit v1.2.3


From ede9f3b186bc3eb0fce084bdcab500efc3721a80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:47:58 +0200
Subject: [TCP]: Unite identical code from two seqno split blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bogus seqno compares just mislead, the code is identical for
both sides of the seqno compare (and was even executed just
once because of return in between).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 87111084280..d313dea361a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1246,8 +1246,7 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
 		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
 			tp->undo_retrans--;
-		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) &&
-		    (sacked & TCPCB_SACKED_ACKED))
+		if (sacked & TCPCB_SACKED_ACKED)
 			*reord = min(fack_count, *reord);
 	}
 
@@ -1310,10 +1309,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
 
 		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
 			tp->highest_sack = skb;
-
-	} else {
-		if (dup_sack && (sacked & TCPCB_RETRANS))
-			*reord = min(fack_count, *reord);
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
-- 
cgit v1.2.3


From c3a05c6050a339c92e49fae0ba77dbba0d41fd99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:47:59 +0200
Subject: [TCP]: Cong.ctrl modules: remove unused good_ack from cong_avoid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bic.c       | 3 +--
 net/ipv4/tcp_cong.c      | 2 +-
 net/ipv4/tcp_cubic.c     | 3 +--
 net/ipv4/tcp_highspeed.c | 3 +--
 net/ipv4/tcp_htcp.c      | 3 +--
 net/ipv4/tcp_hybla.c     | 5 ++---
 net/ipv4/tcp_illinois.c  | 3 +--
 net/ipv4/tcp_input.c     | 9 ++++-----
 net/ipv4/tcp_lp.c        | 4 ++--
 net/ipv4/tcp_scalable.c  | 3 +--
 net/ipv4/tcp_vegas.c     | 7 +++----
 net/ipv4/tcp_veno.c      | 7 +++----
 net/ipv4/tcp_yeah.c      | 3 +--
 13 files changed, 22 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5dba0fc8f57..5212ed9b0c9 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -136,8 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack,
-			      u32 in_flight, int data_acked)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 55fca1820c3..4451750b478 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -324,7 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
 /* This is Jacobson's slow start and congestion avoidance.
  * SIGCOMM '88, p. 328.
  */
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 80bd084a9f9..3aa0b23c1ea 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -246,8 +246,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack,
-			      u32 in_flight, int data_acked)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 14a073d8b60..8b6caaf75bb 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,8 +109,7 @@ static void hstcp_init(struct sock *sk)
 	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
 }
 
-static void hstcp_cong_avoid(struct sock *sk, u32 adk,
-			     u32 in_flight, int data_acked)
+static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 5215691f276..af99776146f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -225,8 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
 	return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
 }
 
-static void htcp_cong_avoid(struct sock *sk, u32 ack,
-			    u32 in_flight, int data_acked)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index b3e55cf5617..44618b67591 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -85,8 +85,7 @@ static inline u32 hybla_fraction(u32 odds)
  *     o Give cwnd a new value based on the model proposed
  *     o remember increments <1
  */
-static void hybla_cong_avoid(struct sock *sk, u32 ack,
-			    u32 in_flight, int flag)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hybla *ca = inet_csk_ca(sk);
@@ -103,7 +102,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack,
 		return;
 
 	if (!ca->hybla_en)
-		return tcp_reno_cong_avoid(sk, ack, in_flight, flag);
+		return tcp_reno_cong_avoid(sk, ack, in_flight);
 
 	if (ca->rho == 0)
 		hybla_recalc_param(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 5aa5f5496d6..1eba160b72d 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -256,8 +256,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
 /*
  * Increase window in response to successful acknowledgment.
  */
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack,
-				    u32 in_flight, int flag)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d313dea361a..cb441188870 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2710,11 +2710,10 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack,
-			   u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good);
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -3238,11 +3237,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		/* Advance CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
 		    tcp_may_raise_cwnd(sk, flag))
-			tcp_cong_avoid(sk, ack, prior_in_flight, 0);
+			tcp_cong_avoid(sk, ack, prior_in_flight);
 		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
-			tcp_cong_avoid(sk, ack, prior_in_flight, 1);
+			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index e7f5ef92cbd..ce3c41ff50b 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,12 +115,12 @@ static void tcp_lp_init(struct sock *sk)
  * Will only call newReno CA when away from inference.
  * From TCP-LP's paper, this will be handled in additive increasement.
  */
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct lp *lp = inet_csk_ca(sk);
 
 	if (!(lp->flag & LP_WITHIN_INF))
-		tcp_reno_cong_avoid(sk, ack, in_flight, flag);
+		tcp_reno_cong_avoid(sk, ack, in_flight);
 }
 
 /**
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index be27a33a1c6..2747ec7bfb6 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,8 +15,7 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack,
-				    u32 in_flight, int flag)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 007304e9984..be24d6ee34b 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -162,14 +162,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 }
 EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
 
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
-				 u32 in_flight, int flag)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	if (!vegas->doing_vegas_now)
-		return tcp_reno_cong_avoid(sk, ack, in_flight, flag);
+		return tcp_reno_cong_avoid(sk, ack, in_flight);
 
 	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
 	 *
@@ -228,7 +227,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			/* We don't have enough RTT samples to do the Vegas
 			 * calculation, so we'll behave like Reno.
 			 */
-			tcp_reno_cong_avoid(sk, ack, in_flight, flag);
+			tcp_reno_cong_avoid(sk, ack, in_flight);
 		} else {
 			u32 rtt, target_cwnd, diff;
 
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 8fb2aee0b1a..d16689e9851 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,14 +114,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 		tcp_veno_init(sk);
 }
 
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack,
-				u32 in_flight, int flag)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
 	if (!veno->doing_veno_now)
-		return tcp_reno_cong_avoid(sk, ack, in_flight, flag);
+		return tcp_reno_cong_avoid(sk, ack, in_flight);
 
 	/* limited by applications */
 	if (!tcp_is_cwnd_limited(sk, in_flight))
@@ -132,7 +131,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack,
 		/* We don't have enough rtt samples to do the Veno
 		 * calculation, so we'll behave like Reno.
 		 */
-		tcp_reno_cong_avoid(sk, ack, in_flight, flag);
+		tcp_reno_cong_avoid(sk, ack, in_flight);
 	} else {
 		u32 rtt, target_cwnd;
 
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index c107fba7430..e03b10183a8 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,8 +69,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
 	tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
 }
 
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
-				u32 in_flight, int flag)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
-- 
cgit v1.2.3


From 50c4817e9919132639be0adc387b509e04a9ed0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:48:00 +0200
Subject: [TCP]: MTUprobe: prepare skb fields earlier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They better be valid when call to write_queue functions is made
once things that follow are going in.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 98835afb8d8..c2aa4688dae 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1342,7 +1342,6 @@ static int tcp_mtu_probe(struct sock *sk)
 	sk_charge_skb(sk, nskb);
 
 	skb = tcp_send_head(sk);
-	tcp_insert_write_queue_before(nskb, skb, sk);
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1351,6 +1350,8 @@ static int tcp_mtu_probe(struct sock *sk)
 	nskb->csum = 0;
 	nskb->ip_summed = skb->ip_summed;
 
+	tcp_insert_write_queue_before(nskb, skb, sk);
+
 	len = 0;
 	while (len < probe_size) {
 		next = tcp_write_queue_next(sk, skb);
-- 
cgit v1.2.3


From d67c58e9ae80ea577785111534e49d3ca757ec50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:48:01 +0200
Subject: [TCP]: Remove local variable and use packets_in_flight directly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lines won't be that long and it's compiler's job to optimize
them.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c2aa4688dae..b41176f380d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1293,7 +1293,6 @@ static int tcp_mtu_probe(struct sock *sk)
 	int len;
 	int probe_size;
 	int size_needed;
-	unsigned int pif;
 	int copy;
 	int mss_now;
 
@@ -1326,11 +1325,9 @@ static int tcp_mtu_probe(struct sock *sk)
 	if (after(tp->snd_nxt + size_needed, tp->snd_una + tp->snd_wnd))
 		return 0;
 
-	/* Do we need to wait to drain cwnd? */
-	pif = tcp_packets_in_flight(tp);
-	if (pif + 2 > tp->snd_cwnd) {
-		/* With no packets in flight, don't stall. */
-		if (pif == 0)
+	/* Do we need to wait to drain cwnd? With none in flight, don't stall */
+	if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+		if (!tcp_packets_in_flight(tp))
 			return -1;
 		else
 			return 0;
-- 
cgit v1.2.3


From 234b68607006f3721679e900809ccb99e8bfb10c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:48:02 +0200
Subject: [TCP]: Add tcp_for_write_queue_from_safe and use it in mtu_probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b41176f380d..7d8583a15d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1350,9 +1350,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	tcp_insert_write_queue_before(nskb, skb, sk);
 
 	len = 0;
-	while (len < probe_size) {
-		next = tcp_write_queue_next(sk, skb);
-
+	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
 		if (nskb->ip_summed)
 			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
@@ -1381,7 +1379,9 @@ static int tcp_mtu_probe(struct sock *sk)
 		}
 
 		len += copy;
-		skb = next;
+
+		if (len >= probe_size)
+			break;
 	}
 	tcp_init_tso_segs(sk, nskb, nskb->len);
 
-- 
cgit v1.2.3


From 89d478f7f2160780be9d7c9c6bac9bd350d9ed96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 30 Dec 2007 04:35:27 -0800
Subject: [TCP]: Remove duplicated code block from clean_rtx_queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 50 +++++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cb441188870..6bc594a63c9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2801,42 +2801,34 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 			tcp_mtup_probe_success(sk, skb);
 		}
 
-		if (sacked) {
-			if (sacked & TCPCB_RETRANS) {
-				if (sacked & TCPCB_SACKED_RETRANS)
-					tp->retrans_out -= packets_acked;
-				flag |= FLAG_RETRANS_DATA_ACKED;
-				ca_seq_rtt = -1;
-				seq_rtt = -1;
-				if ((flag & FLAG_DATA_ACKED) ||
-				    (packets_acked > 1))
-					flag |= FLAG_NONHEAD_RETRANS_ACKED;
-			} else {
-				ca_seq_rtt = now - scb->when;
-				last_ackt = skb->tstamp;
-				if (seq_rtt < 0) {
-					seq_rtt = ca_seq_rtt;
-				}
-				if (!(sacked & TCPCB_SACKED_ACKED))
-					reord = min(cnt, reord);
-			}
-
-			if (sacked & TCPCB_SACKED_ACKED)
-				tp->sacked_out -= packets_acked;
-			if (sacked & TCPCB_LOST)
-				tp->lost_out -= packets_acked;
-
-			if ((sacked & TCPCB_URG) && tp->urg_mode &&
-			    !before(end_seq, tp->snd_up))
-				tp->urg_mode = 0;
+		if (sacked & TCPCB_RETRANS) {
+			if (sacked & TCPCB_SACKED_RETRANS)
+				tp->retrans_out -= packets_acked;
+			flag |= FLAG_RETRANS_DATA_ACKED;
+			ca_seq_rtt = -1;
+			seq_rtt = -1;
+			if ((flag & FLAG_DATA_ACKED) ||
+			    (packets_acked > 1))
+				flag |= FLAG_NONHEAD_RETRANS_ACKED;
 		} else {
 			ca_seq_rtt = now - scb->when;
 			last_ackt = skb->tstamp;
 			if (seq_rtt < 0) {
 				seq_rtt = ca_seq_rtt;
 			}
-			reord = min(cnt, reord);
+			if (!(sacked & TCPCB_SACKED_ACKED))
+				reord = min(cnt, reord);
 		}
+
+		if (sacked & TCPCB_SACKED_ACKED)
+			tp->sacked_out -= packets_acked;
+		if (sacked & TCPCB_LOST)
+			tp->lost_out -= packets_acked;
+
+		if ((sacked & TCPCB_URG) && tp->urg_mode &&
+		    !before(end_seq, tp->snd_up))
+			tp->urg_mode = 0;
+
 		tp->packets_out -= packets_acked;
 		cnt += packets_acked;
 
-- 
cgit v1.2.3


From ea60658cde9f4df7d05b95c81a72a95ad07f0701 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:48:04 +0200
Subject: [TCP]: Add unlikely() to urgent handling in clean_rtx_queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6bc594a63c9..8e4d74ee31e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2825,8 +2825,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= packets_acked;
 
-		if ((sacked & TCPCB_URG) && tp->urg_mode &&
-		    !before(end_seq, tp->snd_up))
+		if (unlikely((sacked & TCPCB_URG) && tp->urg_mode &&
+			     !before(end_seq, tp->snd_up)))
 			tp->urg_mode = 0;
 
 		tp->packets_out -= packets_acked;
-- 
cgit v1.2.3


From 7201883599ac8bff76300117155e299b1a54092f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 30 Dec 2007 04:37:55 -0800
Subject: [TCP]: Cleanup local variables of clean_rtx_queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e4d74ee31e..263c536def5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2765,8 +2765,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 	u32 now = tcp_time_stamp;
 	int fully_acked = 1;
 	int flag = 0;
-	int prior_packets = tp->packets_out;
-	u32 cnt = 0;
+	u32 pkts_acked = 0;
 	u32 reord = tp->packets_out;
 	s32 seq_rtt = -1;
 	s32 ca_seq_rtt = -1;
@@ -2775,7 +2774,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u32 end_seq;
-		u32 packets_acked;
+		u32 acked_pcount;
 		u8 sacked = scb->sacked;
 
 		/* Determine how many packets and what bytes were acked, tso and else */
@@ -2784,14 +2783,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 			    !after(tp->snd_una, scb->seq))
 				break;
 
-			packets_acked = tcp_tso_acked(sk, skb);
-			if (!packets_acked)
+			acked_pcount = tcp_tso_acked(sk, skb);
+			if (!acked_pcount)
 				break;
 
 			fully_acked = 0;
 			end_seq = tp->snd_una;
 		} else {
-			packets_acked = tcp_skb_pcount(skb);
+			acked_pcount = tcp_skb_pcount(skb);
 			end_seq = scb->end_seq;
 		}
 
@@ -2803,12 +2802,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 
 		if (sacked & TCPCB_RETRANS) {
 			if (sacked & TCPCB_SACKED_RETRANS)
-				tp->retrans_out -= packets_acked;
+				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
 			ca_seq_rtt = -1;
 			seq_rtt = -1;
 			if ((flag & FLAG_DATA_ACKED) ||
-			    (packets_acked > 1))
+			    (acked_pcount > 1))
 				flag |= FLAG_NONHEAD_RETRANS_ACKED;
 		} else {
 			ca_seq_rtt = now - scb->when;
@@ -2817,20 +2816,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 				seq_rtt = ca_seq_rtt;
 			}
 			if (!(sacked & TCPCB_SACKED_ACKED))
-				reord = min(cnt, reord);
+				reord = min(pkts_acked, reord);
 		}
 
 		if (sacked & TCPCB_SACKED_ACKED)
-			tp->sacked_out -= packets_acked;
+			tp->sacked_out -= acked_pcount;
 		if (sacked & TCPCB_LOST)
-			tp->lost_out -= packets_acked;
+			tp->lost_out -= acked_pcount;
 
 		if (unlikely((sacked & TCPCB_URG) && tp->urg_mode &&
 			     !before(end_seq, tp->snd_up)))
 			tp->urg_mode = 0;
 
-		tp->packets_out -= packets_acked;
-		cnt += packets_acked;
+		tp->packets_out -= acked_pcount;
+		pkts_acked += acked_pcount;
 
 		/* Initial outgoing SYN's get put onto the write_queue
 		 * just like anything else we transmit.  It is not
@@ -2855,7 +2854,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 	}
 
 	if (flag & FLAG_ACKED) {
-		u32 pkts_acked = prior_packets - tp->packets_out;
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
 
-- 
cgit v1.2.3


From 6859d49475d4f32abe640372117e4b687906e6b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 2 Dec 2007 00:48:06 +0200
Subject: [TCP]: Abstract tp->highest_sack accessing & point to next skb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pointing to the next skb is necessary to avoid referencing
already SACKed skbs which will soon be on a separate list.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 27 +++++++++++++++------------
 net/ipv4/tcp_output.c | 11 +++--------
 2 files changed, 18 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 263c536def5..bc2d5f70966 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1125,7 +1125,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 	struct sk_buff *skb;
 	int cnt = 0;
 	u32 new_low_seq = tp->snd_nxt;
-	u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq;
+	u32 received_upto = tcp_highest_sack_seq(tp);
 
 	if (!tcp_is_fack(tp) || !tp->retrans_out ||
 	    !after(received_upto, tp->lost_retrans_low) ||
@@ -1236,9 +1236,10 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 			   int *reord, int dup_sack, int fack_count)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	u8 sacked = TCP_SKB_CB(skb)->sacked;
 	int flag = 0;
 
@@ -1307,8 +1308,8 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
 		if (fack_count > tp->fackets_out)
 			tp->fackets_out = fack_count;
 
-		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-			tp->highest_sack = skb;
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+			tcp_advance_highest_sack(sk, skb);
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1330,8 +1331,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 					int dup_sack_in, int *fack_count,
 					int *reord, int *flag)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
 	tcp_for_write_queue_from(skb, sk) {
 		int in_sack = 0;
 		int dup_sack = dup_sack_in;
@@ -1358,7 +1357,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 			break;
 
 		if (in_sack)
-			*flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
+			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count);
 
 		*fack_count += tcp_skb_pcount(skb);
 	}
@@ -1429,7 +1428,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
 			tp->fackets_out = 0;
-		tp->highest_sack = tcp_write_queue_head(sk);
+		tcp_highest_sack_reset(sk);
 	}
 
 	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
@@ -1552,9 +1551,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						       &fack_count, &reord, &flag);
 
 			/* ...tail remains todo... */
-			if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) {
+			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
 				/* ...but better entrypoint exists! */
-				skb = tcp_write_queue_next(sk, tp->highest_sack);
+				skb = tcp_highest_sack(sk);
+				if (skb == NULL)
+					break;
 				fack_count = tp->fackets_out;
 				cache++;
 				goto walk;
@@ -1566,8 +1567,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			continue;
 		}
 
-		if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) {
-			skb = tcp_write_queue_next(sk, tp->highest_sack);
+		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+			skb = tcp_highest_sack(sk);
+			if (skb == NULL)
+				break;
 			fack_count = tp->fackets_out;
 		}
 		skb = tcp_sacktag_skip(skb, sk, start_seq);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d8583a15d0..9a985b55e7d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -667,7 +667,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 	if (!tp->sacked_out || tcp_is_reno(tp))
 		return;
 
-	if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
+	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
 		tp->fackets_out -= decr;
 }
 
@@ -711,9 +711,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
 
-	if (tcp_is_sack(tp) && tp->sacked_out && (skb == tp->highest_sack))
-		tp->highest_sack = buff;
-
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
@@ -1707,9 +1704,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
-		if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out &&
-		    (next_skb == tp->highest_sack)))
-			return;
+		tcp_highest_sack_combine(sk, next_skb, skb);
 
 		/* Ok.	We will be able to collapse the packet. */
 		tcp_unlink_write_queue(next_skb, sk);
@@ -2019,7 +2014,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 		tp->forward_skb_hint = skb;
 
-		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
 			break;
 
 		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-- 
cgit v1.2.3


From cb75994ec311b2cd50e5205efdcc0696abd6675d Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 3 Dec 2007 22:33:28 +1100
Subject: [UDP]: Defer InDataGrams increment until recvmsg() does checksum

Thanks dave, herbert, gerrit, andi and other people for your
discussion about this problem.

UdpInDatagrams can be confusing because it counts packets that
might be dropped later.
Move UdpInDatagrams into recvmsg() as allowed by the RFC.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 7 +++----
 net/ipv6/udp.c | 4 +++-
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 03c400ca14c..3465d4ad301 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -873,6 +873,8 @@ try_again:
 	if (err)
 		goto out_free;
 
+	UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+
 	sock_recv_timestamp(msg, sk, skb);
 
 	/* Copy the address. */
@@ -966,10 +968,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			int ret;
 
 			ret = (*up->encap_rcv)(sk, skb);
-			if (ret <= 0) {
-				UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
+			if (ret <= 0)
 				return -ret;
-			}
 		}
 
 		/* FALLTHROUGH -- it's a UDP Packet */
@@ -1023,7 +1023,6 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
 	return 0;
 
 drop:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ee1cc3f8599..b0474a618bb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -164,6 +164,8 @@ try_again:
 	if (err)
 		goto out_free;
 
+	UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+
 	sock_recv_timestamp(msg, sk, skb);
 
 	/* Copy the address. */
@@ -292,7 +294,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
 		goto drop;
 	}
-	UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
+
 	return 0;
 drop:
 	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
-- 
cgit v1.2.3


From b2bf1e2659b1cba5e65f81781cfd530be447f80b Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 3 Dec 2007 22:34:16 +1100
Subject: [UDP]: Clean up for IS_UDPLITE macro

Since we have macro IS_UDPLITE, we can use it.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 19 +++++++++++--------
 net/ipv6/udp.c | 14 ++++++++------
 2 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3465d4ad301..5e67e324529 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -471,6 +471,7 @@ static int udp_push_pending_frames(struct sock *sk)
 	struct sk_buff *skb;
 	struct udphdr *uh;
 	int err = 0;
+	int is_udplite = IS_UDPLITE(sk);
 	__wsum csum = 0;
 
 	/* Grab the skbuff where UDP header space exists. */
@@ -486,7 +487,7 @@ static int udp_push_pending_frames(struct sock *sk)
 	uh->len = htons(up->len);
 	uh->check = 0;
 
-	if (up->pcflag)  				 /*     UDP-Lite      */
+	if (is_udplite)  				 /*     UDP-Lite      */
 		csum  = udplite_csum_outgoing(sk, skb);
 
 	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
@@ -514,7 +515,7 @@ out:
 	up->len = 0;
 	up->pending = 0;
 	if (!err)
-		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag);
+		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
 	return err;
 }
 
@@ -531,7 +532,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	__be32 daddr, faddr, saddr;
 	__be16 dport;
 	u8  tos;
-	int err, is_udplite = up->pcflag;
+	int err, is_udplite = IS_UDPLITE(sk);
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 
@@ -942,6 +943,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int rc;
+	int is_udplite = IS_UDPLITE(sk);
 
 	/*
 	 *	Charge it to the socket, dropping if the queue is full.
@@ -978,7 +980,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	/*
 	 * 	UDP-Lite specific tests, ignored on UDP sockets
 	 */
-	if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
 
 		/*
 		 * MIB statistics other than incrementing the error count are
@@ -1019,14 +1021,14 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM)
-			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
 		goto drop;
 	}
 
 	return 0;
 
 drop:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
 	kfree_skb(skb);
 	return -1;
 }
@@ -1235,6 +1237,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	struct udp_sock *up = udp_sk(sk);
 	int val;
 	int err = 0;
+	int is_udplite = IS_UDPLITE(sk);
 
 	if (optlen<sizeof(int))
 		return -EINVAL;
@@ -1276,7 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	/* The sender sets actual checksum coverage length via this option.
 	 * The case coverage > packet length is handled by send module. */
 	case UDPLITE_SEND_CSCOV:
-		if (!up->pcflag)         /* Disable the option on UDP sockets */
+		if (!is_udplite)         /* Disable the option on UDP sockets */
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
 			val = 8;
@@ -1288,7 +1291,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	 * sense, this should be set to at least 8 (as done below). If zero is
 	 * used, this again means full checksum coverage.                     */
 	case UDPLITE_RECV_CSCOV:
-		if (!up->pcflag)         /* Disable the option on UDP sockets */
+		if (!is_udplite)         /* Disable the option on UDP sockets */
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Avoid silly minimal values.       */
 			val = 8;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b0474a618bb..77ab31b9923 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -260,6 +260,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int rc;
+	int is_udplite = IS_UDPLITE(sk);
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto drop;
@@ -267,7 +268,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	/*
 	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
 	 */
-	if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
 
 		if (up->pcrlen == 0) {          /* full coverage was set  */
 			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
@@ -291,13 +292,13 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM)
-			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
 		goto drop;
 	}
 
 	return 0;
 drop:
-	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
+	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
 	kfree_skb(skb);
 	return -1;
 }
@@ -525,6 +526,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 	struct flowi *fl = &inet->cork.fl;
 	int err = 0;
+	int is_udplite = IS_UDPLITE(sk);
 	__wsum csum = 0;
 
 	/* Grab the skbuff where UDP header space exists. */
@@ -540,7 +542,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 	uh->len = htons(up->len);
 	uh->check = 0;
 
-	if (up->pcflag)
+	if (is_udplite)
 		csum = udplite_csum_outgoing(sk, skb);
 	 else
 		csum = udp_csum_outgoing(sk, skb);
@@ -556,7 +558,7 @@ out:
 	up->len = 0;
 	up->pending = 0;
 	if (!err)
-		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag);
+		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
 	return err;
 }
 
@@ -580,7 +582,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int err;
 	int connected = 0;
-	int is_udplite = up->pcflag;
+	int is_udplite = IS_UDPLITE(sk);
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 
 	/* destination address check */
-- 
cgit v1.2.3


From bbca17680f82b1ba3122d41e8bc675aebf6d1cf2 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 3 Dec 2007 22:36:13 +1100
Subject: [UDP]: Counter increment should be in USER mode for recvmsg

System calls should be USER. So change the BH to USER for
UDP*_INC_STATS_BH().

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5e67e324529..d0283b7fcec 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -899,7 +899,7 @@ out:
 	return err;
 
 csum_copy_err:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
 
 	skb_kill_datagram(sk, skb, flags);
 
-- 
cgit v1.2.3


From 2fcb45b6b87914f072314e5b5d9c196f45984683 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 3 Dec 2007 22:54:12 -0800
Subject: [IPSEC]: Use the correct family for input state lookup

When merging the input paths of IPsec I accidentally left a hard-coded
AF_INET for the state lookup call.  This broke IPv6 obviously.  This
patch fixes by getting the input callers to specify the family through
skb->cb.

Credit goes to Kazunori Miyazawa for diagnosing this and providing an
initial patch.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c | 1 +
 net/ipv6/xfrm6_input.c | 1 +
 net/xfrm/xfrm_input.c  | 5 ++++-
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 0c377a66b8b..33f990d56c9 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -39,6 +39,7 @@ drop:
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type)
 {
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 	return xfrm_input(skb, nexthdr, spi, encap_type);
 }
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index e2c3efd2579..74f3aacebb5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -23,6 +23,7 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
 	return xfrm_input(skb, nexthdr, spi, 0);
 }
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 96f42c1d2e8..8b2b1b59133 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -102,6 +102,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	__be32 seq;
 	struct xfrm_state *x;
 	xfrm_address_t *daddr;
+	unsigned int family;
 	int decaps = 0;
 	int async = 0;
 
@@ -127,6 +128,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 	daddr = (xfrm_address_t *)(skb_network_header(skb) +
 				   XFRM_SPI_SKB_CB(skb)->daddroff);
+	family = XFRM_SPI_SKB_CB(skb)->family;
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
@@ -136,7 +138,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		if (skb->sp->len == XFRM_MAX_DEPTH)
 			goto drop;
 
-		x = xfrm_state_lookup(daddr, spi, nexthdr, AF_INET);
+		x = xfrm_state_lookup(daddr, spi, nexthdr, family);
 		if (x == NULL)
 			goto drop;
 
@@ -198,6 +200,7 @@ resume:
 		 * transport mode so the outer address is identical.
 		 */
 		daddr = &x->id.daddr;
+		family = x->outer_mode->afinfo->family;
 
 		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
 		if (err < 0)
-- 
cgit v1.2.3


From 6ac552fdc6e96bf2f39c18d6e66b8c8080bbb06e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 4 Dec 2007 00:19:38 -0800
Subject: [NETLINK]: af_netlink.c checkpatch cleanups

Fix large number of checkpatch errors.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 81 +++++++++++++++++++++++++-----------------------
 1 file changed, 43 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1518244ffad..2e02b19e455 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
 	skb_queue_purge(&sk->sk_receive_queue);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		printk("Freeing alive netlink socket %p\n", sk);
+		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
 		return;
 	}
 	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
@@ -164,8 +164,8 @@ static void netlink_sock_destruct(struct sock *sk)
 	BUG_TRAP(!nlk_sk(sk)->groups);
 }
 
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
+/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+ * SMP. Look, when several writers sleep and reader wakes them up, all but one
  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
  * this, _but_ remember, it adds useless work on UP machines.
  */
@@ -178,7 +178,7 @@ static void netlink_table_grab(void)
 		DECLARE_WAITQUEUE(wait, current);
 
 		add_wait_queue_exclusive(&nl_table_wait, &wait);
-		for(;;) {
+		for (;;) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			if (atomic_read(&nl_table_users) == 0)
 				break;
@@ -192,13 +192,13 @@ static void netlink_table_grab(void)
 	}
 }
 
-static __inline__ void netlink_table_ungrab(void)
+static inline void netlink_table_ungrab(void)
 {
 	write_unlock_irq(&nl_table_lock);
 	wake_up(&nl_table_wait);
 }
 
-static __inline__ void
+static inline void
 netlink_lock_table(void)
 {
 	/* read_lock() synchronizes us to netlink_table_grab */
@@ -208,14 +208,15 @@ netlink_lock_table(void)
 	read_unlock(&nl_table_lock);
 }
 
-static __inline__ void
+static inline void
 netlink_unlock_table(void)
 {
 	if (atomic_dec_and_test(&nl_table_users))
 		wake_up(&nl_table_wait);
 }
 
-static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
+static inline struct sock *netlink_lookup(struct net *net, int protocol,
+					  u32 pid)
 {
 	struct nl_pid_hash *hash = &nl_table[protocol].hash;
 	struct hlist_head *head;
@@ -428,7 +429,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
 	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
 		return -ESOCKTNOSUPPORT;
 
-	if (protocol<0 || protocol >= MAX_LINKS)
+	if (protocol < 0 || protocol >= MAX_LINKS)
 		return -EPROTONOSUPPORT;
 
 	netlink_lock_table();
@@ -445,7 +446,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
 	cb_mutex = nl_table[protocol].cb_mutex;
 	netlink_unlock_table();
 
-	if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0)
+	err = __netlink_create(net, sock, cb_mutex, protocol);
+	if (err < 0)
 		goto out_module;
 
 	nlk = nlk_sk(sock->sk);
@@ -590,7 +592,7 @@ static int netlink_realloc_groups(struct sock *sk)
 		err = -ENOMEM;
 		goto out_unlock;
 	}
-	memset((char*)new_groups + NLGRPSZ(nlk->ngroups), 0,
+	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
 	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
 
 	nlk->groups = new_groups;
@@ -600,7 +602,8 @@ static int netlink_realloc_groups(struct sock *sk)
 	return err;
 }
 
-static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+static int netlink_bind(struct socket *sock, struct sockaddr *addr,
+			int addr_len)
 {
 	struct sock *sk = sock->sk;
 	struct net *net = sk->sk_net;
@@ -651,7 +654,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 	int err = 0;
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;
+	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 
 	if (addr->sa_family == AF_UNSPEC) {
 		sk->sk_state	= NETLINK_UNCONNECTED;
@@ -678,11 +681,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 	return err;
 }
 
-static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
+static int netlink_getname(struct socket *sock, struct sockaddr *addr,
+			   int *addr_len, int peer)
 {
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;
+	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 
 	nladdr->nl_family = AF_NETLINK;
 	nladdr->nl_pad = 0;
@@ -885,6 +889,7 @@ retry:
 
 	return netlink_sendskb(sk, skb);
 }
+EXPORT_SYMBOL(netlink_unicast);
 
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
@@ -905,7 +910,8 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
 }
 EXPORT_SYMBOL_GPL(netlink_has_listeners);
 
-static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
+static inline int netlink_broadcast_deliver(struct sock *sk,
+					    struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 
@@ -1026,6 +1032,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 		return -ENOBUFS;
 	return -ESRCH;
 }
+EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {
 	struct sock *exclude_sk;
@@ -1182,7 +1189,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sockaddr_nl *addr=msg->msg_name;
+	struct sockaddr_nl *addr = msg->msg_name;
 	u32 dst_pid;
 	u32 dst_group;
 	struct sk_buff *skb;
@@ -1221,7 +1228,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 	err = -ENOBUFS;
 	skb = alloc_skb(len, GFP_KERNEL);
-	if (skb==NULL)
+	if (skb == NULL)
 		goto out;
 
 	NETLINK_CB(skb).pid	= nlk->pid;
@@ -1237,7 +1244,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	 */
 
 	err = -EFAULT;
-	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
+	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
 		kfree_skb(skb);
 		goto out;
 	}
@@ -1276,8 +1283,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 
 	copied = 0;
 
-	skb = skb_recv_datagram(sk,flags,noblock,&err);
-	if (skb==NULL)
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (skb == NULL)
 		goto out;
 
 	msg->msg_namelen = 0;
@@ -1292,7 +1299,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	if (msg->msg_name) {
-		struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name;
+		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
 		addr->nl_family = AF_NETLINK;
 		addr->nl_pad    = 0;
 		addr->nl_pid	= NETLINK_CB(skb).pid;
@@ -1344,7 +1351,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 
 	BUG_ON(!nl_table);
 
-	if (unit<0 || unit>=MAX_LINKS)
+	if (unit < 0 || unit >= MAX_LINKS)
 		return NULL;
 
 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
@@ -1390,6 +1397,7 @@ out_sock_release:
 	sock_release(sock);
 	return NULL;
 }
+EXPORT_SYMBOL(netlink_kernel_create);
 
 /**
  * netlink_change_ngroups - change number of multicast groups
@@ -1461,6 +1469,7 @@ void netlink_set_nonroot(int protocol, unsigned int flags)
 	if ((unsigned int)protocol < MAX_LINKS)
 		nl_table[protocol].nl_nonroot = flags;
 }
+EXPORT_SYMBOL(netlink_set_nonroot);
 
 static void netlink_destroy_callback(struct netlink_callback *cb)
 {
@@ -1529,8 +1538,9 @@ errout:
 
 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 		       struct nlmsghdr *nlh,
-		       int (*dump)(struct sk_buff *skb, struct netlink_callback*),
-		       int (*done)(struct netlink_callback*))
+		       int (*dump)(struct sk_buff *skb,
+				   struct netlink_callback *),
+		       int (*done)(struct netlink_callback *))
 {
 	struct netlink_callback *cb;
 	struct sock *sk;
@@ -1571,6 +1581,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	 */
 	return -EINTR;
 }
+EXPORT_SYMBOL(netlink_dump_start);
 
 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 {
@@ -1605,6 +1616,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
 }
+EXPORT_SYMBOL(netlink_ack);
 
 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 						     struct nlmsghdr *))
@@ -1638,7 +1650,7 @@ ack:
 			netlink_ack(skb, nlh, err);
 
 skip:
-	        msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (msglen > skb->len)
 			msglen = skb->len;
 		skb_pull(skb, msglen);
@@ -1646,6 +1658,7 @@ skip:
 
 	return 0;
 }
+EXPORT_SYMBOL(netlink_rcv_skb);
 
 /**
  * nlmsg_notify - send a notification netlink message
@@ -1678,6 +1691,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
 
 	return err;
 }
+EXPORT_SYMBOL(nlmsg_notify);
 
 #ifdef CONFIG_PROC_FS
 struct nl_seq_iter {
@@ -1694,7 +1708,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 	struct hlist_node *node;
 	loff_t off = 0;
 
-	for (i=0; i<MAX_LINKS; i++) {
+	for (i = 0; i < MAX_LINKS; i++) {
 		struct nl_pid_hash *hash = &nl_table[i].hash;
 
 		for (j = 0; j <= hash->mask; j++) {
@@ -1820,11 +1834,13 @@ int netlink_register_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_register(&netlink_chain, nb);
 }
+EXPORT_SYMBOL(netlink_register_notifier);
 
 int netlink_unregister_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_unregister(&netlink_chain, nb);
 }
+EXPORT_SYMBOL(netlink_unregister_notifier);
 
 static const struct proto_ops netlink_ops = {
 	.family =	PF_NETLINK,
@@ -1929,14 +1945,3 @@ panic:
 }
 
 core_initcall(netlink_proto_init);
-
-EXPORT_SYMBOL(netlink_ack);
-EXPORT_SYMBOL(netlink_rcv_skb);
-EXPORT_SYMBOL(netlink_broadcast);
-EXPORT_SYMBOL(netlink_dump_start);
-EXPORT_SYMBOL(netlink_kernel_create);
-EXPORT_SYMBOL(netlink_register_notifier);
-EXPORT_SYMBOL(netlink_set_nonroot);
-EXPORT_SYMBOL(netlink_unicast);
-EXPORT_SYMBOL(netlink_unregister_notifier);
-EXPORT_SYMBOL(nlmsg_notify);
-- 
cgit v1.2.3


From 338e8a79262c3709e314fbcc7ca193323e534934 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@bitebene.org>
Date: Tue, 4 Dec 2007 23:21:50 -0800
Subject: [NETFILTER]: x_tables: add TCPOPTSTRIP target

Signed-off-by: Sven Schnelle <svens@bitebene.org>
Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig          |   8 +++
 net/netfilter/Makefile         |   1 +
 net/netfilter/xt_TCPOPTSTRIP.c | 147 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 net/netfilter/xt_TCPOPTSTRIP.c

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 21a9fcc0379..693f861a03b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -411,6 +411,14 @@ config NETFILTER_XT_TARGET_TCPMSS
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TCPOPTSTRIP
+	tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	help
+	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
+	  TCP options from TCP packets.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ad0e36ebea3..7763dea17be 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
new file mode 100644
index 00000000000..43d6ac224f5
--- /dev/null
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -0,0 +1,147 @@
+/*
+ * A module for stripping a specific TCP option from TCP packets.
+ *
+ * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
+ * Copyright © CC Computer Consultants GmbH, 2007
+ * Contact: Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TCPOPTSTRIP.h>
+
+static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
+{
+	/* Beware zero-length options: make finite progress */
+	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+		return 1;
+	else
+		return opt[offset+1];
+}
+
+static unsigned int
+tcpoptstrip_mangle_packet(struct sk_buff *skb,
+			  const struct xt_tcpoptstrip_target_info *info,
+			  unsigned int tcphoff, unsigned int minlen)
+{
+	unsigned int optl, i, j;
+	struct tcphdr *tcph;
+	u_int16_t n, o;
+	u_int8_t *opt;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	opt  = (u_int8_t *)tcph;
+
+	/*
+	 * Walk through all TCP options - if we find some option to remove,
+	 * set all octets to %TCPOPT_NOP and adjust checksum.
+	 */
+	for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) {
+		optl = optlen(opt, i);
+
+		if (i + optl > tcp_hdrlen(skb))
+			break;
+
+		if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
+			continue;
+
+		for (j = 0; j < optl; ++j) {
+			o = opt[i+j];
+			n = TCPOPT_NOP;
+			if ((i + j) % 2 == 0) {
+				o <<= 8;
+				n <<= 8;
+			}
+			inet_proto_csum_replace2(&tcph->check, skb, htons(o),
+						 htons(n), 0);
+		}
+		memset(opt + i, TCPOPT_NOP, optl);
+	}
+
+	return XT_CONTINUE;
+}
+
+static unsigned int
+tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in,
+		const struct net_device *out, unsigned int hooknum,
+		const struct xt_target *target, const void *targinfo)
+{
+	return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb),
+	       sizeof(struct iphdr) + sizeof(struct tcphdr));
+}
+
+#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
+static unsigned int
+tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
+		const struct net_device *out, unsigned int hooknum,
+		const struct xt_target *target, const void *targinfo)
+{
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	unsigned int tcphoff;
+	u_int8_t nexthdr;
+
+	nexthdr = ipv6h->nexthdr;
+	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
+	if (tcphoff < 0)
+		return NF_DROP;
+
+	return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff,
+	       sizeof(*ipv6h) + sizeof(struct tcphdr));
+}
+#endif
+
+static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
+	{
+		.name       = "TCPOPTSTRIP",
+		.family     = AF_INET,
+		.table      = "mangle",
+		.proto      = IPPROTO_TCP,
+		.target     = tcpoptstrip_tg4,
+		.targetsize = sizeof(struct xt_tcpoptstrip_target_info),
+		.me         = THIS_MODULE,
+	},
+#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	{
+		.name       = "TCPOPTSTRIP",
+		.family     = AF_INET6,
+		.table      = "mangle",
+		.proto      = IPPROTO_TCP,
+		.target     = tcpoptstrip_tg6,
+		.targetsize = sizeof(struct xt_tcpoptstrip_target_info),
+		.me         = THIS_MODULE,
+	},
+#endif
+};
+
+static int __init tcpoptstrip_tg_init(void)
+{
+	return xt_register_targets(tcpoptstrip_tg_reg,
+				   ARRAY_SIZE(tcpoptstrip_tg_reg));
+}
+
+static void __exit tcpoptstrip_tg_exit(void)
+{
+	xt_unregister_targets(tcpoptstrip_tg_reg,
+			      ARRAY_SIZE(tcpoptstrip_tg_reg));
+}
+
+module_init(tcpoptstrip_tg_init);
+module_exit(tcpoptstrip_tg_exit);
+MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("netfilter \"TCPOPTSTRIP\" target module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TCPOPTSTRIP");
+MODULE_ALIAS("ip6t_TCPOPTSTRIP");
-- 
cgit v1.2.3


From 4c610979576d8778c401a9b1d247ed14f6cee998 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 4 Dec 2007 23:22:26 -0800
Subject: [NETFILTER]: replace list_for_each with list_for_each_entry

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 ++----
 net/netfilter/core.c               | 8 ++++----
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2f544dac72d..311361e1272 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -109,11 +109,9 @@ clusterip_config_entry_put(struct clusterip_config *c)
 static struct clusterip_config *
 __clusterip_config_find(__be32 clusterip)
 {
-	struct list_head *pos;
+	struct clusterip_config *c;
 
-	list_for_each(pos, &clusterip_configs) {
-		struct clusterip_config *c = list_entry(pos,
-					struct clusterip_config, list);
+	list_for_each_entry(c, &clusterip_configs, list) {
 		if (c->clusterip == clusterip)
 			return c;
 	}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 631d2694831..e6d3a69b9e9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -62,17 +62,17 @@ static DEFINE_MUTEX(nf_hook_mutex);
 
 int nf_register_hook(struct nf_hook_ops *reg)
 {
-	struct list_head *i;
+	struct nf_hook_ops *elem;
 	int err;
 
 	err = mutex_lock_interruptible(&nf_hook_mutex);
 	if (err < 0)
 		return err;
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
-		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
+		if (reg->priority < elem->priority)
 			break;
 	}
-	list_add_rcu(&reg->list, i->prev);
+	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);
 	return 0;
 }
-- 
cgit v1.2.3


From d3c5ee6d545b5372fd525ebe16988a5b6efeceb0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:24:03 -0800
Subject: [NETFILTER]: x_tables: consistent and unique symbol names

Give all Netfilter modules consistent and unique symbol names.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c   | 41 +++++++--------
 net/ipv4/netfilter/ipt_ECN.c         | 35 ++++++-------
 net/ipv4/netfilter/ipt_LOG.c         | 36 ++++++-------
 net/ipv4/netfilter/ipt_MASQUERADE.c  | 35 ++++++-------
 net/ipv4/netfilter/ipt_NETMAP.c      | 35 ++++++-------
 net/ipv4/netfilter/ipt_REDIRECT.c    | 35 ++++++-------
 net/ipv4/netfilter/ipt_REJECT.c      | 37 +++++++-------
 net/ipv4/netfilter/ipt_SAME.c        | 40 +++++++--------
 net/ipv4/netfilter/ipt_TOS.c         | 35 ++++++-------
 net/ipv4/netfilter/ipt_TTL.c         | 34 ++++++-------
 net/ipv4/netfilter/ipt_ULOG.c        | 45 ++++++++---------
 net/ipv4/netfilter/ipt_addrtype.c    | 25 ++++-----
 net/ipv4/netfilter/ipt_ah.c          | 37 ++++++--------
 net/ipv4/netfilter/ipt_ecn.c         | 33 ++++++------
 net/ipv4/netfilter/ipt_iprange.c     | 26 +++++-----
 net/ipv4/netfilter/ipt_owner.c       | 38 ++++++--------
 net/ipv4/netfilter/ipt_recent.c      | 39 +++++++-------
 net/ipv4/netfilter/ipt_tos.c         | 27 ++++------
 net/ipv4/netfilter/ipt_ttl.c         | 24 ++++-----
 net/ipv6/netfilter/ip6t_HL.c         | 37 +++++++-------
 net/ipv6/netfilter/ip6t_LOG.c        | 36 ++++++-------
 net/ipv6/netfilter/ip6t_REJECT.c     | 37 +++++++-------
 net/ipv6/netfilter/ip6t_ah.c         | 37 ++++++--------
 net/ipv6/netfilter/ip6t_eui64.c      | 28 +++++------
 net/ipv6/netfilter/ip6t_frag.c       | 38 ++++++--------
 net/ipv6/netfilter/ip6t_hbh.c        | 42 +++++++---------
 net/ipv6/netfilter/ip6t_hl.c         | 24 ++++-----
 net/ipv6/netfilter/ip6t_ipv6header.c | 38 ++++++--------
 net/ipv6/netfilter/ip6t_mh.c         | 37 ++++++--------
 net/ipv6/netfilter/ip6t_owner.c      | 38 ++++++--------
 net/ipv6/netfilter/ip6t_rt.c         | 37 ++++++--------
 net/netfilter/xt_CLASSIFY.c          | 30 +++++------
 net/netfilter/xt_CONNMARK.c          | 56 +++++++++------------
 net/netfilter/xt_CONNSECMARK.c       | 47 ++++++++---------
 net/netfilter/xt_DSCP.c              | 51 +++++++++----------
 net/netfilter/xt_MARK.c              | 69 +++++++++++--------------
 net/netfilter/xt_NFLOG.c             | 36 +++++++------
 net/netfilter/xt_NFQUEUE.c           | 30 +++++------
 net/netfilter/xt_NOTRACK.c           | 28 +++++------
 net/netfilter/xt_SECMARK.c           | 38 +++++++-------
 net/netfilter/xt_TCPMSS.c            | 56 +++++++++------------
 net/netfilter/xt_TRACE.c             | 28 +++++------
 net/netfilter/xt_comment.c           | 31 +++++-------
 net/netfilter/xt_connbytes.c         | 52 +++++++++----------
 net/netfilter/xt_connlimit.c         | 50 +++++++++---------
 net/netfilter/xt_connmark.c          | 58 ++++++++++-----------
 net/netfilter/xt_conntrack.c         | 51 +++++++++----------
 net/netfilter/xt_dccp.c              | 41 +++++++--------
 net/netfilter/xt_dscp.c              | 56 +++++++++------------
 net/netfilter/xt_esp.c               | 41 +++++++--------
 net/netfilter/xt_hashlimit.c         | 63 +++++++++++------------
 net/netfilter/xt_helper.c            | 52 ++++++++-----------
 net/netfilter/xt_length.c            | 43 +++++++---------
 net/netfilter/xt_limit.c             | 50 ++++++++----------
 net/netfilter/xt_mac.c               | 29 +++++------
 net/netfilter/xt_mark.c              | 49 ++++++++----------
 net/netfilter/xt_multiport.c         | 98 +++++++++++++++---------------------
 net/netfilter/xt_physdev.c           | 43 +++++++---------
 net/netfilter/xt_pkttype.c           | 32 ++++++------
 net/netfilter/xt_policy.c            | 43 ++++++++--------
 net/netfilter/xt_quota.c             | 36 ++++++-------
 net/netfilter/xt_realm.c             | 28 +++++------
 net/netfilter/xt_sctp.c              | 41 +++++++--------
 net/netfilter/xt_state.c             | 50 ++++++++----------
 net/netfilter/xt_statistic.c         | 40 +++++++--------
 net/netfilter/xt_string.c            | 50 +++++++++---------
 net/netfilter/xt_tcpmss.c            | 31 +++++-------
 net/netfilter/xt_tcpudp.c            | 77 ++++++++++++----------------
 net/netfilter/xt_time.c              | 38 +++++++-------
 net/netfilter/xt_u32.c               | 27 +++++-----
 70 files changed, 1262 insertions(+), 1593 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 311361e1272..b5de6bd3e43 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -287,12 +287,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
  ***********************************************************************/
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+clusterip_tg(struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, unsigned int hooknum,
+             const struct xt_target *target, const void *targinfo)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 	struct nf_conn *ct;
@@ -359,11 +356,9 @@ target(struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *e_void,
-	   const struct xt_target *target,
-	   void *targinfo,
-	   unsigned int hook_mask)
+clusterip_tg_check(const char *tablename, const void *e_void,
+                   const struct xt_target *target, void *targinfo,
+                   unsigned int hook_mask)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 	const struct ipt_entry *e = e_void;
@@ -427,7 +422,7 @@ checkentry(const char *tablename,
 }
 
 /* drop reference count of cluster config when rule is deleted */
-static void destroy(const struct xt_target *target, void *targinfo)
+static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 
@@ -454,12 +449,12 @@ struct compat_ipt_clusterip_tgt_info
 };
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target clusterip_tgt __read_mostly = {
+static struct xt_target clusterip_tg_reg __read_mostly = {
 	.name		= "CLUSTERIP",
 	.family		= AF_INET,
-	.target		= target,
-	.checkentry	= checkentry,
-	.destroy	= destroy,
+	.target		= clusterip_tg,
+	.checkentry	= clusterip_tg_check,
+	.destroy	= clusterip_tg_destroy,
 	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
 #ifdef CONFIG_COMPAT
 	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
@@ -712,11 +707,11 @@ static const struct file_operations clusterip_proc_fops = {
 
 #endif /* CONFIG_PROC_FS */
 
-static int __init ipt_clusterip_init(void)
+static int __init clusterip_tg_init(void)
 {
 	int ret;
 
-	ret = xt_register_target(&clusterip_tgt);
+	ret = xt_register_target(&clusterip_tg_reg);
 	if (ret < 0)
 		return ret;
 
@@ -742,11 +737,11 @@ cleanup_hook:
 	nf_unregister_hook(&cip_arp_ops);
 #endif /* CONFIG_PROC_FS */
 cleanup_target:
-	xt_unregister_target(&clusterip_tgt);
+	xt_unregister_target(&clusterip_tg_reg);
 	return ret;
 }
 
-static void __exit ipt_clusterip_fini(void)
+static void __exit clusterip_tg_exit(void)
 {
 	printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
 		CLUSTERIP_VERSION);
@@ -754,8 +749,8 @@ static void __exit ipt_clusterip_fini(void)
 	remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
 #endif
 	nf_unregister_hook(&cip_arp_ops);
-	xt_unregister_target(&clusterip_tgt);
+	xt_unregister_target(&clusterip_tg_reg);
 }
 
-module_init(ipt_clusterip_init);
-module_exit(ipt_clusterip_fini);
+module_init(clusterip_tg_init);
+module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index e8d5f686797..ab417649161 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -77,12 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 }
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+ecn_tg(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
 {
 	const struct ipt_ECN_info *einfo = targinfo;
 
@@ -99,11 +96,9 @@ target(struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *e_void,
-	   const struct xt_target *target,
-	   void *targinfo,
-	   unsigned int hook_mask)
+ecn_tg_check(const char *tablename, const void *e_void,
+             const struct xt_target *target, void *targinfo,
+             unsigned int hook_mask)
 {
 	const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
 	const struct ipt_entry *e = e_void;
@@ -127,25 +122,25 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_ecn_reg __read_mostly = {
+static struct xt_target ecn_tg_reg __read_mostly = {
 	.name		= "ECN",
 	.family		= AF_INET,
-	.target		= target,
+	.target		= ecn_tg,
 	.targetsize	= sizeof(struct ipt_ECN_info),
 	.table		= "mangle",
-	.checkentry	= checkentry,
+	.checkentry	= ecn_tg_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ecn_init(void)
+static int __init ecn_tg_init(void)
 {
-	return xt_register_target(&ipt_ecn_reg);
+	return xt_register_target(&ecn_tg_reg);
 }
 
-static void __exit ipt_ecn_fini(void)
+static void __exit ecn_tg_exit(void)
 {
-	xt_unregister_target(&ipt_ecn_reg);
+	xt_unregister_target(&ecn_tg_reg);
 }
 
-module_init(ipt_ecn_init);
-module_exit(ipt_ecn_fini);
+module_init(ecn_tg_init);
+module_exit(ecn_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 4b5e8216a4e..fba2155ffa3 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -418,12 +418,9 @@ ipt_log_packet(unsigned int pf,
 }
 
 static unsigned int
-ipt_log_target(struct sk_buff *skb,
-	       const struct net_device *in,
-	       const struct net_device *out,
-	       unsigned int hooknum,
-	       const struct xt_target *target,
-	       const void *targinfo)
+log_tg(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
 {
 	const struct ipt_log_info *loginfo = targinfo;
 	struct nf_loginfo li;
@@ -437,11 +434,10 @@ ipt_log_target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static bool ipt_log_checkentry(const char *tablename,
-			       const void *e,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
+static bool
+log_tg_check(const char *tablename, const void *e,
+             const struct xt_target *target, void *targinfo,
+             unsigned int hook_mask)
 {
 	const struct ipt_log_info *loginfo = targinfo;
 
@@ -457,12 +453,12 @@ static bool ipt_log_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_log_reg __read_mostly = {
+static struct xt_target log_tg_reg __read_mostly = {
 	.name		= "LOG",
 	.family		= AF_INET,
-	.target		= ipt_log_target,
+	.target		= log_tg,
 	.targetsize	= sizeof(struct ipt_log_info),
-	.checkentry	= ipt_log_checkentry,
+	.checkentry	= log_tg_check,
 	.me		= THIS_MODULE,
 };
 
@@ -472,22 +468,22 @@ static struct nf_logger ipt_log_logger ={
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_log_init(void)
+static int __init log_tg_init(void)
 {
 	int ret;
 
-	ret = xt_register_target(&ipt_log_reg);
+	ret = xt_register_target(&log_tg_reg);
 	if (ret < 0)
 		return ret;
 	nf_log_register(PF_INET, &ipt_log_logger);
 	return 0;
 }
 
-static void __exit ipt_log_fini(void)
+static void __exit log_tg_exit(void)
 {
 	nf_log_unregister(&ipt_log_logger);
-	xt_unregister_target(&ipt_log_reg);
+	xt_unregister_target(&log_tg_reg);
 }
 
-module_init(ipt_log_init);
-module_exit(ipt_log_fini);
+module_init(log_tg_init);
+module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5a18997bb3d..f54150356ce 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -32,11 +32,9 @@ static DEFINE_RWLOCK(masq_lock);
 
 /* FIXME: Multiple targets. --RR */
 static bool
-masquerade_check(const char *tablename,
-		 const void *e,
-		 const struct xt_target *target,
-		 void *targinfo,
-		 unsigned int hook_mask)
+masquerade_tg_check(const char *tablename, const void *e,
+                    const struct xt_target *target, void *targinfo,
+                    unsigned int hook_mask)
 {
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
@@ -52,12 +50,9 @@ masquerade_check(const char *tablename,
 }
 
 static unsigned int
-masquerade_target(struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  unsigned int hooknum,
-		  const struct xt_target *target,
-		  const void *targinfo)
+masquerade_tg(struct sk_buff *skb, const struct net_device *in,
+              const struct net_device *out, unsigned int hooknum,
+              const struct xt_target *target, const void *targinfo)
 {
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
@@ -166,22 +161,22 @@ static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
 
-static struct xt_target masquerade __read_mostly = {
+static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= AF_INET,
-	.target		= masquerade_target,
+	.target		= masquerade_tg,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= 1 << NF_INET_POST_ROUTING,
-	.checkentry	= masquerade_check,
+	.checkentry	= masquerade_tg_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_masquerade_init(void)
+static int __init masquerade_tg_init(void)
 {
 	int ret;
 
-	ret = xt_register_target(&masquerade);
+	ret = xt_register_target(&masquerade_tg_reg);
 
 	if (ret == 0) {
 		/* Register for device down reports */
@@ -193,12 +188,12 @@ static int __init ipt_masquerade_init(void)
 	return ret;
 }
 
-static void __exit ipt_masquerade_fini(void)
+static void __exit masquerade_tg_exit(void)
 {
-	xt_unregister_target(&masquerade);
+	xt_unregister_target(&masquerade_tg_reg);
 	unregister_netdevice_notifier(&masq_dev_notifier);
 	unregister_inetaddr_notifier(&masq_inet_notifier);
 }
 
-module_init(ipt_masquerade_init);
-module_exit(ipt_masquerade_fini);
+module_init(masquerade_tg_init);
+module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 973bbee7ee1..8b8263e6357 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -23,11 +23,9 @@ MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
 MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
 
 static bool
-check(const char *tablename,
-      const void *e,
-      const struct xt_target *target,
-      void *targinfo,
-      unsigned int hook_mask)
+netmap_tg_check(const char *tablename, const void *e,
+                const struct xt_target *target, void *targinfo,
+                unsigned int hook_mask)
 {
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
@@ -43,12 +41,9 @@ check(const char *tablename,
 }
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+netmap_tg(struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, unsigned int hooknum,
+          const struct xt_target *target, const void *targinfo)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -78,28 +73,28 @@ target(struct sk_buff *skb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct xt_target target_module __read_mostly = {
+static struct xt_target netmap_tg_reg __read_mostly = {
 	.name 		= "NETMAP",
 	.family		= AF_INET,
-	.target 	= target,
+	.target 	= netmap_tg,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
 			  (1 << NF_INET_LOCAL_OUT),
-	.checkentry 	= check,
+	.checkentry 	= netmap_tg_check,
 	.me 		= THIS_MODULE
 };
 
-static int __init ipt_netmap_init(void)
+static int __init netmap_tg_init(void)
 {
-	return xt_register_target(&target_module);
+	return xt_register_target(&netmap_tg_reg);
 }
 
-static void __exit ipt_netmap_fini(void)
+static void __exit netmap_tg_exit(void)
 {
-	xt_unregister_target(&target_module);
+	xt_unregister_target(&netmap_tg_reg);
 }
 
-module_init(ipt_netmap_init);
-module_exit(ipt_netmap_fini);
+module_init(netmap_tg_init);
+module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 4757af293ba..74ce7e1e9d7 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -27,11 +27,9 @@ MODULE_DESCRIPTION("iptables REDIRECT target module");
 
 /* FIXME: Take multiple ranges --RR */
 static bool
-redirect_check(const char *tablename,
-	       const void *e,
-	       const struct xt_target *target,
-	       void *targinfo,
-	       unsigned int hook_mask)
+redirect_tg_check(const char *tablename, const void *e,
+                  const struct xt_target *target, void *targinfo,
+                  unsigned int hook_mask)
 {
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
@@ -47,12 +45,9 @@ redirect_check(const char *tablename,
 }
 
 static unsigned int
-redirect_target(struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		unsigned int hooknum,
-		const struct xt_target *target,
-		const void *targinfo)
+redirect_tg(struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, unsigned int hooknum,
+            const struct xt_target *target, const void *targinfo)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -95,26 +90,26 @@ redirect_target(struct sk_buff *skb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct xt_target redirect_reg __read_mostly = {
+static struct xt_target redirect_tg_reg __read_mostly = {
 	.name		= "REDIRECT",
 	.family		= AF_INET,
-	.target		= redirect_target,
+	.target		= redirect_tg,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
-	.checkentry	= redirect_check,
+	.checkentry	= redirect_tg_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_redirect_init(void)
+static int __init redirect_tg_init(void)
 {
-	return xt_register_target(&redirect_reg);
+	return xt_register_target(&redirect_tg_reg);
 }
 
-static void __exit ipt_redirect_fini(void)
+static void __exit redirect_tg_exit(void)
 {
-	xt_unregister_target(&redirect_reg);
+	xt_unregister_target(&redirect_tg_reg);
 }
 
-module_init(ipt_redirect_init);
-module_exit(ipt_redirect_fini);
+module_init(redirect_tg_init);
+module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index d55b262bf60..a299cebbd89 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -156,12 +156,10 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
 	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 }
 
-static unsigned int reject(struct sk_buff *skb,
-			   const struct net_device *in,
-			   const struct net_device *out,
-			   unsigned int hooknum,
-			   const struct xt_target *target,
-			   const void *targinfo)
+static unsigned int
+reject_tg(struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, unsigned int hooknum,
+          const struct xt_target *target, const void *targinfo)
 {
 	const struct ipt_reject_info *reject = targinfo;
 
@@ -205,11 +203,10 @@ static unsigned int reject(struct sk_buff *skb,
 	return NF_DROP;
 }
 
-static bool check(const char *tablename,
-		  const void *e_void,
-		  const struct xt_target *target,
-		  void *targinfo,
-		  unsigned int hook_mask)
+static bool
+reject_tg_check(const char *tablename, const void *e_void,
+                const struct xt_target *target, void *targinfo,
+                unsigned int hook_mask)
 {
 	const struct ipt_reject_info *rejinfo = targinfo;
 	const struct ipt_entry *e = e_void;
@@ -228,27 +225,27 @@ static bool check(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_reject_reg __read_mostly = {
+static struct xt_target reject_tg_reg __read_mostly = {
 	.name		= "REJECT",
 	.family		= AF_INET,
-	.target		= reject,
+	.target		= reject_tg,
 	.targetsize	= sizeof(struct ipt_reject_info),
 	.table		= "filter",
 	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_LOCAL_OUT),
-	.checkentry	= check,
+	.checkentry	= reject_tg_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_reject_init(void)
+static int __init reject_tg_init(void)
 {
-	return xt_register_target(&ipt_reject_reg);
+	return xt_register_target(&reject_tg_reg);
 }
 
-static void __exit ipt_reject_fini(void)
+static void __exit reject_tg_exit(void)
 {
-	xt_unregister_target(&ipt_reject_reg);
+	xt_unregister_target(&reject_tg_reg);
 }
 
-module_init(ipt_reject_init);
-module_exit(ipt_reject_fini);
+module_init(reject_tg_init);
+module_exit(reject_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index f2f62b5ce9a..a43923dab1e 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -28,11 +28,9 @@ MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
 MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
 
 static bool
-same_check(const char *tablename,
-	      const void *e,
-	      const struct xt_target *target,
-	      void *targinfo,
-	      unsigned int hook_mask)
+same_tg_check(const char *tablename, const void *e,
+              const struct xt_target *target, void *targinfo,
+              unsigned int hook_mask)
 {
 	unsigned int count, countess, rangeip, index = 0;
 	struct ipt_same_info *mr = targinfo;
@@ -92,8 +90,7 @@ same_check(const char *tablename,
 	return true;
 }
 
-static void
-same_destroy(const struct xt_target *target, void *targinfo)
+static void same_tg_destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_same_info *mr = targinfo;
 
@@ -104,12 +101,9 @@ same_destroy(const struct xt_target *target, void *targinfo)
 }
 
 static unsigned int
-same_target(struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		unsigned int hooknum,
-		const struct xt_target *target,
-		const void *targinfo)
+same_tg(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -152,29 +146,29 @@ same_target(struct sk_buff *skb,
 	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct xt_target same_reg __read_mostly = {
+static struct xt_target same_tg_reg __read_mostly = {
 	.name		= "SAME",
 	.family		= AF_INET,
-	.target		= same_target,
+	.target		= same_tg,
 	.targetsize	= sizeof(struct ipt_same_info),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING),
-	.checkentry	= same_check,
-	.destroy	= same_destroy,
+	.checkentry	= same_tg_check,
+	.destroy	= same_tg_destroy,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_same_init(void)
+static int __init same_tg_init(void)
 {
-	return xt_register_target(&same_reg);
+	return xt_register_target(&same_tg_reg);
 }
 
-static void __exit ipt_same_fini(void)
+static void __exit same_tg_exit(void)
 {
-	xt_unregister_target(&same_reg);
+	xt_unregister_target(&same_tg_reg);
 }
 
-module_init(ipt_same_init);
-module_exit(ipt_same_fini);
+module_init(same_tg_init);
+module_exit(same_tg_exit);
 
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 7b4a6cac399..1a9244104b1 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -21,12 +21,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables TOS mangling module");
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+tos_tg(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
 	struct iphdr *iph = ip_hdr(skb);
@@ -44,11 +41,9 @@ target(struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *e_void,
-	   const struct xt_target *target,
-	   void *targinfo,
-	   unsigned int hook_mask)
+tos_tg_check(const char *tablename, const void *e_void,
+             const struct xt_target *target, void *targinfo,
+             unsigned int hook_mask)
 {
 	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
 
@@ -63,25 +58,25 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_tos_reg __read_mostly = {
+static struct xt_target tos_tg_reg __read_mostly = {
 	.name		= "TOS",
 	.family		= AF_INET,
-	.target		= target,
+	.target		= tos_tg,
 	.targetsize	= sizeof(struct ipt_tos_target_info),
 	.table		= "mangle",
-	.checkentry	= checkentry,
+	.checkentry	= tos_tg_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_tos_init(void)
+static int __init tos_tg_init(void)
 {
-	return xt_register_target(&ipt_tos_reg);
+	return xt_register_target(&tos_tg_reg);
 }
 
-static void __exit ipt_tos_fini(void)
+static void __exit tos_tg_exit(void)
 {
-	xt_unregister_target(&ipt_tos_reg);
+	xt_unregister_target(&tos_tg_reg);
 }
 
-module_init(ipt_tos_init);
-module_exit(ipt_tos_fini);
+module_init(tos_tg_init);
+module_exit(tos_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 00ddfbea279..fa13cf6162b 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -20,10 +20,9 @@ MODULE_DESCRIPTION("IP tables TTL modification module");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ipt_ttl_target(struct sk_buff *skb,
-	       const struct net_device *in, const struct net_device *out,
-	       unsigned int hooknum, const struct xt_target *target,
-	       const void *targinfo)
+ttl_tg(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = targinfo;
@@ -62,11 +61,10 @@ ipt_ttl_target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static bool ipt_ttl_checkentry(const char *tablename,
-		const void *e,
-		const struct xt_target *target,
-		void *targinfo,
-		unsigned int hook_mask)
+static bool
+ttl_tg_check(const char *tablename, const void *e,
+             const struct xt_target *target, void *targinfo,
+             unsigned int hook_mask)
 {
 	const struct ipt_TTL_info *info = targinfo;
 
@@ -80,25 +78,25 @@ static bool ipt_ttl_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_TTL __read_mostly = {
+static struct xt_target ttl_tg_reg __read_mostly = {
 	.name 		= "TTL",
 	.family		= AF_INET,
-	.target 	= ipt_ttl_target,
+	.target 	= ttl_tg,
 	.targetsize	= sizeof(struct ipt_TTL_info),
 	.table		= "mangle",
-	.checkentry 	= ipt_ttl_checkentry,
+	.checkentry 	= ttl_tg_check,
 	.me 		= THIS_MODULE,
 };
 
-static int __init ipt_ttl_init(void)
+static int __init ttl_tg_init(void)
 {
-	return xt_register_target(&ipt_TTL);
+	return xt_register_target(&ttl_tg_reg);
 }
 
-static void __exit ipt_ttl_fini(void)
+static void __exit ttl_tg_exit(void)
 {
-	xt_unregister_target(&ipt_TTL);
+	xt_unregister_target(&ttl_tg_reg);
 }
 
-module_init(ipt_ttl_init);
-module_exit(ipt_ttl_fini);
+module_init(ttl_tg_init);
+module_exit(ttl_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 212b830765a..4139042a63a 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -279,12 +279,10 @@ alloc_failure:
 	spin_unlock_bh(&ulog_lock);
 }
 
-static unsigned int ipt_ulog_target(struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
+static unsigned int
+ulog_tg(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
@@ -318,11 +316,10 @@ static void ipt_logfn(unsigned int pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static bool ipt_ulog_checkentry(const char *tablename,
-				const void *e,
-				const struct xt_target *target,
-				void *targinfo,
-				unsigned int hookmask)
+static bool
+ulog_tg_check(const char *tablename, const void *e,
+              const struct xt_target *target, void *targinfo,
+              unsigned int hookmask)
 {
 	const struct ipt_ulog_info *loginfo = targinfo;
 
@@ -347,7 +344,7 @@ struct compat_ipt_ulog_info {
 	char		prefix[ULOG_PREFIX_LEN];
 };
 
-static void compat_from_user(void *dst, void *src)
+static void ulog_tg_compat_from_user(void *dst, void *src)
 {
 	const struct compat_ipt_ulog_info *cl = src;
 	struct ipt_ulog_info l = {
@@ -360,7 +357,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &l, sizeof(l));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int ulog_tg_compat_to_user(void __user *dst, void *src)
 {
 	const struct ipt_ulog_info *l = src;
 	struct compat_ipt_ulog_info cl = {
@@ -374,16 +371,16 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target ipt_ulog_reg __read_mostly = {
+static struct xt_target ulog_tg_reg __read_mostly = {
 	.name		= "ULOG",
 	.family		= AF_INET,
-	.target		= ipt_ulog_target,
+	.target		= ulog_tg,
 	.targetsize	= sizeof(struct ipt_ulog_info),
-	.checkentry	= ipt_ulog_checkentry,
+	.checkentry	= ulog_tg_check,
 #ifdef CONFIG_COMPAT
 	.compatsize	= sizeof(struct compat_ipt_ulog_info),
-	.compat_from_user = compat_from_user,
-	.compat_to_user	= compat_to_user,
+	.compat_from_user = ulog_tg_compat_from_user,
+	.compat_to_user	= ulog_tg_compat_to_user,
 #endif
 	.me		= THIS_MODULE,
 };
@@ -394,7 +391,7 @@ static struct nf_logger ipt_ulog_logger = {
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ulog_init(void)
+static int __init ulog_tg_init(void)
 {
 	int ret, i;
 
@@ -415,7 +412,7 @@ static int __init ipt_ulog_init(void)
 	if (!nflognl)
 		return -ENOMEM;
 
-	ret = xt_register_target(&ipt_ulog_reg);
+	ret = xt_register_target(&ulog_tg_reg);
 	if (ret < 0) {
 		sock_release(nflognl->sk_socket);
 		return ret;
@@ -426,7 +423,7 @@ static int __init ipt_ulog_init(void)
 	return 0;
 }
 
-static void __exit ipt_ulog_fini(void)
+static void __exit ulog_tg_exit(void)
 {
 	ulog_buff_t *ub;
 	int i;
@@ -435,7 +432,7 @@ static void __exit ipt_ulog_fini(void)
 
 	if (nflog)
 		nf_log_unregister(&ipt_ulog_logger);
-	xt_unregister_target(&ipt_ulog_reg);
+	xt_unregister_target(&ulog_tg_reg);
 	sock_release(nflognl->sk_socket);
 
 	/* remove pending timers and free allocated skb's */
@@ -453,5 +450,5 @@ static void __exit ipt_ulog_fini(void)
 	}
 }
 
-module_init(ipt_ulog_init);
-module_exit(ipt_ulog_fini);
+module_init(ulog_tg_init);
+module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 59f01f7ba6b..b75421c5e08 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -27,10 +27,11 @@ static inline bool match_type(__be32 addr, u_int16_t mask)
 	return !!(mask & (1 << inet_addr_type(addr)));
 }
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+addrtype_mt(const struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, const struct xt_match *match,
+            const void *matchinfo, int offset, unsigned int protoff,
+            bool *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -44,23 +45,23 @@ static bool match(const struct sk_buff *skb,
 	return ret;
 }
 
-static struct xt_match addrtype_match __read_mostly = {
+static struct xt_match addrtype_mt_reg __read_mostly = {
 	.name		= "addrtype",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= addrtype_mt,
 	.matchsize	= sizeof(struct ipt_addrtype_info),
 	.me		= THIS_MODULE
 };
 
-static int __init ipt_addrtype_init(void)
+static int __init addrtype_mt_init(void)
 {
-	return xt_register_match(&addrtype_match);
+	return xt_register_match(&addrtype_mt_reg);
 }
 
-static void __exit ipt_addrtype_fini(void)
+static void __exit addrtype_mt_exit(void)
 {
-	xt_unregister_match(&addrtype_match);
+	xt_unregister_match(&addrtype_mt_reg);
 }
 
-module_init(ipt_addrtype_init);
-module_exit(ipt_addrtype_fini);
+module_init(addrtype_mt_init);
+module_exit(addrtype_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 61b017fd743..2b2fb26c12a 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+ah_mt(const struct sk_buff *skb, const struct net_device *in,
+      const struct net_device *out, const struct xt_match *match,
+      const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
@@ -72,11 +67,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *ip_void,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+ah_mt_check(const char *tablename, const void *ip_void,
+            const struct xt_match *match, void *matchinfo,
+            unsigned int hook_mask)
 {
 	const struct ipt_ah *ahinfo = matchinfo;
 
@@ -88,25 +81,25 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ah_match __read_mostly = {
+static struct xt_match ah_mt_reg __read_mostly = {
 	.name		= "ah",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= ah_mt,
 	.matchsize	= sizeof(struct ipt_ah),
 	.proto		= IPPROTO_AH,
-	.checkentry	= checkentry,
+	.checkentry	= ah_mt_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ah_init(void)
+static int __init ah_mt_init(void)
 {
-	return xt_register_match(&ah_match);
+	return xt_register_match(&ah_mt_reg);
 }
 
-static void __exit ipt_ah_fini(void)
+static void __exit ah_mt_exit(void)
 {
-	xt_unregister_match(&ah_match);
+	xt_unregister_match(&ah_mt_reg);
 }
 
-module_init(ipt_ah_init);
-module_exit(ipt_ah_fini);
+module_init(ah_mt_init);
+module_exit(ah_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index d6925c67406..ea1378460b0 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,10 +67,10 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+ecn_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 
@@ -88,9 +88,10 @@ static bool match(const struct sk_buff *skb,
 	return true;
 }
 
-static bool checkentry(const char *tablename, const void *ip_void,
-		       const struct xt_match *match,
-		       void *matchinfo, unsigned int hook_mask)
+static bool
+ecn_mt_check(const char *tablename, const void *ip_void,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 	const struct ipt_ip *ip = ip_void;
@@ -111,24 +112,24 @@ static bool checkentry(const char *tablename, const void *ip_void,
 	return true;
 }
 
-static struct xt_match ecn_match __read_mostly = {
+static struct xt_match ecn_mt_reg __read_mostly = {
 	.name		= "ecn",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= ecn_mt,
 	.matchsize	= sizeof(struct ipt_ecn_info),
-	.checkentry	= checkentry,
+	.checkentry	= ecn_mt_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ecn_init(void)
+static int __init ecn_mt_init(void)
 {
-	return xt_register_match(&ecn_match);
+	return xt_register_match(&ecn_mt_reg);
 }
 
-static void __exit ipt_ecn_fini(void)
+static void __exit ecn_mt_exit(void)
 {
-	xt_unregister_match(&ecn_match);
+	xt_unregister_match(&ecn_mt_reg);
 }
 
-module_init(ipt_ecn_init);
-module_exit(ipt_ecn_fini);
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 0106dc955a6..82208ed5f70 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -18,12 +18,10 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 MODULE_DESCRIPTION("iptables arbitrary IP range match module");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset, unsigned int protoff, bool *hotdrop)
+iprange_mt(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -57,23 +55,23 @@ match(const struct sk_buff *skb,
 	return true;
 }
 
-static struct xt_match iprange_match __read_mostly = {
+static struct xt_match iprange_mt_reg __read_mostly = {
 	.name		= "iprange",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= iprange_mt,
 	.matchsize	= sizeof(struct ipt_iprange_info),
 	.me		= THIS_MODULE
 };
 
-static int __init ipt_iprange_init(void)
+static int __init iprange_mt_init(void)
 {
-	return xt_register_match(&iprange_match);
+	return xt_register_match(&iprange_mt_reg);
 }
 
-static void __exit ipt_iprange_fini(void)
+static void __exit iprange_mt_exit(void)
 {
-	xt_unregister_match(&iprange_match);
+	xt_unregister_match(&iprange_mt_reg);
 }
 
-module_init(ipt_iprange_init);
-module_exit(ipt_iprange_fini);
+module_init(iprange_mt_init);
+module_exit(iprange_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 6bc4bfea66d..4f1aa897d4b 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -22,14 +22,10 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables owner match");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+owner_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	const struct ipt_owner_info *info = matchinfo;
 
@@ -52,11 +48,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *ip,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+owner_mt_check(const char *tablename, const void *ip,
+               const struct xt_match *match, void *matchinfo,
+               unsigned int hook_mask)
 {
 	const struct ipt_owner_info *info = matchinfo;
 
@@ -68,26 +62,26 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match owner_match __read_mostly = {
+static struct xt_match owner_mt_reg __read_mostly = {
 	.name		= "owner",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= owner_mt,
 	.matchsize	= sizeof(struct ipt_owner_info),
 	.hooks		= (1 << NF_INET_LOCAL_OUT) |
 			  (1 << NF_INET_POST_ROUTING),
-	.checkentry	= checkentry,
+	.checkentry	= owner_mt_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_owner_init(void)
+static int __init owner_mt_init(void)
 {
-	return xt_register_match(&owner_match);
+	return xt_register_match(&owner_mt_reg);
 }
 
-static void __exit ipt_owner_fini(void)
+static void __exit owner_mt_exit(void)
 {
-	xt_unregister_match(&owner_match);
+	xt_unregister_match(&owner_mt_reg);
 }
 
-module_init(ipt_owner_init);
-module_exit(ipt_owner_fini);
+module_init(owner_mt_init);
+module_exit(owner_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 11d39fb5f38..4f3700d937a 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -170,10 +170,10 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-ipt_recent_match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, bool *hotdrop)
+recent_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -236,9 +236,9 @@ out:
 }
 
 static bool
-ipt_recent_checkentry(const char *tablename, const void *ip,
-		      const struct xt_match *match, void *matchinfo,
-		      unsigned int hook_mask)
+recent_mt_check(const char *tablename, const void *ip,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -293,8 +293,7 @@ out:
 	return ret;
 }
 
-static void
-ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
+static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -455,17 +454,17 @@ static const struct file_operations recent_fops = {
 };
 #endif /* CONFIG_PROC_FS */
 
-static struct xt_match recent_match __read_mostly = {
+static struct xt_match recent_mt_reg __read_mostly = {
 	.name		= "recent",
 	.family		= AF_INET,
-	.match		= ipt_recent_match,
+	.match		= recent_mt,
 	.matchsize	= sizeof(struct ipt_recent_info),
-	.checkentry	= ipt_recent_checkentry,
-	.destroy	= ipt_recent_destroy,
+	.checkentry	= recent_mt_check,
+	.destroy	= recent_mt_destroy,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_recent_init(void)
+static int __init recent_mt_init(void)
 {
 	int err;
 
@@ -473,27 +472,27 @@ static int __init ipt_recent_init(void)
 		return -EINVAL;
 	ip_list_hash_size = 1 << fls(ip_list_tot);
 
-	err = xt_register_match(&recent_match);
+	err = xt_register_match(&recent_mt_reg);
 #ifdef CONFIG_PROC_FS
 	if (err)
 		return err;
 	proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
 	if (proc_dir == NULL) {
-		xt_unregister_match(&recent_match);
+		xt_unregister_match(&recent_mt_reg);
 		err = -ENOMEM;
 	}
 #endif
 	return err;
 }
 
-static void __exit ipt_recent_exit(void)
+static void __exit recent_mt_exit(void)
 {
 	BUG_ON(!list_empty(&tables));
-	xt_unregister_match(&recent_match);
+	xt_unregister_match(&recent_mt_reg);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("ipt_recent", init_net.proc_net);
 #endif
 }
 
-module_init(ipt_recent_init);
-module_exit(ipt_recent_exit);
+module_init(recent_mt_init);
+module_exit(recent_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index e740441c973..7d608682584 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -19,37 +19,32 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("iptables TOS match module");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+tos_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_tos_info *info = matchinfo;
 
 	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
-static struct xt_match tos_match __read_mostly = {
+static struct xt_match tos_mt_reg __read_mostly = {
 	.name		= "tos",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= tos_mt,
 	.matchsize	= sizeof(struct ipt_tos_info),
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_multiport_init(void)
+static int __init tos_mt_init(void)
 {
-	return xt_register_match(&tos_match);
+	return xt_register_match(&tos_mt_reg);
 }
 
-static void __exit ipt_multiport_fini(void)
+static void __exit tos_mt_exit(void)
 {
-	xt_unregister_match(&tos_match);
+	xt_unregister_match(&tos_mt_reg);
 }
 
-module_init(ipt_multiport_init);
-module_exit(ipt_multiport_fini);
+module_init(tos_mt_init);
+module_exit(tos_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index a439900a4ba..b18d39162ff 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -18,10 +18,10 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("IP tables TTL matching module");
 MODULE_LICENSE("GPL");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+ttl_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -44,23 +44,23 @@ static bool match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match ttl_match __read_mostly = {
+static struct xt_match ttl_mt_reg __read_mostly = {
 	.name		= "ttl",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= ttl_mt,
 	.matchsize	= sizeof(struct ipt_ttl_info),
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ttl_init(void)
+static int __init ttl_mt_init(void)
 {
-	return xt_register_match(&ttl_match);
+	return xt_register_match(&ttl_mt_reg);
 }
 
-static void __exit ipt_ttl_fini(void)
+static void __exit ttl_mt_exit(void)
 {
-	xt_unregister_match(&ttl_match);
+	xt_unregister_match(&ttl_mt_reg);
 }
 
-module_init(ipt_ttl_init);
-module_exit(ipt_ttl_fini);
+module_init(ttl_mt_init);
+module_exit(ttl_mt_exit);
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 9afc836fd45..cefb4253711 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -18,12 +18,10 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
 MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
 MODULE_LICENSE("GPL");
 
-static unsigned int ip6t_hl_target(struct sk_buff *skb,
-				   const struct net_device *in,
-				   const struct net_device *out,
-				   unsigned int hooknum,
-				   const struct xt_target *target,
-				   const void *targinfo)
+static unsigned int
+hl_tg6(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
 {
 	struct ipv6hdr *ip6h;
 	const struct ip6t_HL_info *info = targinfo;
@@ -58,11 +56,10 @@ static unsigned int ip6t_hl_target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static bool ip6t_hl_checkentry(const char *tablename,
-		const void *entry,
-		const struct xt_target *target,
-		void *targinfo,
-		unsigned int hook_mask)
+static bool
+hl_tg6_check(const char *tablename, const void *entry,
+             const struct xt_target *target, void *targinfo,
+             unsigned int hook_mask)
 {
 	const struct ip6t_HL_info *info = targinfo;
 
@@ -79,25 +76,25 @@ static bool ip6t_hl_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ip6t_HL __read_mostly = {
+static struct xt_target hl_tg6_reg __read_mostly = {
 	.name 		= "HL",
 	.family		= AF_INET6,
-	.target		= ip6t_hl_target,
+	.target		= hl_tg6,
 	.targetsize	= sizeof(struct ip6t_HL_info),
 	.table		= "mangle",
-	.checkentry	= ip6t_hl_checkentry,
+	.checkentry	= hl_tg6_check,
 	.me		= THIS_MODULE
 };
 
-static int __init ip6t_hl_init(void)
+static int __init hl_tg6_init(void)
 {
-	return xt_register_target(&ip6t_HL);
+	return xt_register_target(&hl_tg6_reg);
 }
 
-static void __exit ip6t_hl_fini(void)
+static void __exit hl_tg6_exit(void)
 {
-	xt_unregister_target(&ip6t_HL);
+	xt_unregister_target(&hl_tg6_reg);
 }
 
-module_init(ip6t_hl_init);
-module_exit(ip6t_hl_fini);
+module_init(hl_tg6_init);
+module_exit(hl_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 7a48c342df4..cd51c42727f 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -431,12 +431,9 @@ ip6t_log_packet(unsigned int pf,
 }
 
 static unsigned int
-ip6t_log_target(struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		unsigned int hooknum,
-		const struct xt_target *target,
-		const void *targinfo)
+log_tg6(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
 	struct nf_loginfo li;
@@ -450,11 +447,10 @@ ip6t_log_target(struct sk_buff *skb,
 }
 
 
-static bool ip6t_log_checkentry(const char *tablename,
-				const void *entry,
-				const struct xt_target *target,
-				void *targinfo,
-				unsigned int hook_mask)
+static bool
+log_tg6_check(const char *tablename, const void *entry,
+              const struct xt_target *target, void *targinfo,
+              unsigned int hook_mask)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
 
@@ -470,12 +466,12 @@ static bool ip6t_log_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ip6t_log_reg __read_mostly = {
+static struct xt_target log_tg6_reg __read_mostly = {
 	.name 		= "LOG",
 	.family		= AF_INET6,
-	.target 	= ip6t_log_target,
+	.target 	= log_tg6,
 	.targetsize	= sizeof(struct ip6t_log_info),
-	.checkentry	= ip6t_log_checkentry,
+	.checkentry	= log_tg6_check,
 	.me 		= THIS_MODULE,
 };
 
@@ -485,22 +481,22 @@ static struct nf_logger ip6t_logger = {
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_log_init(void)
+static int __init log_tg6_init(void)
 {
 	int ret;
 
-	ret = xt_register_target(&ip6t_log_reg);
+	ret = xt_register_target(&log_tg6_reg);
 	if (ret < 0)
 		return ret;
 	nf_log_register(PF_INET6, &ip6t_logger);
 	return 0;
 }
 
-static void __exit ip6t_log_fini(void)
+static void __exit log_tg6_exit(void)
 {
 	nf_log_unregister(&ip6t_logger);
-	xt_unregister_target(&ip6t_log_reg);
+	xt_unregister_target(&log_tg6_reg);
 }
 
-module_init(ip6t_log_init);
-module_exit(ip6t_log_fini);
+module_init(log_tg6_init);
+module_exit(log_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 960ba1780a9..a951c2cb6de 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -170,12 +170,10 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
 }
 
-static unsigned int reject6_target(struct sk_buff *skb,
-			   const struct net_device *in,
-			   const struct net_device *out,
-			   unsigned int hooknum,
-			   const struct xt_target *target,
-			   const void *targinfo)
+static unsigned int
+reject_tg6(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	const struct ip6t_reject_info *reject = targinfo;
 
@@ -214,11 +212,10 @@ static unsigned int reject6_target(struct sk_buff *skb,
 	return NF_DROP;
 }
 
-static bool check(const char *tablename,
-		  const void *entry,
-		  const struct xt_target *target,
-		  void *targinfo,
-		  unsigned int hook_mask)
+static bool
+reject_tg6_check(const char *tablename, const void *entry,
+                 const struct xt_target *target, void *targinfo,
+                 unsigned int hook_mask)
 {
 	const struct ip6t_reject_info *rejinfo = targinfo;
 	const struct ip6t_entry *e = entry;
@@ -237,27 +234,27 @@ static bool check(const char *tablename,
 	return true;
 }
 
-static struct xt_target ip6t_reject_reg __read_mostly = {
+static struct xt_target reject_tg6_reg __read_mostly = {
 	.name		= "REJECT",
 	.family		= AF_INET6,
-	.target		= reject6_target,
+	.target		= reject_tg6,
 	.targetsize	= sizeof(struct ip6t_reject_info),
 	.table		= "filter",
 	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_LOCAL_OUT),
-	.checkentry	= check,
+	.checkentry	= reject_tg6_check,
 	.me		= THIS_MODULE
 };
 
-static int __init ip6t_reject_init(void)
+static int __init reject_tg6_init(void)
 {
-	return xt_register_target(&ip6t_reject_reg);
+	return xt_register_target(&reject_tg6_reg);
 }
 
-static void __exit ip6t_reject_fini(void)
+static void __exit reject_tg6_exit(void)
 {
-	xt_unregister_target(&ip6t_reject_reg);
+	xt_unregister_target(&reject_tg6_reg);
 }
 
-module_init(ip6t_reject_init);
-module_exit(ip6t_reject_fini);
+module_init(reject_tg6_init);
+module_exit(reject_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2a25fe25e0e..f5d08a8c011 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+ah_mt6(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ip_auth_hdr _ah;
 	const struct ip_auth_hdr *ah;
@@ -100,11 +95,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	  const void *entry,
-	  const struct xt_match *match,
-	  void *matchinfo,
-	  unsigned int hook_mask)
+ah_mt6_check(const char *tablename, const void *entry,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct ip6t_ah *ahinfo = matchinfo;
 
@@ -115,24 +108,24 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ah_match __read_mostly = {
+static struct xt_match ah_mt6_reg __read_mostly = {
 	.name		= "ah",
 	.family		= AF_INET6,
-	.match		= match,
+	.match		= ah_mt6,
 	.matchsize	= sizeof(struct ip6t_ah),
-	.checkentry	= checkentry,
+	.checkentry	= ah_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_ah_init(void)
+static int __init ah_mt6_init(void)
 {
-	return xt_register_match(&ah_match);
+	return xt_register_match(&ah_mt6_reg);
 }
 
-static void __exit ip6t_ah_fini(void)
+static void __exit ah_mt6_exit(void)
 {
-	xt_unregister_match(&ah_match);
+	xt_unregister_match(&ah_mt6_reg);
 }
 
-module_init(ip6t_ah_init);
-module_exit(ip6t_ah_fini);
+module_init(ah_mt6_init);
+module_exit(ah_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index ff71269579d..dd9e67df914 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,14 +20,10 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+eui64_mt6(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	unsigned char eui64[8];
 	int i = 0;
@@ -62,25 +58,25 @@ match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match eui64_match __read_mostly = {
+static struct xt_match eui64_mt6_reg __read_mostly = {
 	.name		= "eui64",
 	.family		= AF_INET6,
-	.match		= match,
+	.match		= eui64_mt6,
 	.matchsize	= sizeof(int),
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
 			  (1 << NF_INET_FORWARD),
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_eui64_init(void)
+static int __init eui64_mt6_init(void)
 {
-	return xt_register_match(&eui64_match);
+	return xt_register_match(&eui64_mt6_reg);
 }
 
-static void __exit ip6t_eui64_fini(void)
+static void __exit eui64_mt6_exit(void)
 {
-	xt_unregister_match(&eui64_match);
+	xt_unregister_match(&eui64_mt6_reg);
 }
 
-module_init(ip6t_eui64_init);
-module_exit(ip6t_eui64_fini);
+module_init(eui64_mt6_init);
+module_exit(eui64_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 968aeba0207..ae8c714a80d 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -35,14 +35,10 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+frag_mt6(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	struct frag_hdr _frag;
 	const struct frag_hdr *fh;
@@ -116,11 +112,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *ip,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+frag_mt6_check(const char *tablename, const void *ip,
+               const struct xt_match *match, void *matchinfo,
+               unsigned int hook_mask)
 {
 	const struct ip6t_frag *fraginfo = matchinfo;
 
@@ -131,24 +125,24 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match frag_match __read_mostly = {
+static struct xt_match frag_mt6_reg __read_mostly = {
 	.name		= "frag",
 	.family		= AF_INET6,
-	.match		= match,
+	.match		= frag_mt6,
 	.matchsize	= sizeof(struct ip6t_frag),
-	.checkentry	= checkentry,
+	.checkentry	= frag_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_frag_init(void)
+static int __init frag_mt6_init(void)
 {
-	return xt_register_match(&frag_match);
+	return xt_register_match(&frag_mt6_reg);
 }
 
-static void __exit ip6t_frag_fini(void)
+static void __exit frag_mt6_exit(void)
 {
-	xt_unregister_match(&frag_match);
+	xt_unregister_match(&frag_mt6_reg);
 }
 
-module_init(ip6t_frag_init);
-module_exit(ip6t_frag_fini);
+module_init(frag_mt6_init);
+module_exit(frag_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e6ca6018b1e..b76e27dc73d 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -42,14 +42,10 @@ MODULE_ALIAS("ip6t_dst");
  */
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff,
+        bool *hotdrop)
 {
 	struct ipv6_opt_hdr _optsh;
 	const struct ipv6_opt_hdr *oh;
@@ -171,11 +167,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *entry,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+hbh_mt6_check(const char *tablename, const void *entry,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	const struct ip6t_opts *optsinfo = matchinfo;
 
@@ -186,36 +180,36 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match opts_match[] __read_mostly = {
+static struct xt_match hbh_mt6_reg[] __read_mostly = {
 	{
 		.name		= "hbh",
 		.family		= AF_INET6,
-		.match		= match,
+		.match		= hbh_mt6,
 		.matchsize	= sizeof(struct ip6t_opts),
-		.checkentry	= checkentry,
+		.checkentry	= hbh_mt6_check,
 		.me		= THIS_MODULE,
 		.data		= NEXTHDR_HOP,
 	},
 	{
 		.name		= "dst",
 		.family		= AF_INET6,
-		.match		= match,
+		.match		= hbh_mt6,
 		.matchsize	= sizeof(struct ip6t_opts),
-		.checkentry	= checkentry,
+		.checkentry	= hbh_mt6_check,
 		.me		= THIS_MODULE,
 		.data		= NEXTHDR_DEST,
 	},
 };
 
-static int __init ip6t_hbh_init(void)
+static int __init hbh_mt6_init(void)
 {
-	return xt_register_matches(opts_match, ARRAY_SIZE(opts_match));
+	return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
 }
 
-static void __exit ip6t_hbh_fini(void)
+static void __exit hbh_mt6_exit(void)
 {
-	xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match));
+	xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
 }
 
-module_init(ip6t_hbh_init);
-module_exit(ip6t_hbh_fini);
+module_init(hbh_mt6_init);
+module_exit(hbh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index ca29ec00dc1..8f2d7d0ab40 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -19,10 +19,10 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
 MODULE_DESCRIPTION("IP tables Hop Limit matching module");
 MODULE_LICENSE("GPL");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+hl_mt6(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ip6t_hl_info *info = matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -49,23 +49,23 @@ static bool match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match hl_match __read_mostly = {
+static struct xt_match hl_mt6_reg __read_mostly = {
 	.name		= "hl",
 	.family		= AF_INET6,
-	.match		= match,
+	.match		= hl_mt6,
 	.matchsize	= sizeof(struct ip6t_hl_info),
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_hl_init(void)
+static int __init hl_mt6_init(void)
 {
-	return xt_register_match(&hl_match);
+	return xt_register_match(&hl_mt6_reg);
 }
 
-static void __exit ip6t_hl_fini(void)
+static void __exit hl_mt6_exit(void)
 {
-	xt_unregister_match(&hl_match);
+	xt_unregister_match(&hl_mt6_reg);
 }
 
-module_init(ip6t_hl_init);
-module_exit(ip6t_hl_fini);
+module_init(hl_mt6_init);
+module_exit(hl_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 2c65c2f9a4a..ae497e7ac11 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,14 +27,10 @@ MODULE_DESCRIPTION("IPv6 headers match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-ipv6header_match(const struct sk_buff *skb,
-		 const struct net_device *in,
-		 const struct net_device *out,
-		 const struct xt_match *match,
-		 const void *matchinfo,
-		 int offset,
-		 unsigned int protoff,
-		 bool *hotdrop)
+ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in,
+               const struct net_device *out, const struct xt_match *match,
+               const void *matchinfo, int offset, unsigned int protoff,
+               bool *hotdrop)
 {
 	const struct ip6t_ipv6header_info *info = matchinfo;
 	unsigned int temp;
@@ -125,11 +121,9 @@ ipv6header_match(const struct sk_buff *skb,
 }
 
 static bool
-ipv6header_checkentry(const char *tablename,
-		      const void *ip,
-		      const struct xt_match *match,
-		      void *matchinfo,
-		      unsigned int hook_mask)
+ipv6header_mt6_check(const char *tablename, const void *ip,
+                     const struct xt_match *match, void *matchinfo,
+                     unsigned int hook_mask)
 {
 	const struct ip6t_ipv6header_info *info = matchinfo;
 
@@ -141,25 +135,25 @@ ipv6header_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ip6t_ipv6header_match __read_mostly = {
+static struct xt_match ipv6header_mt6_reg __read_mostly = {
 	.name		= "ipv6header",
 	.family		= AF_INET6,
-	.match		= &ipv6header_match,
+	.match		= ipv6header_mt6,
 	.matchsize	= sizeof(struct ip6t_ipv6header_info),
-	.checkentry	= &ipv6header_checkentry,
+	.checkentry	= ipv6header_mt6_check,
 	.destroy	= NULL,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipv6header_init(void)
+static int __init ipv6header_mt6_init(void)
 {
-	return xt_register_match(&ip6t_ipv6header_match);
+	return xt_register_match(&ipv6header_mt6_reg);
 }
 
-static void __exit ipv6header_exit(void)
+static void __exit ipv6header_mt6_exit(void)
 {
-	xt_unregister_match(&ip6t_ipv6header_match);
+	xt_unregister_match(&ipv6header_mt6_reg);
 }
 
-module_init(ipv6header_init);
-module_exit(ipv6header_exit);
+module_init(ipv6header_mt6_init);
+module_exit(ipv6header_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 0fa714092dc..618e6b94b03 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -38,14 +38,9 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-	 const struct net_device *in,
-	 const struct net_device *out,
-	 const struct xt_match *match,
-	 const void *matchinfo,
-	 int offset,
-	 unsigned int protoff,
-	 bool *hotdrop)
+mh_mt6(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ip6_mh _mh;
 	const struct ip6_mh *mh;
@@ -77,11 +72,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-mh_checkentry(const char *tablename,
-	      const void *entry,
-	      const struct xt_match *match,
-	      void *matchinfo,
-	      unsigned int hook_mask)
+mh_mt6_check(const char *tablename, const void *entry,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct ip6t_mh *mhinfo = matchinfo;
 
@@ -89,25 +82,25 @@ mh_checkentry(const char *tablename,
 	return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
 }
 
-static struct xt_match mh_match __read_mostly = {
+static struct xt_match mh_mt6_reg __read_mostly = {
 	.name		= "mh",
 	.family		= AF_INET6,
-	.checkentry	= mh_checkentry,
-	.match		= match,
+	.checkentry	= mh_mt6_check,
+	.match		= mh_mt6,
 	.matchsize	= sizeof(struct ip6t_mh),
 	.proto		= IPPROTO_MH,
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_mh_init(void)
+static int __init mh_mt6_init(void)
 {
-	return xt_register_match(&mh_match);
+	return xt_register_match(&mh_mt6_reg);
 }
 
-static void __exit ip6t_mh_fini(void)
+static void __exit mh_mt6_exit(void)
 {
-	xt_unregister_match(&mh_match);
+	xt_unregister_match(&mh_mt6_reg);
 }
 
-module_init(ip6t_mh_init);
-module_exit(ip6t_mh_fini);
+module_init(mh_mt6_init);
+module_exit(mh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 1e0dc4a972c..6a52ed98516 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -24,14 +24,10 @@ MODULE_LICENSE("GPL");
 
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+owner_mt6(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct ip6t_owner_info *info = matchinfo;
 
@@ -52,11 +48,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *ip,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+owner_mt6_check(const char *tablename, const void *ip,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
 {
 	const struct ip6t_owner_info *info = matchinfo;
 
@@ -68,26 +62,26 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match owner_match __read_mostly = {
+static struct xt_match owner_mt6_reg __read_mostly = {
 	.name		= "owner",
 	.family		= AF_INET6,
-	.match		= match,
+	.match		= owner_mt6,
 	.matchsize	= sizeof(struct ip6t_owner_info),
 	.hooks		= (1 << NF_INET_LOCAL_OUT) |
 			  (1 << NF_INET_POST_ROUTING),
-	.checkentry	= checkentry,
+	.checkentry	= owner_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_owner_init(void)
+static int __init owner_mt6_init(void)
 {
-	return xt_register_match(&owner_match);
+	return xt_register_match(&owner_mt6_reg);
 }
 
-static void __exit ip6t_owner_fini(void)
+static void __exit owner_mt6_exit(void)
 {
-	xt_unregister_match(&owner_match);
+	xt_unregister_match(&owner_mt6_reg);
 }
 
-module_init(ip6t_owner_init);
-module_exit(ip6t_owner_fini);
+module_init(owner_mt6_init);
+module_exit(owner_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 357cea703bd..038cea6407d 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -37,14 +37,9 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+rt_mt6(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ipv6_rt_hdr _route;
 	const struct ipv6_rt_hdr *rh;
@@ -195,11 +190,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *entry,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+rt_mt6_check(const char *tablename, const void *entry,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct ip6t_rt *rtinfo = matchinfo;
 
@@ -218,24 +211,24 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match rt_match __read_mostly = {
+static struct xt_match rt_mt6_reg __read_mostly = {
 	.name		= "rt",
 	.family		= AF_INET6,
-	.match		= match,
+	.match		= rt_mt6,
 	.matchsize	= sizeof(struct ip6t_rt),
-	.checkentry	= checkentry,
+	.checkentry	= rt_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ip6t_rt_init(void)
+static int __init rt_mt6_init(void)
 {
-	return xt_register_match(&rt_match);
+	return xt_register_match(&rt_mt6_reg);
 }
 
-static void __exit ip6t_rt_fini(void)
+static void __exit rt_mt6_exit(void)
 {
-	xt_unregister_match(&rt_match);
+	xt_unregister_match(&rt_mt6_reg);
 }
 
-module_init(ip6t_rt_init);
-module_exit(ip6t_rt_fini);
+module_init(rt_mt6_init);
+module_exit(rt_mt6_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index e4f7f86d7dd..8e83dd4c13a 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,12 +27,9 @@ MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+classify_tg(struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, unsigned int hooknum,
+            const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_classify_target_info *clinfo = targinfo;
 
@@ -40,11 +37,11 @@ target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target xt_classify_target[] __read_mostly = {
+static struct xt_target classify_tg_reg[] __read_mostly = {
 	{
 		.family		= AF_INET,
 		.name 		= "CLASSIFY",
-		.target 	= target,
+		.target 	= classify_tg,
 		.targetsize	= sizeof(struct xt_classify_target_info),
 		.table		= "mangle",
 		.hooks		= (1 << NF_INET_LOCAL_OUT) |
@@ -55,7 +52,7 @@ static struct xt_target xt_classify_target[] __read_mostly = {
 	{
 		.name 		= "CLASSIFY",
 		.family		= AF_INET6,
-		.target 	= target,
+		.target 	= classify_tg,
 		.targetsize	= sizeof(struct xt_classify_target_info),
 		.table		= "mangle",
 		.hooks		= (1 << NF_INET_LOCAL_OUT) |
@@ -65,17 +62,16 @@ static struct xt_target xt_classify_target[] __read_mostly = {
 	},
 };
 
-static int __init xt_classify_init(void)
+static int __init classify_tg_init(void)
 {
-	return xt_register_targets(xt_classify_target,
-				   ARRAY_SIZE(xt_classify_target));
+	return xt_register_targets(classify_tg_reg,
+	       ARRAY_SIZE(classify_tg_reg));
 }
 
-static void __exit xt_classify_fini(void)
+static void __exit classify_tg_exit(void)
 {
-	xt_unregister_targets(xt_classify_target,
-			      ARRAY_SIZE(xt_classify_target));
+	xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
-module_init(xt_classify_init);
-module_exit(xt_classify_fini);
+module_init(classify_tg_init);
+module_exit(classify_tg_exit);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 0621ca7de3b..0250bbea4c8 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -34,12 +34,9 @@ MODULE_ALIAS("ip6t_CONNMARK");
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+connmark_tg(struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, unsigned int hooknum,
+            const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
 	struct nf_conn *ct;
@@ -78,11 +75,9 @@ target(struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *entry,
-	   const struct xt_target *target,
-	   void *targinfo,
-	   unsigned int hook_mask)
+connmark_tg_check(const char *tablename, const void *entry,
+                  const struct xt_target *target, void *targinfo,
+                  unsigned int hook_mask)
 {
 	const struct xt_connmark_target_info *matchinfo = targinfo;
 
@@ -107,7 +102,7 @@ checkentry(const char *tablename,
 }
 
 static void
-destroy(const struct xt_target *target, void *targinfo)
+connmark_tg_destroy(const struct xt_target *target, void *targinfo)
 {
 	nf_ct_l3proto_module_put(target->family);
 }
@@ -120,7 +115,7 @@ struct compat_xt_connmark_target_info {
 	u_int16_t	__pad2;
 };
 
-static void compat_from_user(void *dst, void *src)
+static void connmark_tg_compat_from_user(void *dst, void *src)
 {
 	const struct compat_xt_connmark_target_info *cm = src;
 	struct xt_connmark_target_info m = {
@@ -131,7 +126,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int connmark_tg_compat_to_user(void __user *dst, void *src)
 {
 	const struct xt_connmark_target_info *m = src;
 	struct compat_xt_connmark_target_info cm = {
@@ -143,43 +138,42 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target xt_connmark_target[] __read_mostly = {
+static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.destroy	= destroy,
-		.target		= target,
+		.checkentry	= connmark_tg_check,
+		.destroy	= connmark_tg_destroy,
+		.target		= connmark_tg,
 		.targetsize	= sizeof(struct xt_connmark_target_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
-		.compat_from_user = compat_from_user,
-		.compat_to_user	= compat_to_user,
+		.compat_from_user = connmark_tg_compat_from_user,
+		.compat_to_user	= connmark_tg_compat_to_user,
 #endif
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "CONNMARK",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.destroy	= destroy,
-		.target		= target,
+		.checkentry	= connmark_tg_check,
+		.destroy	= connmark_tg_destroy,
+		.target		= connmark_tg,
 		.targetsize	= sizeof(struct xt_connmark_target_info),
 		.me		= THIS_MODULE
 	},
 };
 
-static int __init xt_connmark_init(void)
+static int __init connmark_tg_init(void)
 {
-	return xt_register_targets(xt_connmark_target,
-				   ARRAY_SIZE(xt_connmark_target));
+	return xt_register_targets(connmark_tg_reg,
+	       ARRAY_SIZE(connmark_tg_reg));
 }
 
-static void __exit xt_connmark_fini(void)
+static void __exit connmark_tg_exit(void)
 {
-	xt_unregister_targets(xt_connmark_target,
-			      ARRAY_SIZE(xt_connmark_target));
+	xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
 }
 
-module_init(xt_connmark_init);
-module_exit(xt_connmark_fini);
+module_init(connmark_tg_init);
+module_exit(connmark_tg_exit);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index d8feba9bdb4..2c265e87f39 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -61,10 +61,10 @@ static void secmark_restore(struct sk_buff *skb)
 	}
 }
 
-static unsigned int target(struct sk_buff *skb, const struct net_device *in,
-			   const struct net_device *out, unsigned int hooknum,
-			   const struct xt_target *target,
-			   const void *targinfo)
+static unsigned int
+connsecmark_tg(struct sk_buff *skb, const struct net_device *in,
+               const struct net_device *out, unsigned int hooknum,
+               const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_connsecmark_target_info *info = targinfo;
 
@@ -84,9 +84,10 @@ static unsigned int target(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static bool checkentry(const char *tablename, const void *entry,
-		       const struct xt_target *target, void *targinfo,
-		       unsigned int hook_mask)
+static bool
+connsecmark_tg_check(const char *tablename, const void *entry,
+                     const struct xt_target *target, void *targinfo,
+                     unsigned int hook_mask)
 {
 	const struct xt_connsecmark_target_info *info = targinfo;
 
@@ -109,18 +110,18 @@ static bool checkentry(const char *tablename, const void *entry,
 }
 
 static void
-destroy(const struct xt_target *target, void *targinfo)
+connsecmark_tg_destroy(const struct xt_target *target, void *targinfo)
 {
 	nf_ct_l3proto_module_put(target->family);
 }
 
-static struct xt_target xt_connsecmark_target[] __read_mostly = {
+static struct xt_target connsecmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNSECMARK",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.destroy	= destroy,
-		.target		= target,
+		.checkentry	= connsecmark_tg_check,
+		.destroy	= connsecmark_tg_destroy,
+		.target		= connsecmark_tg,
 		.targetsize	= sizeof(struct xt_connsecmark_target_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -128,26 +129,26 @@ static struct xt_target xt_connsecmark_target[] __read_mostly = {
 	{
 		.name		= "CONNSECMARK",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.destroy	= destroy,
-		.target		= target,
+		.checkentry	= connsecmark_tg_check,
+		.destroy	= connsecmark_tg_destroy,
+		.target		= connsecmark_tg,
 		.targetsize	= sizeof(struct xt_connsecmark_target_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_connsecmark_init(void)
+static int __init connsecmark_tg_init(void)
 {
-	return xt_register_targets(xt_connsecmark_target,
-				   ARRAY_SIZE(xt_connsecmark_target));
+	return xt_register_targets(connsecmark_tg_reg,
+	       ARRAY_SIZE(connsecmark_tg_reg));
 }
 
-static void __exit xt_connsecmark_fini(void)
+static void __exit connsecmark_tg_exit(void)
 {
-	xt_unregister_targets(xt_connsecmark_target,
-			      ARRAY_SIZE(xt_connsecmark_target));
+	xt_unregister_targets(connsecmark_tg_reg,
+	                      ARRAY_SIZE(connsecmark_tg_reg));
 }
 
-module_init(xt_connsecmark_init);
-module_exit(xt_connsecmark_fini);
+module_init(connsecmark_tg_init);
+module_exit(connsecmark_tg_exit);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 6322a933ab7..18823558797 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -25,12 +25,10 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_DSCP");
 MODULE_ALIAS("ip6t_DSCP");
 
-static unsigned int target(struct sk_buff *skb,
-			   const struct net_device *in,
-			   const struct net_device *out,
-			   unsigned int hooknum,
-			   const struct xt_target *target,
-			   const void *targinfo)
+static unsigned int
+dscp_tg(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -46,12 +44,10 @@ static unsigned int target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static unsigned int target6(struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
-			    unsigned int hooknum,
-			    const struct xt_target *target,
-			    const void *targinfo)
+static unsigned int
+dscp_tg6(struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, unsigned int hooknum,
+         const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -66,11 +62,10 @@ static unsigned int target6(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static bool checkentry(const char *tablename,
-		       const void *e_void,
-		       const struct xt_target *target,
-		       void *targinfo,
-		       unsigned int hook_mask)
+static bool
+dscp_tg_check(const char *tablename, const void *e_void,
+              const struct xt_target *target, void *targinfo,
+              unsigned int hook_mask)
 {
 	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
 
@@ -81,12 +76,12 @@ static bool checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target xt_dscp_target[] __read_mostly = {
+static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "DSCP",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.target		= target,
+		.checkentry	= dscp_tg_check,
+		.target		= dscp_tg,
 		.targetsize	= sizeof(struct xt_DSCP_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -94,23 +89,23 @@ static struct xt_target xt_dscp_target[] __read_mostly = {
 	{
 		.name		= "DSCP",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.target		= target6,
+		.checkentry	= dscp_tg_check,
+		.target		= dscp_tg6,
 		.targetsize	= sizeof(struct xt_DSCP_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_dscp_target_init(void)
+static int __init dscp_tg_init(void)
 {
-	return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
+	return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
 }
 
-static void __exit xt_dscp_target_fini(void)
+static void __exit dscp_tg_exit(void)
 {
-	xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
+	xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
 }
 
-module_init(xt_dscp_target_init);
-module_exit(xt_dscp_target_fini);
+module_init(dscp_tg_init);
+module_exit(dscp_tg_exit);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index bc6503d77d7..de32aa5b57f 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -22,12 +22,9 @@ MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-target_v0(struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const struct xt_target *target,
-	  const void *targinfo)
+mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_mark_target_info *markinfo = targinfo;
 
@@ -36,12 +33,9 @@ target_v0(struct sk_buff *skb,
 }
 
 static unsigned int
-target_v1(struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const struct xt_target *target,
-	  const void *targinfo)
+mark_tg(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 	int mark = 0;
@@ -64,13 +58,10 @@ target_v1(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-
 static bool
-checkentry_v0(const char *tablename,
-	      const void *entry,
-	      const struct xt_target *target,
-	      void *targinfo,
-	      unsigned int hook_mask)
+mark_tg_check_v0(const char *tablename, const void *entry,
+                 const struct xt_target *target, void *targinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_mark_target_info *markinfo = targinfo;
 
@@ -82,11 +73,9 @@ checkentry_v0(const char *tablename,
 }
 
 static bool
-checkentry_v1(const char *tablename,
-	      const void *entry,
-	      const struct xt_target *target,
-	      void *targinfo,
-	      unsigned int hook_mask)
+mark_tg_check(const char *tablename, const void *entry,
+              const struct xt_target *target, void *targinfo,
+              unsigned int hook_mask)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 
@@ -112,7 +101,7 @@ struct compat_xt_mark_target_info_v1 {
 	u_int16_t	__pad2;
 };
 
-static void compat_from_user_v1(void *dst, void *src)
+static void mark_tg_compat_from_user(void *dst, void *src)
 {
 	const struct compat_xt_mark_target_info_v1 *cm = src;
 	struct xt_mark_target_info_v1 m = {
@@ -122,7 +111,7 @@ static void compat_from_user_v1(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int compat_to_user_v1(void __user *dst, void *src)
+static int mark_tg_compat_to_user(void __user *dst, void *src)
 {
 	const struct xt_mark_target_info_v1 *m = src;
 	struct compat_xt_mark_target_info_v1 cm = {
@@ -133,13 +122,13 @@ static int compat_to_user_v1(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target xt_mark_target[] __read_mostly = {
+static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name		= "MARK",
 		.family		= AF_INET,
 		.revision	= 0,
-		.checkentry	= checkentry_v0,
-		.target		= target_v0,
+		.checkentry	= mark_tg_check_v0,
+		.target		= mark_tg_v0,
 		.targetsize	= sizeof(struct xt_mark_target_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -148,13 +137,13 @@ static struct xt_target xt_mark_target[] __read_mostly = {
 		.name		= "MARK",
 		.family		= AF_INET,
 		.revision	= 1,
-		.checkentry	= checkentry_v1,
-		.target		= target_v1,
+		.checkentry	= mark_tg_check,
+		.target		= mark_tg,
 		.targetsize	= sizeof(struct xt_mark_target_info_v1),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
-		.compat_from_user = compat_from_user_v1,
-		.compat_to_user	= compat_to_user_v1,
+		.compat_from_user = mark_tg_compat_from_user,
+		.compat_to_user	= mark_tg_compat_to_user,
 #endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -163,23 +152,23 @@ static struct xt_target xt_mark_target[] __read_mostly = {
 		.name		= "MARK",
 		.family		= AF_INET6,
 		.revision	= 0,
-		.checkentry	= checkentry_v0,
-		.target		= target_v0,
+		.checkentry	= mark_tg_check_v0,
+		.target		= mark_tg_v0,
 		.targetsize	= sizeof(struct xt_mark_target_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_mark_init(void)
+static int __init mark_tg_init(void)
 {
-	return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
+	return xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
 }
 
-static void __exit xt_mark_fini(void)
+static void __exit mark_tg_exit(void)
 {
-	xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
+	xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
 }
 
-module_init(xt_mark_init);
-module_exit(xt_mark_fini);
+module_init(mark_tg_init);
+module_exit(mark_tg_exit);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 9fb449ffbf8..83af124e88c 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -20,10 +20,9 @@ MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_target(struct sk_buff *skb,
-	     const struct net_device *in, const struct net_device *out,
-	     unsigned int hooknum, const struct xt_target *target,
-	     const void *targinfo)
+nflog_tg(struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, unsigned int hooknum,
+         const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_nflog_info *info = targinfo;
 	struct nf_loginfo li;
@@ -39,9 +38,9 @@ nflog_target(struct sk_buff *skb,
 }
 
 static bool
-nflog_checkentry(const char *tablename, const void *entry,
-		 const struct xt_target *target, void *targetinfo,
-		 unsigned int hookmask)
+nflog_tg_check(const char *tablename, const void *entry,
+               const struct xt_target *target, void *targetinfo,
+               unsigned int hookmask)
 {
 	const struct xt_nflog_info *info = targetinfo;
 
@@ -52,35 +51,34 @@ nflog_checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_target xt_nflog_target[] __read_mostly = {
+static struct xt_target nflog_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFLOG",
 		.family		= AF_INET,
-		.checkentry	= nflog_checkentry,
-		.target		= nflog_target,
+		.checkentry	= nflog_tg_check,
+		.target		= nflog_tg,
 		.targetsize	= sizeof(struct xt_nflog_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NFLOG",
 		.family		= AF_INET6,
-		.checkentry	= nflog_checkentry,
-		.target		= nflog_target,
+		.checkentry	= nflog_tg_check,
+		.target		= nflog_tg,
 		.targetsize	= sizeof(struct xt_nflog_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_nflog_init(void)
+static int __init nflog_tg_init(void)
 {
-	return xt_register_targets(xt_nflog_target,
-				   ARRAY_SIZE(xt_nflog_target));
+	return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
 }
 
-static void __exit xt_nflog_fini(void)
+static void __exit nflog_tg_exit(void)
 {
-	xt_unregister_targets(xt_nflog_target, ARRAY_SIZE(xt_nflog_target));
+	xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
 }
 
-module_init(xt_nflog_init);
-module_exit(xt_nflog_fini);
+module_init(nflog_tg_init);
+module_exit(nflog_tg_exit);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index c3984e9f766..16b57c23f35 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,52 +24,48 @@ MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_NFQ_info *tinfo = targinfo;
 
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
 
-static struct xt_target xt_nfqueue_target[] __read_mostly = {
+static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
 		.family		= AF_INET,
-		.target		= target,
+		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NFQUEUE",
 		.family		= AF_INET6,
-		.target		= target,
+		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NFQUEUE",
 		.family		= NF_ARP,
-		.target		= target,
+		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_nfqueue_init(void)
+static int __init nfqueue_tg_init(void)
 {
-	return xt_register_targets(xt_nfqueue_target,
-				   ARRAY_SIZE(xt_nfqueue_target));
+	return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
 }
 
-static void __exit xt_nfqueue_fini(void)
+static void __exit nfqueue_tg_exit(void)
 {
-	xt_unregister_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target));
+	xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
 }
 
-module_init(xt_nfqueue_init);
-module_exit(xt_nfqueue_fini);
+module_init(nfqueue_tg_init);
+module_exit(nfqueue_tg_exit);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 4976ce18661..95712e4e997 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -12,12 +12,9 @@ MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+notrack_tg(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if (skb->nfct != NULL)
@@ -34,33 +31,32 @@ target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target xt_notrack_target[] __read_mostly = {
+static struct xt_target notrack_tg_reg[] __read_mostly = {
 	{
 		.name		= "NOTRACK",
 		.family		= AF_INET,
-		.target		= target,
+		.target		= notrack_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NOTRACK",
 		.family		= AF_INET6,
-		.target		= target,
+		.target		= notrack_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_notrack_init(void)
+static int __init notrack_tg_init(void)
 {
-	return xt_register_targets(xt_notrack_target,
-				   ARRAY_SIZE(xt_notrack_target));
+	return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
 }
 
-static void __exit xt_notrack_fini(void)
+static void __exit notrack_tg_exit(void)
 {
-	xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target));
+	xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
 }
 
-module_init(xt_notrack_init);
-module_exit(xt_notrack_fini);
+module_init(notrack_tg_init);
+module_exit(notrack_tg_exit);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 235806eb6ec..7d5439c9f18 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -28,10 +28,10 @@ MODULE_ALIAS("ip6t_SECMARK");
 
 static u8 mode;
 
-static unsigned int target(struct sk_buff *skb, const struct net_device *in,
-			   const struct net_device *out, unsigned int hooknum,
-			   const struct xt_target *target,
-			   const void *targinfo)
+static unsigned int
+secmark_tg(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	u32 secmark = 0;
 	const struct xt_secmark_target_info *info = targinfo;
@@ -81,9 +81,10 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 	return true;
 }
 
-static bool checkentry(const char *tablename, const void *entry,
-		       const struct xt_target *target, void *targinfo,
-		       unsigned int hook_mask)
+static bool
+secmark_tg_check(const char *tablename, const void *entry,
+                 const struct xt_target *target, void *targinfo,
+                 unsigned int hook_mask)
 {
 	struct xt_secmark_target_info *info = targinfo;
 
@@ -109,12 +110,12 @@ static bool checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_target xt_secmark_target[] __read_mostly = {
+static struct xt_target secmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "SECMARK",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.target		= target,
+		.checkentry	= secmark_tg_check,
+		.target		= secmark_tg,
 		.targetsize	= sizeof(struct xt_secmark_target_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -122,24 +123,23 @@ static struct xt_target xt_secmark_target[] __read_mostly = {
 	{
 		.name		= "SECMARK",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.target		= target,
+		.checkentry	= secmark_tg_check,
+		.target		= secmark_tg,
 		.targetsize	= sizeof(struct xt_secmark_target_info),
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_secmark_init(void)
+static int __init secmark_tg_init(void)
 {
-	return xt_register_targets(xt_secmark_target,
-				   ARRAY_SIZE(xt_secmark_target));
+	return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
 }
 
-static void __exit xt_secmark_fini(void)
+static void __exit secmark_tg_exit(void)
 {
-	xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target));
+	xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
 }
 
-module_init(xt_secmark_init);
-module_exit(xt_secmark_fini);
+module_init(secmark_tg_init);
+module_exit(secmark_tg_exit);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index bf6249e4406..e4ee4bc81ff 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -135,12 +135,9 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 }
 
 static unsigned int
-xt_tcpmss_target4(struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  unsigned int hooknum,
-		  const struct xt_target *target,
-		  const void *targinfo)
+tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
@@ -161,12 +158,9 @@ xt_tcpmss_target4(struct sk_buff *skb,
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-xt_tcpmss_target6(struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  unsigned int hooknum,
-		  const struct xt_target *target,
-		  const void *targinfo)
+tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
@@ -205,11 +199,9 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
 }
 
 static bool
-xt_tcpmss_checkentry4(const char *tablename,
-		      const void *entry,
-		      const struct xt_target *target,
-		      void *targinfo,
-		      unsigned int hook_mask)
+tcpmss_tg4_check(const char *tablename, const void *entry,
+                 const struct xt_target *target, void *targinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_tcpmss_info *info = targinfo;
 	const struct ipt_entry *e = entry;
@@ -230,11 +222,9 @@ xt_tcpmss_checkentry4(const char *tablename,
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static bool
-xt_tcpmss_checkentry6(const char *tablename,
-		      const void *entry,
-		      const struct xt_target *target,
-		      void *targinfo,
-		      unsigned int hook_mask)
+tcpmss_tg6_check(const char *tablename, const void *entry,
+                 const struct xt_target *target, void *targinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_tcpmss_info *info = targinfo;
 	const struct ip6t_entry *e = entry;
@@ -254,12 +244,12 @@ xt_tcpmss_checkentry6(const char *tablename,
 }
 #endif
 
-static struct xt_target xt_tcpmss_reg[] __read_mostly = {
+static struct xt_target tcpmss_tg_reg[] __read_mostly = {
 	{
 		.family		= AF_INET,
 		.name		= "TCPMSS",
-		.checkentry	= xt_tcpmss_checkentry4,
-		.target		= xt_tcpmss_target4,
+		.checkentry	= tcpmss_tg4_check,
+		.target		= tcpmss_tg4,
 		.targetsize	= sizeof(struct xt_tcpmss_info),
 		.proto		= IPPROTO_TCP,
 		.me		= THIS_MODULE,
@@ -268,8 +258,8 @@ static struct xt_target xt_tcpmss_reg[] __read_mostly = {
 	{
 		.family		= AF_INET6,
 		.name		= "TCPMSS",
-		.checkentry	= xt_tcpmss_checkentry6,
-		.target		= xt_tcpmss_target6,
+		.checkentry	= tcpmss_tg6_check,
+		.target		= tcpmss_tg6,
 		.targetsize	= sizeof(struct xt_tcpmss_info),
 		.proto		= IPPROTO_TCP,
 		.me		= THIS_MODULE,
@@ -277,15 +267,15 @@ static struct xt_target xt_tcpmss_reg[] __read_mostly = {
 #endif
 };
 
-static int __init xt_tcpmss_init(void)
+static int __init tcpmss_tg_init(void)
 {
-	return xt_register_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg));
+	return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
 }
 
-static void __exit xt_tcpmss_fini(void)
+static void __exit tcpmss_tg_exit(void)
 {
-	xt_unregister_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg));
+	xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
 }
 
-module_init(xt_tcpmss_init);
-module_exit(xt_tcpmss_fini);
+module_init(tcpmss_tg_init);
+module_exit(tcpmss_tg_exit);
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 26c5d08ab2c..219b9d2445c 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -10,44 +10,40 @@ MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo)
+trace_tg(struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, unsigned int hooknum,
+         const struct xt_target *target, const void *targinfo)
 {
 	skb->nf_trace = 1;
 	return XT_CONTINUE;
 }
 
-static struct xt_target xt_trace_target[] __read_mostly = {
+static struct xt_target trace_tg_reg[] __read_mostly = {
 	{
 		.name		= "TRACE",
 		.family		= AF_INET,
-		.target		= target,
+		.target		= trace_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "TRACE",
 		.family		= AF_INET6,
-		.target		= target,
+		.target		= trace_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_trace_init(void)
+static int __init trace_tg_init(void)
 {
-	return xt_register_targets(xt_trace_target,
-				   ARRAY_SIZE(xt_trace_target));
+	return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
 }
 
-static void __exit xt_trace_fini(void)
+static void __exit trace_tg_exit(void)
 {
-	xt_unregister_targets(xt_trace_target, ARRAY_SIZE(xt_trace_target));
+	xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
 }
 
-module_init(xt_trace_init);
-module_exit(xt_trace_fini);
+module_init(trace_tg_init);
+module_exit(trace_tg_exit);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 64bcdb0fe1e..4539d435ec7 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,46 +16,41 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protooff,
-      bool *hotdrop)
+comment_mt(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protooff,
+           bool *hotdrop)
 {
 	/* We always match */
 	return true;
 }
 
-static struct xt_match xt_comment_match[] __read_mostly = {
+static struct xt_match comment_mt_reg[] __read_mostly = {
 	{
 		.name		= "comment",
 		.family		= AF_INET,
-		.match		= match,
+		.match		= comment_mt,
 		.matchsize	= sizeof(struct xt_comment_info),
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "comment",
 		.family		= AF_INET6,
-		.match		= match,
+		.match		= comment_mt,
 		.matchsize	= sizeof(struct xt_comment_info),
 		.me		= THIS_MODULE
 	},
 };
 
-static int __init xt_comment_init(void)
+static int __init comment_mt_init(void)
 {
-	return xt_register_matches(xt_comment_match,
-				   ARRAY_SIZE(xt_comment_match));
+	return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
 }
 
-static void __exit xt_comment_fini(void)
+static void __exit comment_mt_exit(void)
 {
-	xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match));
+	xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
 }
 
-module_init(xt_comment_init);
-module_exit(xt_comment_fini);
+module_init(comment_mt_init);
+module_exit(comment_mt_exit);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 9ec50139b9a..752b7d8bbc9 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -17,14 +17,10 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
 	const struct nf_conn *ct;
@@ -96,11 +92,10 @@ match(const struct sk_buff *skb,
 		return what >= sinfo->count.from;
 }
 
-static bool check(const char *tablename,
-		  const void *ip,
-		  const struct xt_match *match,
-		  void *matchinfo,
-		  unsigned int hook_mask)
+static bool
+connbytes_mt_check(const char *tablename, const void *ip,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
 
@@ -124,43 +119,42 @@ static bool check(const char *tablename,
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo)
+connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_connbytes_match[] __read_mostly = {
+static struct xt_match connbytes_mt_reg[] __read_mostly = {
 	{
 		.name		= "connbytes",
 		.family		= AF_INET,
-		.checkentry	= check,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= connbytes_mt_check,
+		.match		= connbytes_mt,
+		.destroy	= connbytes_mt_destroy,
 		.matchsize	= sizeof(struct xt_connbytes_info),
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "connbytes",
 		.family		= AF_INET6,
-		.checkentry	= check,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= connbytes_mt_check,
+		.match		= connbytes_mt,
+		.destroy	= connbytes_mt_destroy,
 		.matchsize	= sizeof(struct xt_connbytes_info),
 		.me		= THIS_MODULE
 	},
 };
 
-static int __init xt_connbytes_init(void)
+static int __init connbytes_mt_init(void)
 {
-	return xt_register_matches(xt_connbytes_match,
-				   ARRAY_SIZE(xt_connbytes_match));
+	return xt_register_matches(connbytes_mt_reg,
+	       ARRAY_SIZE(connbytes_mt_reg));
 }
 
-static void __exit xt_connbytes_fini(void)
+static void __exit connbytes_mt_exit(void)
 {
-	xt_unregister_matches(xt_connbytes_match,
-			      ARRAY_SIZE(xt_connbytes_match));
+	xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg));
 }
 
-module_init(xt_connbytes_init);
-module_exit(xt_connbytes_fini);
+module_init(connbytes_mt_init);
+module_exit(connbytes_mt_exit);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index d7becf08a93..26d12b00a9c 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -54,7 +54,7 @@ static inline unsigned int connlimit_iphash(__be32 addr)
 
 static inline unsigned int
 connlimit_iphash6(const union nf_conntrack_address *addr,
-		  const union nf_conntrack_address *mask)
+                  const union nf_conntrack_address *mask)
 {
 	union nf_conntrack_address res;
 	unsigned int i;
@@ -178,12 +178,11 @@ static int count_them(struct xt_connlimit_data *data,
 	return matches;
 }
 
-static bool connlimit_match(const struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
-			    const struct xt_match *match,
-			    const void *matchinfo, int offset,
-			    unsigned int protoff, bool *hotdrop)
+static bool
+connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	const struct xt_connlimit_info *info = matchinfo;
 	union nf_conntrack_address addr, mask;
@@ -227,9 +226,10 @@ static bool connlimit_match(const struct sk_buff *skb,
 	return false;
 }
 
-static bool connlimit_check(const char *tablename, const void *ip,
-			    const struct xt_match *match, void *matchinfo,
-			    unsigned int hook_mask)
+static bool
+connlimit_mt_check(const char *tablename, const void *ip,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
 {
 	struct xt_connlimit_info *info = matchinfo;
 	unsigned int i;
@@ -254,7 +254,8 @@ static bool connlimit_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static void connlimit_destroy(const struct xt_match *match, void *matchinfo)
+static void
+connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	struct xt_connlimit_info *info = matchinfo;
 	struct xt_connlimit_conn *conn;
@@ -274,39 +275,40 @@ static void connlimit_destroy(const struct xt_match *match, void *matchinfo)
 	kfree(info->data);
 }
 
-static struct xt_match connlimit_reg[] __read_mostly = {
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
 	{
 		.name       = "connlimit",
 		.family     = AF_INET,
-		.checkentry = connlimit_check,
-		.match      = connlimit_match,
+		.checkentry = connlimit_mt_check,
+		.match      = connlimit_mt,
 		.matchsize  = sizeof(struct xt_connlimit_info),
-		.destroy    = connlimit_destroy,
+		.destroy    = connlimit_mt_destroy,
 		.me         = THIS_MODULE,
 	},
 	{
 		.name       = "connlimit",
 		.family     = AF_INET6,
-		.checkentry = connlimit_check,
-		.match      = connlimit_match,
+		.checkentry = connlimit_mt_check,
+		.match      = connlimit_mt,
 		.matchsize  = sizeof(struct xt_connlimit_info),
-		.destroy    = connlimit_destroy,
+		.destroy    = connlimit_mt_destroy,
 		.me         = THIS_MODULE,
 	},
 };
 
-static int __init xt_connlimit_init(void)
+static int __init connlimit_mt_init(void)
 {
-	return xt_register_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg));
+	return xt_register_matches(connlimit_mt_reg,
+	       ARRAY_SIZE(connlimit_mt_reg));
 }
 
-static void __exit xt_connlimit_exit(void)
+static void __exit connlimit_mt_exit(void)
 {
-	xt_unregister_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg));
+	xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
 }
 
-module_init(xt_connlimit_init);
-module_exit(xt_connlimit_exit);
+module_init(connlimit_mt_init);
+module_exit(connlimit_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
 MODULE_DESCRIPTION("netfilter xt_connlimit match module");
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 9f67920af41..7e0874af318 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -32,14 +32,10 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+connmark_mt(const struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, const struct xt_match *match,
+            const void *matchinfo, int offset, unsigned int protoff,
+            bool *hotdrop)
 {
 	const struct xt_connmark_info *info = matchinfo;
 	const struct nf_conn *ct;
@@ -53,11 +49,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *ip,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+connmark_mt_check(const char *tablename, const void *ip,
+                  const struct xt_match *match, void *matchinfo,
+                  unsigned int hook_mask)
 {
 	const struct xt_connmark_info *cm = matchinfo;
 
@@ -74,7 +68,7 @@ checkentry(const char *tablename,
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo)
+connmark_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
@@ -87,7 +81,7 @@ struct compat_xt_connmark_info {
 	u_int16_t	__pad2;
 };
 
-static void compat_from_user(void *dst, void *src)
+static void connmark_mt_compat_from_user(void *dst, void *src)
 {
 	const struct compat_xt_connmark_info *cm = src;
 	struct xt_connmark_info m = {
@@ -98,7 +92,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int connmark_mt_compat_to_user(void __user *dst, void *src)
 {
 	const struct xt_connmark_info *m = src;
 	struct compat_xt_connmark_info cm = {
@@ -110,42 +104,42 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match xt_connmark_match[] __read_mostly = {
+static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name		= "connmark",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= connmark_mt_check,
+		.match		= connmark_mt,
+		.destroy	= connmark_mt_destroy,
 		.matchsize	= sizeof(struct xt_connmark_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_connmark_info),
-		.compat_from_user = compat_from_user,
-		.compat_to_user	= compat_to_user,
+		.compat_from_user = connmark_mt_compat_from_user,
+		.compat_to_user	= connmark_mt_compat_to_user,
 #endif
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "connmark",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= connmark_mt_check,
+		.match		= connmark_mt,
+		.destroy	= connmark_mt_destroy,
 		.matchsize	= sizeof(struct xt_connmark_info),
 		.me		= THIS_MODULE
 	},
 };
 
-static int __init xt_connmark_init(void)
+static int __init connmark_mt_init(void)
 {
-	return xt_register_matches(xt_connmark_match,
-				   ARRAY_SIZE(xt_connmark_match));
+	return xt_register_matches(connmark_mt_reg,
+	       ARRAY_SIZE(connmark_mt_reg));
 }
 
-static void __exit xt_connmark_fini(void)
+static void __exit connmark_mt_exit(void)
 {
-	xt_unregister_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match));
+	xt_unregister_matches(connmark_mt_reg, ARRAY_SIZE(connmark_mt_reg));
 }
 
-module_init(xt_connmark_init);
-module_exit(xt_connmark_fini);
+module_init(connmark_mt_init);
+module_exit(connmark_mt_exit);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index ca4b69f020a..eb7e135ca6c 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -20,14 +20,10 @@ MODULE_DESCRIPTION("iptables connection tracking match module");
 MODULE_ALIAS("ipt_conntrack");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	const struct xt_conntrack_info *sinfo = matchinfo;
 	const struct nf_conn *ct;
@@ -115,11 +111,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *ip,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+conntrack_mt_check(const char *tablename, const void *ip,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
 {
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
@@ -129,7 +123,8 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static void destroy(const struct xt_match *match, void *matchinfo)
+static void
+conntrack_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
@@ -148,7 +143,7 @@ struct compat_xt_conntrack_info
 	u_int8_t			invflags;
 };
 
-static void compat_from_user(void *dst, void *src)
+static void conntrack_mt_compat_from_user(void *dst, void *src)
 {
 	const struct compat_xt_conntrack_info *cm = src;
 	struct xt_conntrack_info m = {
@@ -165,7 +160,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int conntrack_mt_compat_to_user(void __user *dst, void *src)
 {
 	const struct xt_conntrack_info *m = src;
 	struct compat_xt_conntrack_info cm = {
@@ -183,30 +178,30 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif
 
-static struct xt_match conntrack_match __read_mostly = {
+static struct xt_match conntrack_mt_reg __read_mostly = {
 	.name		= "conntrack",
-	.match		= match,
-	.checkentry	= checkentry,
-	.destroy	= destroy,
+	.match		= conntrack_mt,
+	.checkentry	= conntrack_mt_check,
+	.destroy	= conntrack_mt_destroy,
 	.matchsize	= sizeof(struct xt_conntrack_info),
 #ifdef CONFIG_COMPAT
 	.compatsize	= sizeof(struct compat_xt_conntrack_info),
-	.compat_from_user = compat_from_user,
-	.compat_to_user	= compat_to_user,
+	.compat_from_user = conntrack_mt_compat_from_user,
+	.compat_to_user	= conntrack_mt_compat_to_user,
 #endif
 	.family		= AF_INET,
 	.me		= THIS_MODULE,
 };
 
-static int __init xt_conntrack_init(void)
+static int __init conntrack_mt_init(void)
 {
-	return xt_register_match(&conntrack_match);
+	return xt_register_match(&conntrack_mt_reg);
 }
 
-static void __exit xt_conntrack_fini(void)
+static void __exit conntrack_mt_exit(void)
 {
-	xt_unregister_match(&conntrack_match);
+	xt_unregister_match(&conntrack_mt_reg);
 }
 
-module_init(xt_conntrack_init);
-module_exit(xt_conntrack_fini);
+module_init(conntrack_mt_init);
+module_exit(conntrack_mt_exit);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index c2b1b24ee33..ab2f7e9f75a 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -93,14 +93,9 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+dccp_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_dccp_info *info = matchinfo;
 	struct dccp_hdr _dh, *dh;
@@ -128,11 +123,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *inf,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+dccp_mt_check(const char *tablename, const void *inf,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	const struct xt_dccp_info *info = matchinfo;
 
@@ -141,12 +134,12 @@ checkentry(const char *tablename,
 		&& !(info->invflags & ~info->flags);
 }
 
-static struct xt_match xt_dccp_match[] __read_mostly = {
+static struct xt_match dccp_mt_reg[] __read_mostly = {
 	{
 		.name 		= "dccp",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= dccp_mt_check,
+		.match		= dccp_mt,
 		.matchsize	= sizeof(struct xt_dccp_info),
 		.proto		= IPPROTO_DCCP,
 		.me 		= THIS_MODULE,
@@ -154,15 +147,15 @@ static struct xt_match xt_dccp_match[] __read_mostly = {
 	{
 		.name 		= "dccp",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= dccp_mt_check,
+		.match		= dccp_mt,
 		.matchsize	= sizeof(struct xt_dccp_info),
 		.proto		= IPPROTO_DCCP,
 		.me 		= THIS_MODULE,
 	},
 };
 
-static int __init xt_dccp_init(void)
+static int __init dccp_mt_init(void)
 {
 	int ret;
 
@@ -172,7 +165,7 @@ static int __init xt_dccp_init(void)
 	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
 	if (!dccp_optbuf)
 		return -ENOMEM;
-	ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
+	ret = xt_register_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
 	if (ret)
 		goto out_kfree;
 	return ret;
@@ -182,11 +175,11 @@ out_kfree:
 	return ret;
 }
 
-static void __exit xt_dccp_fini(void)
+static void __exit dccp_mt_exit(void)
 {
-	xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
+	xt_unregister_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
 	kfree(dccp_optbuf);
 }
 
-module_init(xt_dccp_init);
-module_exit(xt_dccp_fini);
+module_init(dccp_mt_init);
+module_exit(dccp_mt_exit);
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index dde6d66e0d3..63f7354ca9a 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -22,14 +22,10 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_dscp");
 MODULE_ALIAS("ip6t_dscp");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  const struct xt_match *match,
-		  const void *matchinfo,
-		  int offset,
-		  unsigned int protoff,
-		  bool *hotdrop)
+static bool
+dscp_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -37,14 +33,11 @@ static bool match(const struct sk_buff *skb,
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static bool match6(const struct sk_buff *skb,
-		   const struct net_device *in,
-		   const struct net_device *out,
-		   const struct xt_match *match,
-		   const void *matchinfo,
-		   int offset,
-		   unsigned int protoff,
-		   bool *hotdrop)
+static bool
+dscp_mt6(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -52,11 +45,10 @@ static bool match6(const struct sk_buff *skb,
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static bool checkentry(const char *tablename,
-		       const void *info,
-		       const struct xt_match *match,
-		       void *matchinfo,
-		       unsigned int hook_mask)
+static bool
+dscp_mt_check(const char *tablename, const void *info,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
 
@@ -68,34 +60,34 @@ static bool checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match xt_dscp_match[] __read_mostly = {
+static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "dscp",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= dscp_mt_check,
+		.match		= dscp_mt,
 		.matchsize	= sizeof(struct xt_dscp_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "dscp",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match6,
+		.checkentry	= dscp_mt_check,
+		.match		= dscp_mt6,
 		.matchsize	= sizeof(struct xt_dscp_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_dscp_match_init(void)
+static int __init dscp_mt_init(void)
 {
-	return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
+	return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
 }
 
-static void __exit xt_dscp_match_fini(void)
+static void __exit dscp_mt_exit(void)
 {
-	xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
+	xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
 }
 
-module_init(xt_dscp_match_init);
-module_exit(xt_dscp_match_fini);
+module_init(dscp_mt_init);
+module_exit(dscp_mt_exit);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index b11378e001b..d7c90ac393d 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -43,14 +43,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+esp_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ip_esp_hdr _esp, *eh;
 	const struct xt_esp *espinfo = matchinfo;
@@ -75,11 +70,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *ip_void,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+esp_mt_check(const char *tablename, const void *ip_void,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct xt_esp *espinfo = matchinfo;
 
@@ -91,12 +84,12 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match xt_esp_match[] __read_mostly = {
+static struct xt_match esp_mt_reg[] __read_mostly = {
 	{
 		.name		= "esp",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= esp_mt_check,
+		.match		= esp_mt,
 		.matchsize	= sizeof(struct xt_esp),
 		.proto		= IPPROTO_ESP,
 		.me		= THIS_MODULE,
@@ -104,23 +97,23 @@ static struct xt_match xt_esp_match[] __read_mostly = {
 	{
 		.name		= "esp",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= esp_mt_check,
+		.match		= esp_mt,
 		.matchsize	= sizeof(struct xt_esp),
 		.proto		= IPPROTO_ESP,
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_esp_init(void)
+static int __init esp_mt_init(void)
 {
-	return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
+	return xt_register_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
 }
 
-static void __exit xt_esp_cleanup(void)
+static void __exit esp_mt_exit(void)
 {
-	xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
+	xt_unregister_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
 }
 
-module_init(xt_esp_init);
-module_exit(xt_esp_cleanup);
+module_init(esp_mt_init);
+module_exit(esp_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2ef44d8560c..49a9e691dc1 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -441,14 +441,10 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_match(const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const struct xt_match *match,
-		const void *matchinfo,
-		int offset,
-		unsigned int protoff,
-		bool *hotdrop)
+hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	const struct xt_hashlimit_info *r =
 		((const struct xt_hashlimit_info *)matchinfo)->u.master;
@@ -500,11 +496,9 @@ hotdrop:
 }
 
 static bool
-hashlimit_checkentry(const char *tablename,
-		     const void *inf,
-		     const struct xt_match *match,
-		     void *matchinfo,
-		     unsigned int hook_mask)
+hashlimit_mt_check(const char *tablename, const void *inf,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
 {
 	struct xt_hashlimit_info *r = matchinfo;
 
@@ -548,7 +542,7 @@ hashlimit_checkentry(const char *tablename,
 }
 
 static void
-hashlimit_destroy(const struct xt_match *match, void *matchinfo)
+hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	const struct xt_hashlimit_info *r = matchinfo;
 
@@ -563,7 +557,7 @@ struct compat_xt_hashlimit_info {
 	compat_uptr_t master;
 };
 
-static void compat_from_user(void *dst, void *src)
+static void hashlimit_mt_compat_from_user(void *dst, void *src)
 {
 	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 
@@ -571,7 +565,7 @@ static void compat_from_user(void *dst, void *src)
 	memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
 {
 	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 
@@ -579,33 +573,33 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif
 
-static struct xt_match xt_hashlimit[] __read_mostly = {
+static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name		= "hashlimit",
 		.family		= AF_INET,
-		.match		= hashlimit_match,
+		.match		= hashlimit_mt,
 		.matchsize	= sizeof(struct xt_hashlimit_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_hashlimit_info),
-		.compat_from_user = compat_from_user,
-		.compat_to_user	= compat_to_user,
+		.compat_from_user = hashlimit_mt_compat_from_user,
+		.compat_to_user	= hashlimit_mt_compat_to_user,
 #endif
-		.checkentry	= hashlimit_checkentry,
-		.destroy	= hashlimit_destroy,
+		.checkentry	= hashlimit_mt_check,
+		.destroy	= hashlimit_mt_destroy,
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "hashlimit",
 		.family		= AF_INET6,
-		.match		= hashlimit_match,
+		.match		= hashlimit_mt,
 		.matchsize	= sizeof(struct xt_hashlimit_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_hashlimit_info),
-		.compat_from_user = compat_from_user,
-		.compat_to_user	= compat_to_user,
+		.compat_from_user = hashlimit_mt_compat_from_user,
+		.compat_to_user	= hashlimit_mt_compat_to_user,
 #endif
-		.checkentry	= hashlimit_checkentry,
-		.destroy	= hashlimit_destroy,
+		.checkentry	= hashlimit_mt_check,
+		.destroy	= hashlimit_mt_destroy,
 		.me		= THIS_MODULE
 	},
 };
@@ -728,11 +722,12 @@ static const struct file_operations dl_file_ops = {
 	.release = seq_release
 };
 
-static int __init xt_hashlimit_init(void)
+static int __init hashlimit_mt_init(void)
 {
 	int err;
 
-	err = xt_register_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+	err = xt_register_matches(hashlimit_mt_reg,
+	      ARRAY_SIZE(hashlimit_mt_reg));
 	if (err < 0)
 		goto err1;
 
@@ -762,19 +757,19 @@ err4:
 err3:
 	kmem_cache_destroy(hashlimit_cachep);
 err2:
-	xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 err1:
 	return err;
 
 }
 
-static void __exit xt_hashlimit_fini(void)
+static void __exit hashlimit_mt_exit(void)
 {
 	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
 	remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
 	kmem_cache_destroy(hashlimit_cachep);
-	xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 }
 
-module_init(xt_hashlimit_init);
-module_exit(xt_hashlimit_fini);
+module_init(hashlimit_mt_init);
+module_exit(hashlimit_mt_exit);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index d842c4a6d63..f342788a576 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,14 +24,10 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+helper_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_helper_info *info = matchinfo;
 	const struct nf_conn *ct;
@@ -61,11 +57,10 @@ match(const struct sk_buff *skb,
 	return ret;
 }
 
-static bool check(const char *tablename,
-		  const void *inf,
-		  const struct xt_match *match,
-		  void *matchinfo,
-		  unsigned int hook_mask)
+static bool
+helper_mt_check(const char *tablename, const void *inf,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
 {
 	struct xt_helper_info *info = matchinfo;
 
@@ -78,44 +73,41 @@ static bool check(const char *tablename,
 	return true;
 }
 
-static void
-destroy(const struct xt_match *match, void *matchinfo)
+static void helper_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_helper_match[] __read_mostly = {
+static struct xt_match helper_mt_reg[] __read_mostly = {
 	{
 		.name		= "helper",
 		.family		= AF_INET,
-		.checkentry	= check,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= helper_mt_check,
+		.match		= helper_mt,
+		.destroy	= helper_mt_destroy,
 		.matchsize	= sizeof(struct xt_helper_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "helper",
 		.family		= AF_INET6,
-		.checkentry	= check,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= helper_mt_check,
+		.match		= helper_mt,
+		.destroy	= helper_mt_destroy,
 		.matchsize	= sizeof(struct xt_helper_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_helper_init(void)
+static int __init helper_mt_init(void)
 {
-	return xt_register_matches(xt_helper_match,
-				   ARRAY_SIZE(xt_helper_match));
+	return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
 }
 
-static void __exit xt_helper_fini(void)
+static void __exit helper_mt_exit(void)
 {
-	xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match));
+	xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
 }
 
-module_init(xt_helper_init);
-module_exit(xt_helper_fini);
-
+module_init(helper_mt_init);
+module_exit(helper_mt_exit);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 3dad173d973..ea545785965 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,14 +21,10 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+length_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -37,14 +33,10 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-match6(const struct sk_buff *skb,
-       const struct net_device *in,
-       const struct net_device *out,
-       const struct xt_match *match,
-       const void *matchinfo,
-       int offset,
-       unsigned int protoff,
-       bool *hotdrop)
+length_mt6(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
@@ -53,33 +45,32 @@ match6(const struct sk_buff *skb,
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
-static struct xt_match xt_length_match[] __read_mostly = {
+static struct xt_match length_mt_reg[] __read_mostly = {
 	{
 		.name		= "length",
 		.family		= AF_INET,
-		.match		= match,
+		.match		= length_mt,
 		.matchsize	= sizeof(struct xt_length_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "length",
 		.family		= AF_INET6,
-		.match		= match6,
+		.match		= length_mt6,
 		.matchsize	= sizeof(struct xt_length_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_length_init(void)
+static int __init length_mt_init(void)
 {
-	return xt_register_matches(xt_length_match,
-				   ARRAY_SIZE(xt_length_match));
+	return xt_register_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
 }
 
-static void __exit xt_length_fini(void)
+static void __exit length_mt_exit(void)
 {
-	xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match));
+	xt_unregister_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
 }
 
-module_init(xt_length_init);
-module_exit(xt_length_fini);
+module_init(length_mt_init);
+module_exit(length_mt_exit);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index f263a77e57b..c9352dbf3a1 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -58,14 +58,10 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ipt_limit_match(const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const struct xt_match *match,
-		const void *matchinfo,
-		int offset,
-		unsigned int protoff,
-		bool *hotdrop)
+limit_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	struct xt_rateinfo *r =
 		((const struct xt_rateinfo *)matchinfo)->master;
@@ -100,11 +96,9 @@ user2credits(u_int32_t user)
 }
 
 static bool
-ipt_limit_checkentry(const char *tablename,
-		     const void *inf,
-		     const struct xt_match *match,
-		     void *matchinfo,
-		     unsigned int hook_mask)
+limit_mt_check(const char *tablename, const void *inf,
+               const struct xt_match *match, void *matchinfo,
+               unsigned int hook_mask)
 {
 	struct xt_rateinfo *r = matchinfo;
 
@@ -143,7 +137,7 @@ struct compat_xt_rateinfo {
 
 /* To keep the full "prev" timestamp, the upper 32 bits are stored in the
  * master pointer, which does not need to be preserved. */
-static void compat_from_user(void *dst, void *src)
+static void limit_mt_compat_from_user(void *dst, void *src)
 {
 	const struct compat_xt_rateinfo *cm = src;
 	struct xt_rateinfo m = {
@@ -157,7 +151,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int limit_mt_compat_to_user(void __user *dst, void *src)
 {
 	const struct xt_rateinfo *m = src;
 	struct compat_xt_rateinfo cm = {
@@ -173,39 +167,39 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match xt_limit_match[] __read_mostly = {
+static struct xt_match limit_mt_reg[] __read_mostly = {
 	{
 		.name		= "limit",
 		.family		= AF_INET,
-		.checkentry	= ipt_limit_checkentry,
-		.match		= ipt_limit_match,
+		.checkentry	= limit_mt_check,
+		.match		= limit_mt,
 		.matchsize	= sizeof(struct xt_rateinfo),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_rateinfo),
-		.compat_from_user = compat_from_user,
-		.compat_to_user	= compat_to_user,
+		.compat_from_user = limit_mt_compat_from_user,
+		.compat_to_user	= limit_mt_compat_to_user,
 #endif
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "limit",
 		.family		= AF_INET6,
-		.checkentry	= ipt_limit_checkentry,
-		.match		= ipt_limit_match,
+		.checkentry	= limit_mt_check,
+		.match		= limit_mt,
 		.matchsize	= sizeof(struct xt_rateinfo),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_limit_init(void)
+static int __init limit_mt_init(void)
 {
-	return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
+	return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
 }
 
-static void __exit xt_limit_fini(void)
+static void __exit limit_mt_exit(void)
 {
-	xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
+	xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
 }
 
-module_init(xt_limit_init);
-module_exit(xt_limit_fini);
+module_init(limit_mt_init);
+module_exit(limit_mt_exit);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 6ff4479ca63..7d89863a78b 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -25,14 +25,9 @@ MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+mac_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
     const struct xt_mac_info *info = matchinfo;
 
@@ -44,11 +39,11 @@ match(const struct sk_buff *skb,
 		^ info->invert);
 }
 
-static struct xt_match xt_mac_match[] __read_mostly = {
+static struct xt_match mac_mt_reg[] __read_mostly = {
 	{
 		.name		= "mac",
 		.family		= AF_INET,
-		.match		= match,
+		.match		= mac_mt,
 		.matchsize	= sizeof(struct xt_mac_info),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN) |
@@ -58,7 +53,7 @@ static struct xt_match xt_mac_match[] __read_mostly = {
 	{
 		.name		= "mac",
 		.family		= AF_INET6,
-		.match		= match,
+		.match		= mac_mt,
 		.matchsize	= sizeof(struct xt_mac_info),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN) |
@@ -67,15 +62,15 @@ static struct xt_match xt_mac_match[] __read_mostly = {
 	},
 };
 
-static int __init xt_mac_init(void)
+static int __init mac_mt_init(void)
 {
-	return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
+	return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
 }
 
-static void __exit xt_mac_fini(void)
+static void __exit mac_mt_exit(void)
 {
-	xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
+	xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
 }
 
-module_init(xt_mac_init);
-module_exit(xt_mac_fini);
+module_init(mac_mt_init);
+module_exit(mac_mt_exit);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index c02a7f8f392..650cdea97e7 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -20,14 +20,9 @@ MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+mark_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_mark_info *info = matchinfo;
 
@@ -35,11 +30,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *entry,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+mark_mt_check(const char *tablename, const void *entry,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	const struct xt_mark_info *minfo = matchinfo;
 
@@ -58,7 +51,7 @@ struct compat_xt_mark_info {
 	u_int16_t	__pad2;
 };
 
-static void compat_from_user(void *dst, void *src)
+static void mark_mt_compat_from_user(void *dst, void *src)
 {
 	const struct compat_xt_mark_info *cm = src;
 	struct xt_mark_info m = {
@@ -69,7 +62,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int mark_mt_compat_to_user(void __user *dst, void *src)
 {
 	const struct xt_mark_info *m = src;
 	struct compat_xt_mark_info cm = {
@@ -81,39 +74,39 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match xt_mark_match[] __read_mostly = {
+static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name		= "mark",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= mark_mt_check,
+		.match		= mark_mt,
 		.matchsize	= sizeof(struct xt_mark_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_info),
-		.compat_from_user = compat_from_user,
-		.compat_to_user	= compat_to_user,
+		.compat_from_user = mark_mt_compat_from_user,
+		.compat_to_user	= mark_mt_compat_to_user,
 #endif
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "mark",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= mark_mt_check,
+		.match		= mark_mt,
 		.matchsize	= sizeof(struct xt_mark_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_mark_init(void)
+static int __init mark_mt_init(void)
 {
-	return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
+	return xt_register_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
 }
 
-static void __exit xt_mark_fini(void)
+static void __exit mark_mt_exit(void)
 {
-	xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
+	xt_unregister_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
 }
 
-module_init(xt_mark_init);
-module_exit(xt_mark_fini);
+module_init(mark_mt_init);
+module_exit(mark_mt_exit);
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index e8ae10284ac..d03cc376811 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -34,8 +34,8 @@ MODULE_ALIAS("ip6t_multiport");
 
 /* Returns 1 if the port is matched by the test, 0 otherwise. */
 static inline bool
-ports_match(const u_int16_t *portlist, enum xt_multiport_flags flags,
-	    u_int8_t count, u_int16_t src, u_int16_t dst)
+ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
+	       u_int8_t count, u_int16_t src, u_int16_t dst)
 {
 	unsigned int i;
 	for (i = 0; i < count; i++) {
@@ -95,14 +95,10 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+                const struct net_device *out, const struct xt_match *match,
+                const void *matchinfo, int offset, unsigned int protoff,
+                bool *hotdrop)
 {
 	__be16 _ports[2], *pptr;
 	const struct xt_multiport *multiinfo = matchinfo;
@@ -120,20 +116,15 @@ match(const struct sk_buff *skb,
 		return false;
 	}
 
-	return ports_match(multiinfo->ports,
-			   multiinfo->flags, multiinfo->count,
-			   ntohs(pptr[0]), ntohs(pptr[1]));
+	return ports_match_v0(multiinfo->ports, multiinfo->flags,
+	       multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1]));
 }
 
 static bool
-match_v1(const struct sk_buff *skb,
-	 const struct net_device *in,
-	 const struct net_device *out,
-	 const struct xt_match *match,
-	 const void *matchinfo,
-	 int offset,
-	 unsigned int protoff,
-	 bool *hotdrop)
+multiport_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	__be16 _ports[2], *pptr;
 	const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -173,11 +164,9 @@ check(u_int16_t proto,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *info,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+multiport_mt_check_v0(const char *tablename, const void *info,
+                      const struct xt_match *match, void *matchinfo,
+                      unsigned int hook_mask)
 {
 	const struct ipt_ip *ip = info;
 	const struct xt_multiport *multiinfo = matchinfo;
@@ -187,11 +176,9 @@ checkentry(const char *tablename,
 }
 
 static bool
-checkentry_v1(const char *tablename,
-	      const void *info,
-	      const struct xt_match *match,
-	      void *matchinfo,
-	      unsigned int hook_mask)
+multiport_mt_check(const char *tablename, const void *info,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
 {
 	const struct ipt_ip *ip = info;
 	const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -201,11 +188,9 @@ checkentry_v1(const char *tablename,
 }
 
 static bool
-checkentry6(const char *tablename,
-	    const void *info,
-	    const struct xt_match *match,
-	    void *matchinfo,
-	    unsigned int hook_mask)
+multiport_mt6_check_v0(const char *tablename, const void *info,
+                       const struct xt_match *match, void *matchinfo,
+                       unsigned int hook_mask)
 {
 	const struct ip6t_ip6 *ip = info;
 	const struct xt_multiport *multiinfo = matchinfo;
@@ -215,11 +200,9 @@ checkentry6(const char *tablename,
 }
 
 static bool
-checkentry6_v1(const char *tablename,
-	       const void *info,
-	       const struct xt_match *match,
-	       void *matchinfo,
-	       unsigned int hook_mask)
+multiport_mt6_check(const char *tablename, const void *info,
+                    const struct xt_match *match, void *matchinfo,
+                    unsigned int hook_mask)
 {
 	const struct ip6t_ip6 *ip = info;
 	const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -228,13 +211,13 @@ checkentry6_v1(const char *tablename,
 		     multiinfo->count);
 }
 
-static struct xt_match xt_multiport_match[] __read_mostly = {
+static struct xt_match multiport_mt_reg[] __read_mostly = {
 	{
 		.name		= "multiport",
 		.family		= AF_INET,
 		.revision	= 0,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= multiport_mt_check_v0,
+		.match		= multiport_mt_v0,
 		.matchsize	= sizeof(struct xt_multiport),
 		.me		= THIS_MODULE,
 	},
@@ -242,8 +225,8 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
 		.name		= "multiport",
 		.family		= AF_INET,
 		.revision	= 1,
-		.checkentry	= checkentry_v1,
-		.match		= match_v1,
+		.checkentry	= multiport_mt_check,
+		.match		= multiport_mt,
 		.matchsize	= sizeof(struct xt_multiport_v1),
 		.me		= THIS_MODULE,
 	},
@@ -251,8 +234,8 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
 		.name		= "multiport",
 		.family		= AF_INET6,
 		.revision	= 0,
-		.checkentry	= checkentry6,
-		.match		= match,
+		.checkentry	= multiport_mt6_check_v0,
+		.match		= multiport_mt_v0,
 		.matchsize	= sizeof(struct xt_multiport),
 		.me		= THIS_MODULE,
 	},
@@ -260,24 +243,23 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
 		.name		= "multiport",
 		.family		= AF_INET6,
 		.revision	= 1,
-		.checkentry	= checkentry6_v1,
-		.match		= match_v1,
+		.checkentry	= multiport_mt6_check,
+		.match		= multiport_mt,
 		.matchsize	= sizeof(struct xt_multiport_v1),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_multiport_init(void)
+static int __init multiport_mt_init(void)
 {
-	return xt_register_matches(xt_multiport_match,
-				   ARRAY_SIZE(xt_multiport_match));
+	return xt_register_matches(multiport_mt_reg,
+	       ARRAY_SIZE(multiport_mt_reg));
 }
 
-static void __exit xt_multiport_fini(void)
+static void __exit multiport_mt_exit(void)
 {
-	xt_unregister_matches(xt_multiport_match,
-			      ARRAY_SIZE(xt_multiport_match));
+	xt_unregister_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg));
 }
 
-module_init(xt_multiport_init);
-module_exit(xt_multiport_fini);
+module_init(multiport_mt_init);
+module_exit(multiport_mt_exit);
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index e91aee74de5..678b6833a80 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -21,14 +21,10 @@ MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+physdev_mt(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
 {
 	int i;
 	static const char nulldevname[IFNAMSIZ];
@@ -99,11 +95,9 @@ match_outdev:
 }
 
 static bool
-checkentry(const char *tablename,
-		       const void *ip,
-		       const struct xt_match *match,
-		       void *matchinfo,
-		       unsigned int hook_mask)
+physdev_mt_check(const char *tablename, const void *ip,
+                 const struct xt_match *match, void *matchinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_physdev_info *info = matchinfo;
 
@@ -124,35 +118,34 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match xt_physdev_match[] __read_mostly = {
+static struct xt_match physdev_mt_reg[] __read_mostly = {
 	{
 		.name		= "physdev",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= physdev_mt_check,
+		.match		= physdev_mt,
 		.matchsize	= sizeof(struct xt_physdev_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "physdev",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= physdev_mt_check,
+		.match		= physdev_mt,
 		.matchsize	= sizeof(struct xt_physdev_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_physdev_init(void)
+static int __init physdev_mt_init(void)
 {
-	return xt_register_matches(xt_physdev_match,
-				   ARRAY_SIZE(xt_physdev_match));
+	return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
 }
 
-static void __exit xt_physdev_fini(void)
+static void __exit physdev_mt_exit(void)
 {
-	xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match));
+	xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
 }
 
-module_init(xt_physdev_init);
-module_exit(xt_physdev_fini);
+module_init(physdev_mt_init);
+module_exit(physdev_mt_exit);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index a52925f12f3..c598bbed8e0 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -21,14 +21,11 @@ MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
 MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
-static bool match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+static bool
+pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
 {
 	u_int8_t type;
 	const struct xt_pkttype_info *info = matchinfo;
@@ -43,33 +40,32 @@ static bool match(const struct sk_buff *skb,
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match xt_pkttype_match[] __read_mostly = {
+static struct xt_match pkttype_mt_reg[] __read_mostly = {
 	{
 		.name		= "pkttype",
 		.family		= AF_INET,
-		.match		= match,
+		.match		= pkttype_mt,
 		.matchsize	= sizeof(struct xt_pkttype_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "pkttype",
 		.family		= AF_INET6,
-		.match		= match,
+		.match		= pkttype_mt,
 		.matchsize	= sizeof(struct xt_pkttype_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_pkttype_init(void)
+static int __init pkttype_mt_init(void)
 {
-	return xt_register_matches(xt_pkttype_match,
-				   ARRAY_SIZE(xt_pkttype_match));
+	return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
 }
 
-static void __exit xt_pkttype_fini(void)
+static void __exit pkttype_mt_exit(void)
 {
-	xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match));
+	xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
 }
 
-module_init(xt_pkttype_init);
-module_exit(xt_pkttype_fini);
+module_init(pkttype_mt_init);
+module_exit(pkttype_mt_exit);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 2eaa6fd089c..5a017b8b72d 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -108,14 +108,11 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 	return strict ? i == info->len : 0;
 }
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  const struct xt_match *match,
-		  const void *matchinfo,
-		  int offset,
-		  unsigned int protoff,
-		  bool *hotdrop)
+static bool
+policy_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_policy_info *info = matchinfo;
 	int ret;
@@ -133,9 +130,10 @@ static bool match(const struct sk_buff *skb,
 	return ret;
 }
 
-static bool checkentry(const char *tablename, const void *ip_void,
-		       const struct xt_match *match,
-		       void *matchinfo, unsigned int hook_mask)
+static bool
+policy_mt_check(const char *tablename, const void *ip_void,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
 {
 	struct xt_policy_info *info = matchinfo;
 
@@ -163,37 +161,36 @@ static bool checkentry(const char *tablename, const void *ip_void,
 	return true;
 }
 
-static struct xt_match xt_policy_match[] __read_mostly = {
+static struct xt_match policy_mt_reg[] __read_mostly = {
 	{
 		.name		= "policy",
 		.family		= AF_INET,
-		.checkentry 	= checkentry,
-		.match		= match,
+		.checkentry 	= policy_mt_check,
+		.match		= policy_mt,
 		.matchsize	= sizeof(struct xt_policy_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "policy",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= policy_mt_check,
+		.match		= policy_mt,
 		.matchsize	= sizeof(struct xt_policy_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init init(void)
+static int __init policy_mt_init(void)
 {
-	return xt_register_matches(xt_policy_match,
-				   ARRAY_SIZE(xt_policy_match));
+	return xt_register_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
 }
 
-static void __exit fini(void)
+static void __exit policy_mt_exit(void)
 {
-	xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match));
+	xt_unregister_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
 }
 
-module_init(init);
-module_exit(fini);
+module_init(policy_mt_init);
+module_exit(policy_mt_exit);
 MODULE_ALIAS("ipt_policy");
 MODULE_ALIAS("ip6t_policy");
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index dae97445b87..887874b5684 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -17,10 +17,10 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in, const struct net_device *out,
-      const struct xt_match *match, const void *matchinfo,
-      int offset, unsigned int protoff, bool *hotdrop)
+quota_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	struct xt_quota_info *q =
 		((const struct xt_quota_info *)matchinfo)->master;
@@ -40,9 +40,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename, const void *entry,
-	   const struct xt_match *match, void *matchinfo,
-	   unsigned int hook_mask)
+quota_mt_check(const char *tablename, const void *entry,
+               const struct xt_match *match, void *matchinfo,
+               unsigned int hook_mask)
 {
 	struct xt_quota_info *q = matchinfo;
 
@@ -53,34 +53,34 @@ checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match xt_quota_match[] __read_mostly = {
+static struct xt_match quota_mt_reg[] __read_mostly = {
 	{
 		.name		= "quota",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= quota_mt_check,
+		.match		= quota_mt,
 		.matchsize	= sizeof(struct xt_quota_info),
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "quota",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= quota_mt_check,
+		.match		= quota_mt,
 		.matchsize	= sizeof(struct xt_quota_info),
 		.me		= THIS_MODULE
 	},
 };
 
-static int __init xt_quota_init(void)
+static int __init quota_mt_init(void)
 {
-	return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
+	return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
 }
 
-static void __exit xt_quota_fini(void)
+static void __exit quota_mt_exit(void)
 {
-	xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
+	xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
 }
 
-module_init(xt_quota_init);
-module_exit(xt_quota_fini);
+module_init(quota_mt_init);
+module_exit(quota_mt_exit);
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 91113dcbe0f..63289b40c8d 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,14 +22,10 @@ MODULE_DESCRIPTION("X_tables realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+realm_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	const struct xt_realm_info *info = matchinfo;
 	const struct dst_entry *dst = skb->dst;
@@ -37,9 +33,9 @@ match(const struct sk_buff *skb,
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
 }
 
-static struct xt_match realm_match __read_mostly = {
+static struct xt_match realm_mt_reg __read_mostly = {
 	.name		= "realm",
-	.match		= match,
+	.match		= realm_mt,
 	.matchsize	= sizeof(struct xt_realm_info),
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
@@ -47,15 +43,15 @@ static struct xt_match realm_match __read_mostly = {
 	.me		= THIS_MODULE
 };
 
-static int __init xt_realm_init(void)
+static int __init realm_mt_init(void)
 {
-	return xt_register_match(&realm_match);
+	return xt_register_match(&realm_mt_reg);
 }
 
-static void __exit xt_realm_fini(void)
+static void __exit realm_mt_exit(void)
 {
-	xt_unregister_match(&realm_match);
+	xt_unregister_match(&realm_mt_reg);
 }
 
-module_init(xt_realm_init);
-module_exit(xt_realm_fini);
+module_init(realm_mt_init);
+module_exit(realm_mt_exit);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 3358273a47b..1c8a4ee9cd0 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -116,14 +116,9 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+sctp_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_sctp_info *info = matchinfo;
 	sctp_sctphdr_t _sh, *sh;
@@ -153,11 +148,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *inf,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+sctp_mt_check(const char *tablename, const void *inf,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	const struct xt_sctp_info *info = matchinfo;
 
@@ -171,12 +164,12 @@ checkentry(const char *tablename,
 				| SCTP_CHUNK_MATCH_ONLY)));
 }
 
-static struct xt_match xt_sctp_match[] __read_mostly = {
+static struct xt_match sctp_mt_reg[] __read_mostly = {
 	{
 		.name		= "sctp",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= sctp_mt_check,
+		.match		= sctp_mt,
 		.matchsize	= sizeof(struct xt_sctp_info),
 		.proto		= IPPROTO_SCTP,
 		.me		= THIS_MODULE
@@ -184,23 +177,23 @@ static struct xt_match xt_sctp_match[] __read_mostly = {
 	{
 		.name		= "sctp",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= sctp_mt_check,
+		.match		= sctp_mt,
 		.matchsize	= sizeof(struct xt_sctp_info),
 		.proto		= IPPROTO_SCTP,
 		.me		= THIS_MODULE
 	},
 };
 
-static int __init xt_sctp_init(void)
+static int __init sctp_mt_init(void)
 {
-	return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
+	return xt_register_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
 }
 
-static void __exit xt_sctp_fini(void)
+static void __exit sctp_mt_exit(void)
 {
-	xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
+	xt_unregister_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
 }
 
-module_init(xt_sctp_init);
-module_exit(xt_sctp_fini);
+module_init(sctp_mt_init);
+module_exit(sctp_mt_exit);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e0a528df19a..2e1716d425b 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,14 +21,10 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+state_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	const struct xt_state_info *sinfo = matchinfo;
 	enum ip_conntrack_info ctinfo;
@@ -44,11 +40,10 @@ match(const struct sk_buff *skb,
 	return (sinfo->statemask & statebit);
 }
 
-static bool check(const char *tablename,
-		  const void *inf,
-		  const struct xt_match *match,
-		  void *matchinfo,
-		  unsigned int hook_mask)
+static bool
+state_mt_check(const char *tablename, const void *inf,
+               const struct xt_match *match, void *matchinfo,
+               unsigned int hook_mask)
 {
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
@@ -58,42 +53,41 @@ static bool check(const char *tablename,
 	return true;
 }
 
-static void
-destroy(const struct xt_match *match, void *matchinfo)
+static void state_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_state_match[] __read_mostly = {
+static struct xt_match state_mt_reg[] __read_mostly = {
 	{
 		.name		= "state",
 		.family		= AF_INET,
-		.checkentry	= check,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= state_mt_check,
+		.match		= state_mt,
+		.destroy	= state_mt_destroy,
 		.matchsize	= sizeof(struct xt_state_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "state",
 		.family		= AF_INET6,
-		.checkentry	= check,
-		.match		= match,
-		.destroy	= destroy,
+		.checkentry	= state_mt_check,
+		.match		= state_mt,
+		.destroy	= state_mt_destroy,
 		.matchsize	= sizeof(struct xt_state_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_state_init(void)
+static int __init state_mt_init(void)
 {
-	return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
+	return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
 }
 
-static void __exit xt_state_fini(void)
+static void __exit state_mt_exit(void)
 {
-	xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
+	xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
 }
 
-module_init(xt_state_init);
-module_exit(xt_state_fini);
+module_init(state_mt_init);
+module_exit(state_mt_exit);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 4089dae4e28..fb166483acd 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -25,10 +25,10 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in, const struct net_device *out,
-      const struct xt_match *match, const void *matchinfo,
-      int offset, unsigned int protoff, bool *hotdrop)
+statistic_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
@@ -53,9 +53,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename, const void *entry,
-	   const struct xt_match *match, void *matchinfo,
-	   unsigned int hook_mask)
+statistic_mt_check(const char *tablename, const void *entry,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
 {
 	struct xt_statistic_info *info = matchinfo;
 
@@ -66,36 +66,36 @@ checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match xt_statistic_match[] __read_mostly = {
+static struct xt_match statistic_mt_reg[] __read_mostly = {
 	{
 		.name		= "statistic",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= statistic_mt_check,
+		.match		= statistic_mt,
 		.matchsize	= sizeof(struct xt_statistic_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "statistic",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match		= match,
+		.checkentry	= statistic_mt_check,
+		.match		= statistic_mt,
 		.matchsize	= sizeof(struct xt_statistic_info),
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_statistic_init(void)
+static int __init statistic_mt_init(void)
 {
-	return xt_register_matches(xt_statistic_match,
-				   ARRAY_SIZE(xt_statistic_match));
+	return xt_register_matches(statistic_mt_reg,
+	       ARRAY_SIZE(statistic_mt_reg));
 }
 
-static void __exit xt_statistic_fini(void)
+static void __exit statistic_mt_exit(void)
 {
-	xt_unregister_matches(xt_statistic_match,
-			      ARRAY_SIZE(xt_statistic_match));
+	xt_unregister_matches(statistic_mt_reg,
+	                      ARRAY_SIZE(statistic_mt_reg));
 }
 
-module_init(xt_statistic_init);
-module_exit(xt_statistic_fini);
+module_init(statistic_mt_init);
+module_exit(statistic_mt_exit);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 864133442cd..90287844489 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -21,14 +21,11 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  const struct xt_match *match,
-		  const void *matchinfo,
-		  int offset,
-		  unsigned int protoff,
-		  bool *hotdrop)
+static bool
+string_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_string_info *conf = matchinfo;
 	struct ts_state state;
@@ -42,11 +39,10 @@ static bool match(const struct sk_buff *skb,
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
 
-static bool checkentry(const char *tablename,
-		       const void *ip,
-		       const struct xt_match *match,
-		       void *matchinfo,
-		       unsigned int hook_mask)
+static bool
+string_mt_check(const char *tablename, const void *ip,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
 {
 	struct xt_string_info *conf = matchinfo;
 	struct ts_config *ts_conf;
@@ -68,41 +64,41 @@ static bool checkentry(const char *tablename,
 	return true;
 }
 
-static void destroy(const struct xt_match *match, void *matchinfo)
+static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct xt_match xt_string_match[] __read_mostly = {
+static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.family		= AF_INET,
-		.checkentry	= checkentry,
-		.match 		= match,
-		.destroy 	= destroy,
+		.checkentry	= string_mt_check,
+		.match 		= string_mt,
+		.destroy 	= string_mt_destroy,
 		.matchsize	= sizeof(struct xt_string_info),
 		.me 		= THIS_MODULE
 	},
 	{
 		.name 		= "string",
 		.family		= AF_INET6,
-		.checkentry	= checkentry,
-		.match 		= match,
-		.destroy 	= destroy,
+		.checkentry	= string_mt_check,
+		.match 		= string_mt,
+		.destroy 	= string_mt_destroy,
 		.matchsize	= sizeof(struct xt_string_info),
 		.me 		= THIS_MODULE
 	},
 };
 
-static int __init xt_string_init(void)
+static int __init string_mt_init(void)
 {
-	return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
+	return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
 }
 
-static void __exit xt_string_fini(void)
+static void __exit string_mt_exit(void)
 {
-	xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
+	xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
 }
 
-module_init(xt_string_init);
-module_exit(xt_string_fini);
+module_init(string_mt_init);
+module_exit(string_mt_exit);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 84d401bfafa..2a3e4c30e50 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,14 +25,10 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_tcpmss_match_info *info = matchinfo;
 	struct tcphdr _tcph, *th;
@@ -82,11 +78,11 @@ dropit:
 	return false;
 }
 
-static struct xt_match xt_tcpmss_match[] __read_mostly = {
+static struct xt_match tcpmss_mt_reg[] __read_mostly = {
 	{
 		.name		= "tcpmss",
 		.family		= AF_INET,
-		.match		= match,
+		.match		= tcpmss_mt,
 		.matchsize	= sizeof(struct xt_tcpmss_match_info),
 		.proto		= IPPROTO_TCP,
 		.me		= THIS_MODULE,
@@ -94,23 +90,22 @@ static struct xt_match xt_tcpmss_match[] __read_mostly = {
 	{
 		.name		= "tcpmss",
 		.family		= AF_INET6,
-		.match		= match,
+		.match		= tcpmss_mt,
 		.matchsize	= sizeof(struct xt_tcpmss_match_info),
 		.proto		= IPPROTO_TCP,
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_tcpmss_init(void)
+static int __init tcpmss_mt_init(void)
 {
-	return xt_register_matches(xt_tcpmss_match,
-				   ARRAY_SIZE(xt_tcpmss_match));
+	return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
 }
 
-static void __exit xt_tcpmss_fini(void)
+static void __exit tcpmss_mt_exit(void)
 {
-	xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match));
+	xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
 }
 
-module_init(xt_tcpmss_init);
-module_exit(xt_tcpmss_fini);
+module_init(tcpmss_mt_init);
+module_exit(tcpmss_mt_exit);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 223f9bded67..6be5f2df5a5 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -68,14 +68,9 @@ tcp_find_option(u_int8_t option,
 }
 
 static bool
-tcp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const struct xt_match *match,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  bool *hotdrop)
+tcp_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct tcphdr _tcph, *th;
 	const struct xt_tcp *tcpinfo = matchinfo;
@@ -134,11 +129,9 @@ tcp_match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-tcp_checkentry(const char *tablename,
-	       const void *info,
-	       const struct xt_match *match,
-	       void *matchinfo,
-	       unsigned int hook_mask)
+tcp_mt_check(const char *tablename, const void *info,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct xt_tcp *tcpinfo = matchinfo;
 
@@ -147,14 +140,9 @@ tcp_checkentry(const char *tablename,
 }
 
 static bool
-udp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const struct xt_match *match,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  bool *hotdrop)
+udp_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct udphdr _udph, *uh;
 	const struct xt_udp *udpinfo = matchinfo;
@@ -182,11 +170,9 @@ udp_match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-udp_checkentry(const char *tablename,
-	       const void *info,
-	       const struct xt_match *match,
-	       void *matchinfo,
-	       unsigned int hook_mask)
+udp_mt_check(const char *tablename, const void *info,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct xt_udp *udpinfo = matchinfo;
 
@@ -194,12 +180,12 @@ udp_checkentry(const char *tablename,
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
 }
 
-static struct xt_match xt_tcpudp_match[] __read_mostly = {
+static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tcp",
 		.family		= AF_INET,
-		.checkentry	= tcp_checkentry,
-		.match		= tcp_match,
+		.checkentry	= tcp_mt_check,
+		.match		= tcp_mt,
 		.matchsize	= sizeof(struct xt_tcp),
 		.proto		= IPPROTO_TCP,
 		.me		= THIS_MODULE,
@@ -207,8 +193,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name		= "tcp",
 		.family		= AF_INET6,
-		.checkentry	= tcp_checkentry,
-		.match		= tcp_match,
+		.checkentry	= tcp_mt_check,
+		.match		= tcp_mt,
 		.matchsize	= sizeof(struct xt_tcp),
 		.proto		= IPPROTO_TCP,
 		.me		= THIS_MODULE,
@@ -216,8 +202,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name		= "udp",
 		.family		= AF_INET,
-		.checkentry	= udp_checkentry,
-		.match		= udp_match,
+		.checkentry	= udp_mt_check,
+		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
 		.proto		= IPPROTO_UDP,
 		.me		= THIS_MODULE,
@@ -225,8 +211,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name		= "udp",
 		.family		= AF_INET6,
-		.checkentry	= udp_checkentry,
-		.match		= udp_match,
+		.checkentry	= udp_mt_check,
+		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
 		.proto		= IPPROTO_UDP,
 		.me		= THIS_MODULE,
@@ -234,8 +220,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name		= "udplite",
 		.family		= AF_INET,
-		.checkentry	= udp_checkentry,
-		.match		= udp_match,
+		.checkentry	= udp_mt_check,
+		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
 		.proto		= IPPROTO_UDPLITE,
 		.me		= THIS_MODULE,
@@ -243,24 +229,23 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name		= "udplite",
 		.family		= AF_INET6,
-		.checkentry	= udp_checkentry,
-		.match		= udp_match,
+		.checkentry	= udp_mt_check,
+		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
 		.proto		= IPPROTO_UDPLITE,
 		.me		= THIS_MODULE,
 	},
 };
 
-static int __init xt_tcpudp_init(void)
+static int __init tcpudp_mt_init(void)
 {
-	return xt_register_matches(xt_tcpudp_match,
-				   ARRAY_SIZE(xt_tcpudp_match));
+	return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
 }
 
-static void __exit xt_tcpudp_fini(void)
+static void __exit tcpudp_mt_exit(void)
 {
-	xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match));
+	xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
 }
 
-module_init(xt_tcpudp_init);
-module_exit(xt_tcpudp_fini);
+module_init(tcpudp_mt_init);
+module_exit(tcpudp_mt_exit);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index f9c55dcd894..96da93c9ece 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -147,11 +147,10 @@ static void localtime_3(struct xtm *r, time_t time)
 	return;
 }
 
-static bool xt_time_match(const struct sk_buff *skb,
-                          const struct net_device *in,
-                          const struct net_device *out,
-                          const struct xt_match *match, const void *matchinfo,
-                          int offset, unsigned int protoff, bool *hotdrop)
+static bool
+time_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_time_info *info = matchinfo;
 	unsigned int packet_time;
@@ -216,9 +215,10 @@ static bool xt_time_match(const struct sk_buff *skb,
 	return true;
 }
 
-static bool xt_time_check(const char *tablename, const void *ip,
-                          const struct xt_match *match, void *matchinfo,
-                          unsigned int hook_mask)
+static bool
+time_mt_check(const char *tablename, const void *ip,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	struct xt_time_info *info = matchinfo;
 
@@ -232,37 +232,37 @@ static bool xt_time_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static struct xt_match xt_time_reg[] __read_mostly = {
+static struct xt_match time_mt_reg[] __read_mostly = {
 	{
 		.name       = "time",
 		.family     = AF_INET,
-		.match      = xt_time_match,
+		.match      = time_mt,
 		.matchsize  = sizeof(struct xt_time_info),
-		.checkentry = xt_time_check,
+		.checkentry = time_mt_check,
 		.me         = THIS_MODULE,
 	},
 	{
 		.name       = "time",
 		.family     = AF_INET6,
-		.match      = xt_time_match,
+		.match      = time_mt,
 		.matchsize  = sizeof(struct xt_time_info),
-		.checkentry = xt_time_check,
+		.checkentry = time_mt_check,
 		.me         = THIS_MODULE,
 	},
 };
 
-static int __init xt_time_init(void)
+static int __init time_mt_init(void)
 {
-	return xt_register_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg));
+	return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
 }
 
-static void __exit xt_time_exit(void)
+static void __exit time_mt_exit(void)
 {
-	xt_unregister_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg));
+	xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
 }
 
-module_init(xt_time_init);
-module_exit(xt_time_exit);
+module_init(time_mt_init);
+module_exit(time_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
 MODULE_DESCRIPTION("netfilter time match");
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index af75b8c3f20..3d8f5b3986a 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -88,11 +88,10 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool u32_match(const struct sk_buff *skb,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      const struct xt_match *match, const void *matchinfo,
-		      int offset, unsigned int protoff, bool *hotdrop)
+static bool
+u32_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_u32 *data = matchinfo;
 	bool ret;
@@ -101,35 +100,35 @@ static bool u32_match(const struct sk_buff *skb,
 	return ret ^ data->invert;
 }
 
-static struct xt_match u32_reg[] __read_mostly = {
+static struct xt_match u32_mt_reg[] __read_mostly = {
 	{
 		.name       = "u32",
 		.family     = AF_INET,
-		.match      = u32_match,
+		.match      = u32_mt,
 		.matchsize  = sizeof(struct xt_u32),
 		.me         = THIS_MODULE,
 	},
 	{
 		.name       = "u32",
 		.family     = AF_INET6,
-		.match      = u32_match,
+		.match      = u32_mt,
 		.matchsize  = sizeof(struct xt_u32),
 		.me         = THIS_MODULE,
 	},
 };
 
-static int __init xt_u32_init(void)
+static int __init u32_mt_init(void)
 {
-	return xt_register_matches(u32_reg, ARRAY_SIZE(u32_reg));
+	return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
 }
 
-static void __exit xt_u32_exit(void)
+static void __exit u32_mt_exit(void)
 {
-	xt_unregister_matches(u32_reg, ARRAY_SIZE(u32_reg));
+	xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
 }
 
-module_init(xt_u32_init);
-module_exit(xt_u32_exit);
+module_init(u32_mt_init);
+module_exit(u32_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
 MODULE_DESCRIPTION("netfilter u32 match module");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 259d4e41f3ec25f22169daece42729f597b89f9a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 4 Dec 2007 23:24:56 -0800
Subject: [NETFILTER]: x_tables: struct xt_table_info diet

Instead of using a big array of NR_CPUS entries, we can compute the size
needed at runtime, using nr_cpu_ids

This should save some ram (especially on David's machines where NR_CPUS=4096 :
32 KB can be saved per table, and 64KB for dynamically allocated ones (because
of slab/slub alignements) )

In particular, the 'bootstrap' tables are not any more static (in data
section) but on stack as their size is now very small.

This also should reduce the size used on stack in compat functions
(get_info() declares an automatic variable, that could be bigger than kernel
stack size for big NR_CPUS)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c |  5 ++---
 net/ipv4/netfilter/ip_tables.c  | 24 +++++++++---------------
 net/ipv6/netfilter/ip6_tables.c |  5 ++---
 net/netfilter/x_tables.c        |  2 +-
 4 files changed, 14 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2909c92ecd9..a21722d5c9f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -811,8 +811,7 @@ static int do_replace(void __user *user, unsigned int len)
 		return -ENOPROTOOPT;
 
 	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
+	if (tmp.size >= INT_MAX / num_possible_cpus())
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
@@ -1090,7 +1089,7 @@ int arpt_register_table(struct arpt_table *table,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	static struct xt_table_info bootstrap
+	struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ca23c63ced3..87d369244bd 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1090,7 +1090,8 @@ compat_calc_match(struct ipt_entry_match *m, int * size)
 	return 0;
 }
 
-static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
+static int compat_calc_entry(struct ipt_entry *e,
+		const struct xt_table_info *info,
 		void *base, struct xt_table_info *newinfo)
 {
 	struct ipt_entry_target *t;
@@ -1118,22 +1119,17 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
 	return 0;
 }
 
-static int compat_table_info(struct xt_table_info *info,
+static int compat_table_info(const struct xt_table_info *info,
 		struct xt_table_info *newinfo)
 {
 	void *loc_cpu_entry;
-	int i;
 
 	if (!newinfo || !info)
 		return -EINVAL;
 
-	memset(newinfo, 0, sizeof(struct xt_table_info));
-	newinfo->size = info->size;
-	newinfo->number = info->number;
-	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
-		newinfo->hook_entry[i] = info->hook_entry[i];
-		newinfo->underflow[i] = info->underflow[i];
-	}
+	/* we dont care about newinfo->entries[] */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
 	return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
 			compat_calc_entry, info, loc_cpu_entry, newinfo);
@@ -1327,8 +1323,7 @@ do_replace(void __user *user, unsigned int len)
 		return -ENOPROTOOPT;
 
 	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
+	if (tmp.size >= INT_MAX / num_possible_cpus())
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
@@ -1868,8 +1863,7 @@ compat_do_replace(void __user *user, unsigned int len)
 		return -ENOPROTOOPT;
 
 	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
+	if (tmp.size >= INT_MAX / num_possible_cpus())
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
@@ -2126,7 +2120,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	static struct xt_table_info bootstrap
+	struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e1e87eff468..e60c1b4b1ec 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1042,8 +1042,7 @@ do_replace(void __user *user, unsigned int len)
 		return -EFAULT;
 
 	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
+	if (tmp.size >= INT_MAX / num_possible_cpus())
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
@@ -1339,7 +1338,7 @@ int ip6t_register_table(struct xt_table *table,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	static struct xt_table_info bootstrap
+	struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b6160e41eb1..07bb465d951 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -499,7 +499,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
 		return NULL;
 
-	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
+	newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
 	if (!newinfo)
 		return NULL;
 
-- 
cgit v1.2.3


From 9e67d5a739327c44885adebb4f3a538050be73e4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 4 Dec 2007 23:25:26 -0800
Subject: [NETFILTER]: x_tables: remove obsolete overflow check

We're not multiplying the size with the number of CPUs anymore, so the
check is obsolete.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 2 --
 net/ipv4/netfilter/ip_tables.c  | 2 --
 net/ipv6/netfilter/ip6_tables.c | 2 --
 3 files changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a21722d5c9f..d5cae7e906c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -811,8 +811,6 @@ static int do_replace(void __user *user, unsigned int len)
 		return -ENOPROTOOPT;
 
 	/* overflow check */
-	if (tmp.size >= INT_MAX / num_possible_cpus())
-		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 87d369244bd..64ffe57ef1b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1323,8 +1323,6 @@ do_replace(void __user *user, unsigned int len)
 		return -ENOPROTOOPT;
 
 	/* overflow check */
-	if (tmp.size >= INT_MAX / num_possible_cpus())
-		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e60c1b4b1ec..d3e884a5c6a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1042,8 +1042,6 @@ do_replace(void __user *user, unsigned int len)
 		return -EFAULT;
 
 	/* overflow check */
-	if (tmp.size >= INT_MAX / num_possible_cpus())
-		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 0265ab44bacc1a1e0e3f5873d8ca2d5a29e33db2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:27:38 -0800
Subject: [NETFILTER]: merge ipt_owner/ip6t_owner in xt_owner

xt_owner merges ipt_owner and ip6t_owner, and adds a flag to match
on socket (non-)existence.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig      |   9 --
 net/ipv4/netfilter/Makefile     |   1 -
 net/ipv4/netfilter/ipt_owner.c  |  87 -----------------
 net/ipv6/netfilter/Kconfig      |   9 --
 net/ipv6/netfilter/Makefile     |   1 -
 net/ipv6/netfilter/ip6t_owner.c |  87 -----------------
 net/netfilter/Kconfig           |   8 ++
 net/netfilter/Makefile          |   1 +
 net/netfilter/xt_owner.c        | 211 ++++++++++++++++++++++++++++++++++++++++
 9 files changed, 220 insertions(+), 194 deletions(-)
 delete mode 100644 net/ipv4/netfilter/ipt_owner.c
 delete mode 100644 net/ipv6/netfilter/ip6t_owner.c
 create mode 100644 net/netfilter/xt_owner.c

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 9aca9c55687..6c563d908c7 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -111,15 +111,6 @@ config IP_NF_MATCH_TTL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_OWNER
-	tristate "Owner match support"
-	depends on IP_NF_IPTABLES
-	help
-	  Packet owner matching allows you to match locally-generated packets
-	  based on who created them: the user, group, process or session.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_ADDRTYPE
 	tristate  'address type match support'
 	depends on IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7456833d6ad..42199e93b86 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -45,7 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
-obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
deleted file mode 100644
index 4f1aa897d4b..00000000000
--- a/net/ipv4/netfilter/ipt_owner.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* Kernel module to match various things tied to sockets associated with
-   locally generated outgoing packets. */
-
-/* (C) 2000 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
-#include <linux/rcupdate.h>
-#include <net/sock.h>
-
-#include <linux/netfilter_ipv4/ipt_owner.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables owner match");
-
-static bool
-owner_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
-{
-	const struct ipt_owner_info *info = matchinfo;
-
-	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
-		return false;
-
-	if(info->match & IPT_OWNER_UID) {
-		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
-		    !!(info->invert & IPT_OWNER_UID))
-			return false;
-	}
-
-	if(info->match & IPT_OWNER_GID) {
-		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
-		    !!(info->invert & IPT_OWNER_GID))
-			return false;
-	}
-
-	return true;
-}
-
-static bool
-owner_mt_check(const char *tablename, const void *ip,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
-{
-	const struct ipt_owner_info *info = matchinfo;
-
-	if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
-		printk("ipt_owner: pid, sid and command matching "
-		       "not supported anymore\n");
-		return false;
-	}
-	return true;
-}
-
-static struct xt_match owner_mt_reg __read_mostly = {
-	.name		= "owner",
-	.family		= AF_INET,
-	.match		= owner_mt,
-	.matchsize	= sizeof(struct ipt_owner_info),
-	.hooks		= (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_POST_ROUTING),
-	.checkentry	= owner_mt_check,
-	.me		= THIS_MODULE,
-};
-
-static int __init owner_mt_init(void)
-{
-	return xt_register_match(&owner_mt_reg);
-}
-
-static void __exit owner_mt_exit(void)
-{
-	xt_unregister_match(&owner_mt_reg);
-}
-
-module_init(owner_mt_init);
-module_exit(owner_mt_exit);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 838b8ddee8c..30d48529d98 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -89,15 +89,6 @@ config IP6_NF_MATCH_HL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_OWNER
-	tristate "Owner match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  Packet owner matching allows you to match locally-generated packets
-	  based on who created them: the user, group, process or session.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MATCH_IPV6HEADER
 	tristate "IPv6 Extension Headers Match"
 	depends on IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index e789ec44d23..fbf2c14ed88 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
 obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
 obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
 obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
-obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
 
 # targets
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
deleted file mode 100644
index 6a52ed98516..00000000000
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* Kernel module to match various things tied to sockets associated with
-   locally generated outgoing packets. */
-
-/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
-#include <linux/rcupdate.h>
-#include <net/sock.h>
-
-#include <linux/netfilter_ipv6/ip6t_owner.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("IP6 tables owner matching module");
-MODULE_LICENSE("GPL");
-
-
-static bool
-owner_mt6(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
-{
-	const struct ip6t_owner_info *info = matchinfo;
-
-	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
-		return false;
-
-	if (info->match & IP6T_OWNER_UID)
-		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
-		    !!(info->invert & IP6T_OWNER_UID))
-			return false;
-
-	if (info->match & IP6T_OWNER_GID)
-		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
-		    !!(info->invert & IP6T_OWNER_GID))
-			return false;
-
-	return true;
-}
-
-static bool
-owner_mt6_check(const char *tablename, const void *ip,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
-{
-	const struct ip6t_owner_info *info = matchinfo;
-
-	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
-		printk("ipt_owner: pid and sid matching "
-		       "not supported anymore\n");
-		return false;
-	}
-	return true;
-}
-
-static struct xt_match owner_mt6_reg __read_mostly = {
-	.name		= "owner",
-	.family		= AF_INET6,
-	.match		= owner_mt6,
-	.matchsize	= sizeof(struct ip6t_owner_info),
-	.hooks		= (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_POST_ROUTING),
-	.checkentry	= owner_mt6_check,
-	.me		= THIS_MODULE,
-};
-
-static int __init owner_mt6_init(void)
-{
-	return xt_register_match(&owner_mt6_reg);
-}
-
-static void __exit owner_mt6_exit(void)
-{
-	xt_unregister_match(&owner_mt6_reg);
-}
-
-module_init(owner_mt6_init);
-module_exit(owner_mt6_exit);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 693f861a03b..4bc0552b75f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -554,6 +554,14 @@ config NETFILTER_XT_MATCH_MARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_OWNER
+	tristate '"owner" match support'
+	depends on NETFILTER_XTABLES
+	---help---
+	Socket owner matching allows you to match locally-generated packets
+	based on who created the socket: the user or group. It is also
+	possible to check whether a socket actually exists.
+
 config NETFILTER_XT_MATCH_POLICY
 	tristate 'IPsec "policy" match support'
 	depends on NETFILTER_XTABLES && XFRM
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7763dea17be..28f59a35aee 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
new file mode 100644
index 00000000000..4222fa2c1b1
--- /dev/null
+++ b/net/netfilter/xt_owner.c
@@ -0,0 +1,211 @@
+/*
+ * Kernel module to match various things tied to sockets associated with
+ * locally generated outgoing packets.
+ *
+ * (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * Copyright © CC Computer Consultants GmbH, 2007
+ * Contact: <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <net/sock.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_owner.h>
+#include <linux/netfilter_ipv4/ipt_owner.h>
+#include <linux/netfilter_ipv6/ip6t_owner.h>
+
+static bool
+owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, const struct xt_match *match,
+            const void *matchinfo, int offset, unsigned int protoff,
+            bool *hotdrop)
+{
+	const struct ipt_owner_info *info = matchinfo;
+	const struct file *filp;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return false;
+
+	if (info->match & IPT_OWNER_UID)
+		if ((filp->f_uid != info->uid) ^
+		    !!(info->invert & IPT_OWNER_UID))
+			return false;
+
+	if (info->match & IPT_OWNER_GID)
+		if ((filp->f_gid != info->gid) ^
+		    !!(info->invert & IPT_OWNER_GID))
+			return false;
+
+	return true;
+}
+
+static bool
+owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
+{
+	const struct ip6t_owner_info *info = matchinfo;
+	const struct file *filp;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return false;
+
+	if (info->match & IP6T_OWNER_UID)
+		if ((filp->f_uid != info->uid) ^
+		    !!(info->invert & IP6T_OWNER_UID))
+			return false;
+
+	if (info->match & IP6T_OWNER_GID)
+		if ((filp->f_gid != info->gid) ^
+		    !!(info->invert & IP6T_OWNER_GID))
+			return false;
+
+	return true;
+}
+
+static bool
+owner_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
+{
+	const struct xt_owner_match_info *info = matchinfo;
+	const struct file *filp;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return (info->match ^ info->invert) == 0;
+	else if (info->match & info->invert & XT_OWNER_SOCKET)
+		/*
+		 * Socket exists but user wanted ! --socket-exists.
+		 * (Single ampersands intended.)
+		 */
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return ((info->match ^ info->invert) &
+		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
+
+	if (info->match & XT_OWNER_UID)
+		if ((filp->f_uid != info->uid) ^
+		    !!(info->invert & XT_OWNER_UID))
+			return false;
+
+	if (info->match & XT_OWNER_GID)
+		if ((filp->f_gid != info->gid) ^
+		    !!(info->invert & XT_OWNER_GID))
+			return false;
+
+	return true;
+}
+
+static bool
+owner_mt_check_v0(const char *tablename, const void *ip,
+                  const struct xt_match *match, void *matchinfo,
+                  unsigned int hook_mask)
+{
+	const struct ipt_owner_info *info = matchinfo;
+
+	if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) {
+		printk(KERN_WARNING KBUILD_MODNAME
+		       ": PID, SID and command matching is not "
+		       "supported anymore\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+owner_mt6_check_v0(const char *tablename, const void *ip,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
+{
+	const struct ip6t_owner_info *info = matchinfo;
+
+	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
+		printk(KERN_WARNING KBUILD_MODNAME
+		       ": PID and SID matching is not supported anymore\n");
+		return false;
+	}
+
+	return true;
+}
+
+static struct xt_match owner_mt_reg[] __read_mostly = {
+	{
+		.name       = "owner",
+		.revision   = 0,
+		.family     = AF_INET,
+		.match      = owner_mt_v0,
+		.matchsize  = sizeof(struct ipt_owner_info),
+		.checkentry = owner_mt_check_v0,
+		.hooks      = (1 << NF_INET_LOCAL_OUT) |
+		              (1 << NF_INET_POST_ROUTING),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "owner",
+		.revision   = 0,
+		.family     = AF_INET6,
+		.match      = owner_mt6_v0,
+		.matchsize  = sizeof(struct ip6t_owner_info),
+		.checkentry = owner_mt6_check_v0,
+		.hooks      = (1 << NF_INET_LOCAL_OUT) |
+		              (1 << NF_INET_POST_ROUTING),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "owner",
+		.revision   = 1,
+		.family     = AF_INET,
+		.match      = owner_mt,
+		.matchsize  = sizeof(struct xt_owner_match_info),
+		.hooks      = (1 << NF_INET_LOCAL_OUT) |
+		              (1 << NF_INET_POST_ROUTING),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "owner",
+		.revision   = 1,
+		.family     = AF_INET6,
+		.match      = owner_mt,
+		.matchsize  = sizeof(struct xt_owner_match_info),
+		.hooks      = (1 << NF_INET_LOCAL_OUT) |
+		              (1 << NF_INET_POST_ROUTING),
+		.me         = THIS_MODULE,
+	},
+};
+
+static int __init owner_mt_init(void)
+{
+	return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
+}
+
+static void __exit owner_mt_exit(void)
+{
+	xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
+}
+
+module_init(owner_mt_init);
+module_exit(owner_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("netfilter \"owner\" match module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
-- 
cgit v1.2.3


From 0553811612a6178365f3b062c30234913b218a96 Mon Sep 17 00:00:00 2001
From: Laszlo Attila Toth <panther@balabit.hu>
Date: Tue, 4 Dec 2007 23:28:46 -0800
Subject: [IPV4]: Add inet_dev_addr_type()

Address type search can be limited to an interface by
inet_dev_addr_type function.

Signed-off-by: Laszlo Attila Toth <panther@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e02aba5fa13..d90b42f3630 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -155,7 +155,12 @@ out:
 	return dev;
 }
 
-unsigned inet_addr_type(__be32 addr)
+/*
+ * Find address type as if only "dev" was present in the system. If
+ * on_dev is NULL then all interfaces are taken into consideration.
+ */
+static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
+					    __be32 addr)
 {
 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 	struct fib_result	res;
@@ -175,13 +180,24 @@ unsigned inet_addr_type(__be32 addr)
 	if (local_table) {
 		ret = RTN_UNICAST;
 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
-			ret = res.type;
+			if (!dev || dev == res.fi->fib_dev)
+				ret = res.type;
 			fib_res_put(&res);
 		}
 	}
 	return ret;
 }
 
+unsigned int inet_addr_type(__be32 addr)
+{
+	return __inet_dev_addr_type(NULL, addr);
+}
+
+unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
+{
+       return __inet_dev_addr_type(dev, addr);
+}
+
 /* Given (packet source, input interface) and optional (dst, oif, tos):
    - (main) check, that source is valid i.e. not broadcast or our local
      address.
@@ -940,4 +956,5 @@ void __init ip_fib_init(void)
 }
 
 EXPORT_SYMBOL(inet_addr_type);
+EXPORT_SYMBOL(inet_dev_addr_type);
 EXPORT_SYMBOL(ip_dev_find);
-- 
cgit v1.2.3


From e2cf5ecbea861ff05105bbd40f4f0d7823d9e213 Mon Sep 17 00:00:00 2001
From: Laszlo Attila Toth <panther@balabit.hu>
Date: Tue, 4 Dec 2007 23:30:18 -0800
Subject: [NETFILTER]: ipt_addrtype: limit address type checking to an
 interface

Addrtype match has a new revision (1), which lets address type checking
limited to the interface the current packet belongs to. Either incoming
or outgoing interface can be used depending on the current hook. In the
FORWARD hook two maches should be used if both interfaces have to be checked.
The new structure is ipt_addrtype_info_v1.

Revision 0 lets older userspace programs use the match as earlier.
ipt_addrtype_info is used.

Signed-off-by: Laszlo Attila Toth <panther@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_addrtype.c | 104 ++++++++++++++++++++++++++++++++------
 1 file changed, 88 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index b75421c5e08..14394c6a3c2 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -2,6 +2,7 @@
  *  iptables module to match inet_addr_type() of an ip.
  *
  *  Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ *  (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -22,45 +23,116 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("iptables addrtype match");
 
-static inline bool match_type(__be32 addr, u_int16_t mask)
+static inline bool match_type(const struct net_device *dev, __be32 addr,
+			      u_int16_t mask)
 {
-	return !!(mask & (1 << inet_addr_type(addr)));
+	return !!(mask & (1 << inet_dev_addr_type(dev, addr)));
 }
 
 static bool
-addrtype_mt(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+	       const struct net_device *out, const struct xt_match *match,
+	       const void *matchinfo, int offset, unsigned int protoff,
+	       bool *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	bool ret = true;
 
 	if (info->source)
-		ret &= match_type(iph->saddr, info->source)^info->invert_source;
+		ret &= match_type(NULL, iph->saddr, info->source) ^
+		       info->invert_source;
 	if (info->dest)
-		ret &= match_type(iph->daddr, info->dest)^info->invert_dest;
+		ret &= match_type(NULL, iph->daddr, info->dest) ^
+		       info->invert_dest;
 
 	return ret;
 }
 
-static struct xt_match addrtype_mt_reg __read_mostly = {
-	.name		= "addrtype",
-	.family		= AF_INET,
-	.match		= addrtype_mt,
-	.matchsize	= sizeof(struct ipt_addrtype_info),
-	.me		= THIS_MODULE
+static bool
+addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in,
+	       const struct net_device *out, const struct xt_match *match,
+	       const void *matchinfo, int offset, unsigned int protoff,
+	       bool *hotdrop)
+{
+	const struct ipt_addrtype_info_v1 *info = matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct net_device *dev = NULL;
+	bool ret = true;
+
+	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
+		dev = in;
+	else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
+		dev = out;
+
+	if (info->source)
+		ret &= match_type(dev, iph->saddr, info->source) ^
+		       (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
+	if (ret && info->dest)
+		ret &= match_type(dev, iph->daddr, info->dest) ^
+		       (info->flags & IPT_ADDRTYPE_INVERT_DEST);
+	return ret;
+}
+
+static bool
+addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void,
+			  const struct xt_match *match, void *matchinfo,
+			  unsigned int hook_mask)
+{
+	struct ipt_addrtype_info_v1 *info = matchinfo;
+
+	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
+	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
+		printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
+				"interface limitation cannot be selected\n");
+		return false;
+	}
+
+	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) &&
+	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
+		printk(KERN_ERR "ipt_addrtype: output interface limitation "
+				"not valid in PRE_ROUTING and INPUT\n");
+		return false;
+	}
+
+	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) &&
+	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
+		printk(KERN_ERR "ipt_addrtype: input interface limitation "
+				"not valid in POST_ROUTING and OUTPUT\n");
+		return false;
+	}
+
+	return true;
+}
+
+static struct xt_match addrtype_mt_reg[] __read_mostly = {
+	{
+		.name		= "addrtype",
+		.family		= AF_INET,
+		.match		= addrtype_mt_v0,
+		.matchsize	= sizeof(struct ipt_addrtype_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "addrtype",
+		.family		= AF_INET,
+		.revision	= 1,
+		.match		= addrtype_mt_v1,
+		.checkentry	= addrtype_mt_checkentry_v1,
+		.matchsize	= sizeof(struct ipt_addrtype_info_v1),
+		.me		= THIS_MODULE
+	}
 };
 
 static int __init addrtype_mt_init(void)
 {
-	return xt_register_match(&addrtype_mt_reg);
+	return xt_register_matches(addrtype_mt_reg,
+				   ARRAY_SIZE(addrtype_mt_reg));
 }
 
 static void __exit addrtype_mt_exit(void)
 {
-	xt_unregister_match(&addrtype_mt_reg);
+	xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
 }
 
 module_init(addrtype_mt_init);
-- 
cgit v1.2.3


From 4c37799ccf6c722e0dad6a0677af22d1c23fb897 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:31:59 -0800
Subject: [NETFILTER]: Use lowercase names for matches in Kconfig

Unify netfilter match kconfig descriptions

Consistently use lowercase for matches in kconfig one-line
descriptions and name the match module.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig | 12 ++++++------
 net/ipv6/netfilter/Kconfig | 16 ++++++++--------
 net/netfilter/Kconfig      |  8 ++++----
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 6c563d908c7..244e91daf04 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -55,7 +55,7 @@ config IP_NF_IPTABLES
 
 # The matches.
 config IP_NF_MATCH_IPRANGE
-	tristate "IP range match support"
+	tristate '"iprange" match support'
 	depends on IP_NF_IPTABLES
 	help
 	  This option makes possible to match IP addresses against IP address
@@ -73,7 +73,7 @@ config IP_NF_MATCH_TOS
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_MATCH_RECENT
-	tristate "recent match support"
+	tristate '"recent" match support'
 	depends on IP_NF_IPTABLES
 	help
 	  This match is used for creating one or many lists of recently
@@ -85,7 +85,7 @@ config IP_NF_MATCH_RECENT
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_MATCH_ECN
-	tristate "ECN match support"
+	tristate '"ecn" match support'
 	depends on IP_NF_IPTABLES
 	help
 	  This option adds a `ECN' match, which allows you to match against
@@ -94,7 +94,7 @@ config IP_NF_MATCH_ECN
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_MATCH_AH
-	tristate "AH match support"
+	tristate '"ah" match support'
 	depends on IP_NF_IPTABLES
 	help
 	  This match extension allows you to match a range of SPIs
@@ -103,7 +103,7 @@ config IP_NF_MATCH_AH
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_MATCH_TTL
-	tristate "TTL match support"
+	tristate '"ttl" match support'
 	depends on IP_NF_IPTABLES
 	help
 	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
@@ -112,7 +112,7 @@ config IP_NF_MATCH_TTL
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_MATCH_ADDRTYPE
-	tristate  'address type match support'
+	tristate '"addrtype" address type match support'
 	depends on IP_NF_IPTABLES
 	help
 	  This option allows you to match what routing thinks of an address,
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 30d48529d98..5374c665f8d 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -54,7 +54,7 @@ config IP6_NF_IPTABLES
 
 # The simple matches.
 config IP6_NF_MATCH_RT
-	tristate "Routing header match support"
+	tristate '"rt" Routing header match support'
 	depends on IP6_NF_IPTABLES
 	help
 	  rt matching allows you to match packets based on the routing
@@ -63,7 +63,7 @@ config IP6_NF_MATCH_RT
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_OPTS
-	tristate "Hop-by-hop and Dst opts header match support"
+	tristate '"hopbyhop" and "dst" opts header match support'
 	depends on IP6_NF_IPTABLES
 	help
 	  This allows one to match packets based on the hop-by-hop
@@ -72,7 +72,7 @@ config IP6_NF_MATCH_OPTS
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_FRAG
-	tristate "Fragmentation header match support"
+	tristate '"frag" Fragmentation header match support'
 	depends on IP6_NF_IPTABLES
 	help
 	  frag matching allows you to match packets based on the fragmentation
@@ -81,7 +81,7 @@ config IP6_NF_MATCH_FRAG
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_HL
-	tristate "HL match support"
+	tristate '"hl" match support'
 	depends on IP6_NF_IPTABLES
 	help
 	  HL matching allows you to match packets based on the hop
@@ -90,7 +90,7 @@ config IP6_NF_MATCH_HL
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_IPV6HEADER
-	tristate "IPv6 Extension Headers Match"
+	tristate '"ipv6header" IPv6 Extension Headers Match'
 	depends on IP6_NF_IPTABLES
 	help
 	  This module allows one to match packets based upon
@@ -99,7 +99,7 @@ config IP6_NF_MATCH_IPV6HEADER
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_AH
-	tristate "AH match support"
+	tristate '"ah" match support'
 	depends on IP6_NF_IPTABLES
 	help
 	  This module allows one to match AH packets.
@@ -107,7 +107,7 @@ config IP6_NF_MATCH_AH
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_MH
-	tristate "MH match support"
+	tristate '"mh" match support'
 	depends on IP6_NF_IPTABLES
 	help
 	  This module allows one to match MH packets.
@@ -115,7 +115,7 @@ config IP6_NF_MATCH_MH
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_EUI64
-	tristate "EUI64 address check"
+	tristate '"eui64" address check'
 	depends on IP6_NF_IPTABLES
 	help
 	  This module performs checking on the IPv6 source address
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4bc0552b75f..d220607cc03 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -476,7 +476,7 @@ config NETFILTER_XT_MATCH_CONNTRACK
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NETFILTER_XT_MATCH_DCCP
-	tristate  '"DCCP" protocol match support'
+	tristate '"dccp" protocol match support'
 	depends on NETFILTER_XTABLES
 	help
 	  With this option enabled, you will be able to use the iptables
@@ -487,7 +487,7 @@ config NETFILTER_XT_MATCH_DCCP
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_DSCP
-	tristate '"DSCP" match support'
+	tristate '"dscp" match support'
 	depends on NETFILTER_XTABLES
 	help
 	  This option adds a `DSCP' match, which allows you to match against
@@ -498,7 +498,7 @@ config NETFILTER_XT_MATCH_DSCP
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NETFILTER_XT_MATCH_ESP
-	tristate '"ESP" match support'
+	tristate '"esp" match support'
 	depends on NETFILTER_XTABLES
 	help
 	  This match extension allows you to match a range of SPIs
@@ -573,7 +573,7 @@ config NETFILTER_XT_MATCH_POLICY
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NETFILTER_XT_MATCH_MULTIPORT
-	tristate "Multiple port match support"
+	tristate '"multiport" Multiple port match support'
 	depends on NETFILTER_XTABLES
 	help
 	  Multiport matching allows you to match TCP or UDP packets based on
-- 
cgit v1.2.3


From c3b33e6a2cdefba38d83442ebae2ee42e853ea51 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:37:54 -0800
Subject: [NETFILTER]: Merge ipt_tos into xt_dscp

Merge ipt_tos into xt_dscp.

Merge ipt_tos (tos v0 match) into xt_dscp. They both match on the same
field in the IPv4 header, so it seems reasonable to keep them in one
piece. This is part one of the implicit 4-patch series to move tos to
xtables and extend it by IPv6.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig   |  9 --------
 net/ipv4/netfilter/Makefile  |  1 -
 net/ipv4/netfilter/ipt_tos.c | 50 --------------------------------------------
 net/netfilter/Kconfig        |  6 +++++-
 net/netfilter/xt_dscp.c      | 24 +++++++++++++++++++--
 5 files changed, 27 insertions(+), 63 deletions(-)
 delete mode 100644 net/ipv4/netfilter/ipt_tos.c

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 244e91daf04..232817c828c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -63,15 +63,6 @@ config IP_NF_MATCH_IPRANGE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_TOS
-	tristate "TOS match support"
-	depends on IP_NF_IPTABLES
-	help
-	  TOS matching allows you to match packets based on the Type Of
-	  Service fields of the IP packet.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_RECENT
 	tristate '"recent" match support'
 	depends on IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 42199e93b86..00c19c74ce7 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -46,7 +46,6 @@ obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
-obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 
 # targets
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
deleted file mode 100644
index 7d608682584..00000000000
--- a/net/ipv4/netfilter/ipt_tos.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Kernel module to match TOS values. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_tos.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables TOS match module");
-
-static bool
-tos_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
-{
-	const struct ipt_tos_info *info = matchinfo;
-
-	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
-}
-
-static struct xt_match tos_mt_reg __read_mostly = {
-	.name		= "tos",
-	.family		= AF_INET,
-	.match		= tos_mt,
-	.matchsize	= sizeof(struct ipt_tos_info),
-	.me		= THIS_MODULE,
-};
-
-static int __init tos_mt_init(void)
-{
-	return xt_register_match(&tos_mt_reg);
-}
-
-static void __exit tos_mt_exit(void)
-{
-	xt_unregister_match(&tos_mt_reg);
-}
-
-module_init(tos_mt_init);
-module_exit(tos_mt_exit);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d220607cc03..1804916e95f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -487,7 +487,7 @@ config NETFILTER_XT_MATCH_DCCP
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_DSCP
-	tristate '"dscp" match support'
+	tristate '"dscp" and "tos" match support'
 	depends on NETFILTER_XTABLES
 	help
 	  This option adds a `DSCP' match, which allows you to match against
@@ -495,6 +495,10 @@ config NETFILTER_XT_MATCH_DSCP
 
 	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
 
+	  It will also add a "tos" match, which allows you to match packets
+	  based on the Type Of Service fields of the IPv4 packet (which share
+	  the same bits as DSCP).
+
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NETFILTER_XT_MATCH_ESP
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 63f7354ca9a..75b0df990d4 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -13,14 +13,16 @@
 #include <linux/ipv6.h>
 #include <net/dsfield.h>
 
-#include <linux/netfilter/xt_dscp.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_dscp.h>
+#include <linux/netfilter_ipv4/ipt_tos.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("x_tables DSCP matching module");
+MODULE_DESCRIPTION("x_tables DSCP/tos matching module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_dscp");
 MODULE_ALIAS("ip6t_dscp");
+MODULE_ALIAS("ipt_tos");
 
 static bool
 dscp_mt(const struct sk_buff *skb, const struct net_device *in,
@@ -60,6 +62,16 @@ dscp_mt_check(const char *tablename, const void *info,
 	return true;
 }
 
+static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+                      const struct net_device *out,
+                      const struct xt_match *match, const void *matchinfo,
+                      int offset, unsigned int protoff, bool *hotdrop)
+{
+	const struct ipt_tos_info *info = matchinfo;
+
+	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
+}
+
 static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "dscp",
@@ -77,6 +89,14 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 		.matchsize	= sizeof(struct xt_dscp_info),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "tos",
+		.revision	= 0,
+		.family		= AF_INET,
+		.match		= tos_mt_v0,
+		.matchsize	= sizeof(struct ipt_tos_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init dscp_mt_init(void)
-- 
cgit v1.2.3


From c9fd49680954714473d6cbd2546d6ff120f96840 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:38:13 -0800
Subject: [NETFILTER]: Merge ipt_TOS into xt_DSCP

Merge ipt_TOS into xt_DSCP.

Merge ipt_TOS (tos v0 target) into xt_DSCP. They both modify the same
field in the IPv4 header, so it seems reasonable to keep them in one
piece. This is part two of the implicit 4-patch series to move tos to
xtables and extend it by IPv6.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig  | 10 ---------
 net/ipv4/netfilter/Makefile |  1 -
 net/netfilter/Kconfig       |  6 +++++-
 net/netfilter/xt_DSCP.c     | 51 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 232817c828c..b11231df62c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -293,16 +293,6 @@ config IP_NF_MANGLE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_TOS
-	tristate "TOS target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `TOS' target, which allows you to create rules in
-	  the `mangle' table which alter the Type Of Service field of an IP
-	  packet prior to routing.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_ECN
 	tristate "ECN target support"
 	depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 00c19c74ce7..2fc05619f91 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -57,7 +57,6 @@ obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
 obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1804916e95f..9c82d4cc86b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -293,7 +293,7 @@ config NETFILTER_XT_TARGET_CONNMARK
 	  ipt_CONNMARK.ko.  If unsure, say `N'.
 
 config NETFILTER_XT_TARGET_DSCP
-	tristate '"DSCP" target support'
+	tristate '"DSCP" and "TOS" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	help
@@ -302,6 +302,10 @@ config NETFILTER_XT_TARGET_DSCP
 
 	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
 
+	  It also adds the "TOS" target, which allows you to create rules in
+	  the "mangle" table which alter the Type Of Service field of an IPv4
+	  packet prior to routing.
+
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NETFILTER_XT_TARGET_MARK
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 18823558797..40a4f1d7191 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -18,12 +18,14 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_DSCP.h>
+#include <linux/netfilter_ipv4/ipt_TOS.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("x_tables DSCP modification module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_DSCP");
 MODULE_ALIAS("ip6t_DSCP");
+MODULE_ALIAS("ipt_TOS");
 
 static unsigned int
 dscp_tg(struct sk_buff *skb, const struct net_device *in,
@@ -76,6 +78,45 @@ dscp_tg_check(const char *tablename, const void *e_void,
 	return true;
 }
 
+static unsigned int
+tos_tg_v0(struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, unsigned int hooknum,
+          const struct xt_target *target, const void *targinfo)
+{
+	const struct ipt_tos_target_info *info = targinfo;
+	struct iphdr *iph = ip_hdr(skb);
+	u_int8_t oldtos;
+
+	if ((iph->tos & IPTOS_TOS_MASK) != info->tos) {
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+			return NF_DROP;
+
+		iph      = ip_hdr(skb);
+		oldtos   = iph->tos;
+		iph->tos = (iph->tos & IPTOS_PREC_MASK) | info->tos;
+		csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
+	}
+
+	return XT_CONTINUE;
+}
+
+static bool
+tos_tg_check_v0(const char *tablename, const void *e_void,
+                const struct xt_target *target, void *targinfo,
+                unsigned int hook_mask)
+{
+	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+
+	if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT &&
+	    tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST &&
+	    tos != IPTOS_NORMALSVC) {
+		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+		return false;
+	}
+
+	return true;
+}
+
 static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "DSCP",
@@ -95,6 +136,16 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "TOS",
+		.revision	= 0,
+		.family		= AF_INET,
+		.table		= "mangle",
+		.target		= tos_tg_v0,
+		.targetsize	= sizeof(struct ipt_tos_target_info),
+		.checkentry	= tos_tg_check_v0,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init dscp_tg_init(void)
-- 
cgit v1.2.3


From f1095ab51d4297d4a84b64a65c71054183a73486 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:38:30 -0800
Subject: [NETFILTER]: IPv6 capable xt_tos v1 match

Extends the xt_dscp match by xt_tos v1 to add support for selectively
matching any bit in the IPv4 TOS and IPv6 Priority fields. (ipt_tos
and xt_dscp only accepted a limited range of possible values.)

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_dscp.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 75b0df990d4..834e4372031 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -23,6 +23,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_dscp");
 MODULE_ALIAS("ip6t_dscp");
 MODULE_ALIAS("ipt_tos");
+MODULE_ALIAS("ip6t_tos");
 
 static bool
 dscp_mt(const struct sk_buff *skb, const struct net_device *in,
@@ -72,6 +73,21 @@ static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
+static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
+                   const struct net_device *out, const struct xt_match *match,
+                   const void *matchinfo, int offset, unsigned int protoff,
+                   bool *hotdrop)
+{
+	const struct xt_tos_match_info *info = matchinfo;
+
+	if (match->family == AF_INET)
+		return ((ip_hdr(skb)->tos & info->tos_mask) ==
+		       info->tos_value) ^ !!info->invert;
+	else
+		return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) ==
+		       info->tos_value) ^ !!info->invert;
+}
+
 static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "dscp",
@@ -97,6 +113,22 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 		.matchsize	= sizeof(struct ipt_tos_info),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "tos",
+		.revision	= 1,
+		.family		= AF_INET,
+		.match		= tos_mt,
+		.matchsize	= sizeof(struct xt_tos_match_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "tos",
+		.revision	= 1,
+		.family		= AF_INET6,
+		.match		= tos_mt,
+		.matchsize	= sizeof(struct xt_tos_match_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init dscp_mt_init(void)
-- 
cgit v1.2.3


From 5c350e5a380333c64da8580fa134a2fd8e71fea4 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 4 Dec 2007 23:39:09 -0800
Subject: [NETFILTER]: IPv6 capable xt_TOS v1 target

Extends the xt_DSCP target by xt_TOS v1 to add support for selectively
setting and flipping any bit in the IPv4 TOS and IPv6 Priority fields.
(ipt_TOS and xt_DSCP only accepted a limited range of possible
values.)

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig   |  2 +-
 net/netfilter/xt_DSCP.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9c82d4cc86b..7bde6315999 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -304,7 +304,7 @@ config NETFILTER_XT_TARGET_DSCP
 
 	  It also adds the "TOS" target, which allows you to create rules in
 	  the "mangle" table which alter the Type Of Service field of an IPv4
-	  packet prior to routing.
+	  or the Priority field of an IPv6 packet, prior to routing.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 40a4f1d7191..fd7500ecadf 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -26,6 +26,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_DSCP");
 MODULE_ALIAS("ip6t_DSCP");
 MODULE_ALIAS("ipt_TOS");
+MODULE_ALIAS("ip6t_TOS");
 
 static unsigned int
 dscp_tg(struct sk_buff *skb, const struct net_device *in,
@@ -117,6 +118,50 @@ tos_tg_check_v0(const char *tablename, const void *e_void,
 	return true;
 }
 
+static unsigned int
+tos_tg(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
+{
+	const struct xt_tos_target_info *info = targinfo;
+	struct iphdr *iph = ip_hdr(skb);
+	u_int8_t orig, nv;
+
+	orig = ipv4_get_dsfield(iph);
+	nv   = (orig & info->tos_mask) ^ info->tos_value;
+
+	if (orig != nv) {
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+			return NF_DROP;
+		iph = ip_hdr(skb);
+		ipv4_change_dsfield(iph, ~0, nv);
+	}
+
+	return XT_CONTINUE;
+}
+
+static unsigned int
+tos_tg6(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
+{
+	const struct xt_tos_target_info *info = targinfo;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	u_int8_t orig, nv;
+
+	orig = ipv6_get_dsfield(iph);
+	nv   = (orig & info->tos_mask) ^ info->tos_value;
+
+	if (orig != nv) {
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+			return NF_DROP;
+		iph = ipv6_hdr(skb);
+		ipv6_change_dsfield(iph, ~0, nv);
+	}
+
+	return XT_CONTINUE;
+}
+
 static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "DSCP",
@@ -146,6 +191,24 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 		.checkentry	= tos_tg_check_v0,
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "TOS",
+		.revision	= 1,
+		.family		= AF_INET,
+		.table		= "mangle",
+		.target		= tos_tg,
+		.targetsize	= sizeof(struct xt_tos_target_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "TOS",
+		.revision	= 1,
+		.family		= AF_INET6,
+		.table		= "mangle",
+		.target		= tos_tg6,
+		.targetsize	= sizeof(struct xt_tos_target_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init dscp_tg_init(void)
-- 
cgit v1.2.3


From cb76c6a597350534d211ba79d92da1f9771f8226 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 4 Dec 2007 23:39:36 -0800
Subject: [NETFILTER]: ip_tables: remove obsolete SAME target

Remove the ipt_SAME target as scheduled in feature-removal-schedule.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig    |   9 ---
 net/ipv4/netfilter/Makefile   |   1 -
 net/ipv4/netfilter/ipt_SAME.c | 174 ------------------------------------------
 3 files changed, 184 deletions(-)
 delete mode 100644 net/ipv4/netfilter/ipt_SAME.c

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index b11231df62c..ad26f66b53e 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -211,15 +211,6 @@ config IP_NF_TARGET_NETMAP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_SAME
-	tristate "SAME target support (OBSOLETE)"
-	depends on NF_NAT
-	help
-	  This option adds a `SAME' target, which works like the standard SNAT
-	  target, but attempts to give clients the same IP for all connections.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_NAT
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 2fc05619f91..fd7d4a5b436 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -56,7 +56,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
-obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
 obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
deleted file mode 100644
index a43923dab1e..00000000000
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* Same.  Just like SNAT, only try to make the connections
- * 	  between client A and server B always have the same source ip.
- *
- * (C) 2000 Paul `Rusty' Russell
- * (C) 2001 Martin Josefsson
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/inetdevice.h>
-#include <net/protocol.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <linux/netfilter_ipv4/ipt_SAME.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
-MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
-
-static bool
-same_tg_check(const char *tablename, const void *e,
-              const struct xt_target *target, void *targinfo,
-              unsigned int hook_mask)
-{
-	unsigned int count, countess, rangeip, index = 0;
-	struct ipt_same_info *mr = targinfo;
-
-	mr->ipnum = 0;
-
-	if (mr->rangesize < 1) {
-		pr_debug("same_check: need at least one dest range.\n");
-		return false;
-	}
-	if (mr->rangesize > IPT_SAME_MAX_RANGE) {
-		pr_debug("same_check: too many ranges specified, maximum "
-			 "is %u ranges\n", IPT_SAME_MAX_RANGE);
-		return false;
-	}
-	for (count = 0; count < mr->rangesize; count++) {
-		if (ntohl(mr->range[count].min_ip) >
-				ntohl(mr->range[count].max_ip)) {
-			pr_debug("same_check: min_ip is larger than max_ip in "
-				 "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
-				 NIPQUAD(mr->range[count].min_ip),
-				 NIPQUAD(mr->range[count].max_ip));
-			return false;
-		}
-		if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
-			pr_debug("same_check: bad MAP_IPS.\n");
-			return false;
-		}
-		rangeip = (ntohl(mr->range[count].max_ip) -
-					ntohl(mr->range[count].min_ip) + 1);
-		mr->ipnum += rangeip;
-
-		pr_debug("same_check: range %u, ipnum = %u\n", count, rangeip);
-	}
-	pr_debug("same_check: total ipaddresses = %u\n", mr->ipnum);
-
-	mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
-	if (!mr->iparray) {
-		pr_debug("same_check: Couldn't allocate %Zu bytes "
-			 "for %u ipaddresses!\n",
-			 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
-		return false;
-	}
-	pr_debug("same_check: Allocated %Zu bytes for %u ipaddresses.\n",
-		 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
-
-	for (count = 0; count < mr->rangesize; count++) {
-		for (countess = ntohl(mr->range[count].min_ip);
-				countess <= ntohl(mr->range[count].max_ip);
-					countess++) {
-			mr->iparray[index] = countess;
-			pr_debug("same_check: Added ipaddress `%u.%u.%u.%u' "
-				 "in index %u.\n", HIPQUAD(countess), index);
-			index++;
-		}
-	}
-	return true;
-}
-
-static void same_tg_destroy(const struct xt_target *target, void *targinfo)
-{
-	struct ipt_same_info *mr = targinfo;
-
-	kfree(mr->iparray);
-
-	pr_debug("same_destroy: Deallocated %Zu bytes for %u ipaddresses.\n",
-		 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
-}
-
-static unsigned int
-same_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	u_int32_t tmpip, aindex;
-	__be32 new_ip;
-	const struct ipt_same_info *same = targinfo;
-	struct nf_nat_range newrange;
-	const struct nf_conntrack_tuple *t;
-
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
-			hooknum == NF_INET_POST_ROUTING);
-	ct = nf_ct_get(skb, &ctinfo);
-
-	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-
-	/* Base new source on real src ip and optionally dst ip,
-	   giving some hope for consistency across reboots.
-	   Here we calculate the index in same->iparray which
-	   holds the ipaddress we should use */
-
-	tmpip = ntohl(t->src.u3.ip);
-
-	if (!(same->info & IPT_SAME_NODST))
-		tmpip += ntohl(t->dst.u3.ip);
-	aindex = tmpip % same->ipnum;
-
-	new_ip = htonl(same->iparray[aindex]);
-
-	pr_debug("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
-		 "new src=%u.%u.%u.%u\n",
-		 NIPQUAD(t->src.u3.ip), NIPQUAD(t->dst.u3.ip), NIPQUAD(new_ip));
-
-	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ same->range[0].flags, new_ip, new_ip,
-		  /* FIXME: Use ports from correct range! */
-		  same->range[0].min, same->range[0].max });
-
-	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, hooknum);
-}
-
-static struct xt_target same_tg_reg __read_mostly = {
-	.name		= "SAME",
-	.family		= AF_INET,
-	.target		= same_tg,
-	.targetsize	= sizeof(struct ipt_same_info),
-	.table		= "nat",
-	.hooks		= (1 << NF_INET_PRE_ROUTING) |
-			  (1 << NF_INET_POST_ROUTING),
-	.checkentry	= same_tg_check,
-	.destroy	= same_tg_destroy,
-	.me		= THIS_MODULE,
-};
-
-static int __init same_tg_init(void)
-{
-	return xt_register_target(&same_tg_reg);
-}
-
-static void __exit same_tg_exit(void)
-{
-	xt_unregister_target(&same_tg_reg);
-}
-
-module_init(same_tg_init);
-module_exit(same_tg_exit);
-
-- 
cgit v1.2.3


From 5859034d7eb8793d3d78d3af515c4175e7b9d03a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 4 Dec 2007 23:40:05 -0800
Subject: [NETFILTER]: x_tables: add RATEEST target

Add new rate estimator target (using gen_estimator). In combination with
the rateest match (next patch) this can be used for load-based multipath
routing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig      |  10 +++
 net/netfilter/Makefile     |   1 +
 net/netfilter/xt_RATEEST.c | 204 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+)
 create mode 100644 net/netfilter/xt_RATEEST.c

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 7bde6315999..22d1f10e88b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -357,6 +357,16 @@ config NETFILTER_XT_TARGET_NOTRACK
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_RATEEST
+	tristate '"RATEEST" target support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `RATEEST' target, which allows to measure
+	  rates similar to TC estimators. The `rateest' match can be
+	  used to match on the measured rates.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_TRACE
 	tristate  '"TRACE" target support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 28f59a35aee..413afaad361 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
new file mode 100644
index 00000000000..c0088839fde
--- /dev/null
+++ b/net/netfilter/xt_RATEEST.c
@@ -0,0 +1,204 @@
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+#include <linux/jhash.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+#include <net/gen_stats.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_RATEEST.h>
+#include <net/netfilter/xt_rateest.h>
+
+static DEFINE_MUTEX(xt_rateest_mutex);
+
+#define RATEEST_HSIZE	16
+static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
+static unsigned int jhash_rnd __read_mostly;
+
+static unsigned int xt_rateest_hash(const char *name)
+{
+	return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) &
+	       (RATEEST_HSIZE - 1);
+}
+
+static void xt_rateest_hash_insert(struct xt_rateest *est)
+{
+	unsigned int h;
+
+	h = xt_rateest_hash(est->name);
+	hlist_add_head(&est->list, &rateest_hash[h]);
+}
+
+struct xt_rateest *xt_rateest_lookup(const char *name)
+{
+	struct xt_rateest *est;
+	struct hlist_node *n;
+	unsigned int h;
+
+	h = xt_rateest_hash(name);
+	mutex_lock(&xt_rateest_mutex);
+	hlist_for_each_entry(est, n, &rateest_hash[h], list) {
+		if (strcmp(est->name, name) == 0) {
+			est->refcnt++;
+			mutex_unlock(&xt_rateest_mutex);
+			return est;
+		}
+	}
+	mutex_unlock(&xt_rateest_mutex);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xt_rateest_lookup);
+
+void xt_rateest_put(struct xt_rateest *est)
+{
+	mutex_lock(&xt_rateest_mutex);
+	if (--est->refcnt == 0) {
+		hlist_del(&est->list);
+		gen_kill_estimator(&est->bstats, &est->rstats);
+		kfree(est);
+	}
+	mutex_unlock(&xt_rateest_mutex);
+}
+EXPORT_SYMBOL_GPL(xt_rateest_put);
+
+static unsigned int
+xt_rateest_tg(struct sk_buff *skb,
+	      const struct net_device *in,
+	      const struct net_device *out,
+	      unsigned int hooknum,
+	      const struct xt_target *target,
+	      const void *targinfo)
+{
+	const struct xt_rateest_target_info *info = targinfo;
+	struct gnet_stats_basic *stats = &info->est->bstats;
+
+	spin_lock_bh(&info->est->lock);
+	stats->bytes += skb->len;
+	stats->packets++;
+	spin_unlock_bh(&info->est->lock);
+
+	return XT_CONTINUE;
+}
+
+static bool
+xt_rateest_tg_checkentry(const char *tablename,
+			 const void *entry,
+			 const struct xt_target *target,
+			 void *targinfo,
+			 unsigned int hook_mask)
+{
+	struct xt_rateest_target_info *info = (void *)targinfo;
+	struct xt_rateest *est;
+	struct {
+		struct rtattr		opt;
+		struct gnet_estimator	est;
+	} cfg;
+
+	est = xt_rateest_lookup(info->name);
+	if (est) {
+		/*
+		 * If estimator parameters are specified, they must match the
+		 * existing estimator.
+		 */
+		if ((!info->interval && !info->ewma_log) ||
+		    (info->interval != est->params.interval ||
+		     info->ewma_log != est->params.ewma_log)) {
+			xt_rateest_put(est);
+			return false;
+		}
+		info->est = est;
+		return true;
+	}
+
+	est = kzalloc(sizeof(*est), GFP_KERNEL);
+	if (!est)
+		goto err1;
+
+	strlcpy(est->name, info->name, sizeof(est->name));
+	spin_lock_init(&est->lock);
+	est->refcnt		= 1;
+	est->params.interval	= info->interval;
+	est->params.ewma_log	= info->ewma_log;
+
+	cfg.opt.rta_len		= RTA_LENGTH(sizeof(cfg.est));
+	cfg.opt.rta_type	= TCA_STATS_RATE_EST;
+	cfg.est.interval	= info->interval;
+	cfg.est.ewma_log	= info->ewma_log;
+
+	if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock,
+			      &cfg.opt) < 0)
+		goto err2;
+
+	info->est = est;
+	xt_rateest_hash_insert(est);
+
+	return true;
+
+err2:
+	kfree(est);
+err1:
+	return false;
+}
+
+static void xt_rateest_tg_destroy(const struct xt_target *target,
+				  void *targinfo)
+{
+	struct xt_rateest_target_info *info = targinfo;
+
+	xt_rateest_put(info->est);
+}
+
+static struct xt_target xt_rateest_target[] __read_mostly = {
+	{
+		.family		= AF_INET,
+		.name		= "RATEEST",
+		.target		= xt_rateest_tg,
+		.checkentry	= xt_rateest_tg_checkentry,
+		.destroy	= xt_rateest_tg_destroy,
+		.targetsize	= sizeof(struct xt_rateest_target_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.family		= AF_INET6,
+		.name		= "RATEEST",
+		.target		= xt_rateest_tg,
+		.checkentry	= xt_rateest_tg_checkentry,
+		.destroy	= xt_rateest_tg_destroy,
+		.targetsize	= sizeof(struct xt_rateest_target_info),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_rateest_tg_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
+		INIT_HLIST_HEAD(&rateest_hash[i]);
+
+	get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+	return xt_register_targets(xt_rateest_target,
+				   ARRAY_SIZE(xt_rateest_target));
+}
+
+static void __exit xt_rateest_tg_fini(void)
+{
+	xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target));
+}
+
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xtables rate estimator");
+MODULE_ALIAS("ipt_RATEEST");
+MODULE_ALIAS("ip6t_RATEEST");
+module_init(xt_rateest_tg_init);
+module_exit(xt_rateest_tg_fini);
-- 
cgit v1.2.3


From 50c164a81f1c0dfad056f99e5685537fdd0f07dd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 4 Dec 2007 13:02:19 +0100
Subject: [NETFILTER]: x_tables: add rateest match

Add rate estimator match. The rate estimator match can match on
estimated rates by the RATEEST target. It supports matching on
absolute bps/pps values, comparing two rate estimators and matching
on the difference between two rate estimators.

This is what I use to route outgoing data connections from a FTP
server over two lines based on the  available bandwidth:

# estimate outgoing rates
iptables -t mangle -A POSTROUTING -o eth0 -j RATEEST --rateest-name eth0 \
                                                     --rateest-interval 250ms \
                                                     --rateest-ewma 0.5s
iptables -t mangle -A POSTROUTING -o ppp0 -j RATEEST --rateest-name ppp0 \
                                                     --rateest-interval 250ms \
                                                     --rateest-ewma 0.5s

# mark based on available bandwidth
iptables -t mangle -A BALANCE -m state --state NEW \
                              -m helper --helper ftp \
                              -m rateest --rateest-delta \
                                         --rateest1 eth0 \
                                         --rateest-bps1 2.5mbit \
                                         --rateest-gt \
                                         --rateest2 ppp0 \
                                         --rateest-bps2 2mbit \
                              -j CONNMARK --set-mark 0x1

iptables -t mangle -A BALANCE -m state --state NEW \
                              -m helper --helper ftp \
                              -m rateest --rateest-delta \
                                         --rateest1 ppp0 \
                                         --rateest-bps1 2mbit \
                                         --rateest-gt \
                                         --rateest2 eth0 \
                                         --rateest-bps2 2.5mbit \
                              -j CONNMARK --set-mark 0x2

iptables -t mangle -A BALANCE -j CONNMARK --restore-mark

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig      |  10 +++
 net/netfilter/Makefile     |   1 +
 net/netfilter/xt_rateest.c | 178 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 189 insertions(+)
 create mode 100644 net/netfilter/xt_rateest.c

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 22d1f10e88b..4182393186a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -631,6 +631,16 @@ config NETFILTER_XT_MATCH_QUOTA
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_RATEEST
+	tristate '"rateest" match support'
+	depends on NETFILTER_XTABLES
+	select NETFILTER_XT_TARGET_RATEEST
+	help
+	  This option adds a `rateest' match, which allows to match on the
+	  rate estimated by the RATEEST target.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_REALM
 	tristate  '"realm" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 413afaad361..3b9ea8fb3a0 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -73,6 +73,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
new file mode 100644
index 00000000000..fdb86a51514
--- /dev/null
+++ b/net/netfilter/xt_rateest.c
@@ -0,0 +1,178 @@
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_rateest.h>
+#include <net/netfilter/xt_rateest.h>
+
+
+static bool xt_rateest_mt(const struct sk_buff *skb,
+			  const struct net_device *in,
+			  const struct net_device *out,
+			  const struct xt_match *match,
+			  const void *matchinfo,
+			  int offset,
+			  unsigned int protoff,
+			  bool *hotdrop)
+{
+	const struct xt_rateest_match_info *info = matchinfo;
+	struct gnet_stats_rate_est *r;
+	u_int32_t bps1, bps2, pps1, pps2;
+	bool ret = true;
+
+	spin_lock_bh(&info->est1->lock);
+	r = &info->est1->rstats;
+	if (info->flags & XT_RATEEST_MATCH_DELTA) {
+		bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
+		pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
+	} else {
+		bps1 = r->bps;
+		pps1 = r->pps;
+	}
+	spin_unlock_bh(&info->est1->lock);
+
+	if (info->flags & XT_RATEEST_MATCH_ABS) {
+		bps2 = info->bps2;
+		pps2 = info->pps2;
+	} else {
+		spin_lock_bh(&info->est2->lock);
+		r = &info->est2->rstats;
+		if (info->flags & XT_RATEEST_MATCH_DELTA) {
+			bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
+			pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
+		} else {
+			bps2 = r->bps;
+			pps2 = r->pps;
+		}
+		spin_unlock_bh(&info->est2->lock);
+	}
+
+	switch (info->mode) {
+	case XT_RATEEST_MATCH_LT:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 < bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 < pps2;
+		break;
+	case XT_RATEEST_MATCH_GT:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 > bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 > pps2;
+		break;
+	case XT_RATEEST_MATCH_EQ:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 == bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps2 == pps2;
+		break;
+	}
+
+	ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false;
+	return ret;
+}
+
+static bool xt_rateest_mt_checkentry(const char *tablename,
+				     const void *ip,
+				     const struct xt_match *match,
+				     void *matchinfo,
+				     unsigned int hook_mask)
+{
+	struct xt_rateest_match_info *info = (void *)matchinfo;
+	struct xt_rateest *est1, *est2;
+
+	if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
+				     XT_RATEEST_MATCH_REL)) != 1)
+		goto err1;
+
+	if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS)))
+		goto err1;
+
+	switch (info->mode) {
+	case XT_RATEEST_MATCH_EQ:
+	case XT_RATEEST_MATCH_LT:
+	case XT_RATEEST_MATCH_GT:
+		break;
+	default:
+		goto err1;
+	}
+
+	est1 = xt_rateest_lookup(info->name1);
+	if (!est1)
+		goto err1;
+
+	if (info->flags & XT_RATEEST_MATCH_REL) {
+		est2 = xt_rateest_lookup(info->name2);
+		if (!est2)
+			goto err2;
+	} else
+		est2 = NULL;
+
+
+	info->est1 = est1;
+	info->est2 = est2;
+	return true;
+
+err2:
+	xt_rateest_put(est1);
+err1:
+	return false;
+}
+
+static void xt_rateest_mt_destroy(const struct xt_match *match,
+				  void *matchinfo)
+{
+	struct xt_rateest_match_info *info = (void *)matchinfo;
+
+	xt_rateest_put(info->est1);
+	if (info->est2)
+		xt_rateest_put(info->est2);
+}
+
+static struct xt_match xt_rateest_match[] __read_mostly = {
+	{
+		.family		= AF_INET,
+		.name		= "rateest",
+		.match		= xt_rateest_mt,
+		.checkentry	= xt_rateest_mt_checkentry,
+		.destroy	= xt_rateest_mt_destroy,
+		.matchsize	= sizeof(struct xt_rateest_match_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.family		= AF_INET6,
+		.name		= "rateest",
+		.match		= xt_rateest_mt,
+		.checkentry	= xt_rateest_mt_checkentry,
+		.destroy	= xt_rateest_mt_destroy,
+		.matchsize	= sizeof(struct xt_rateest_match_info),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_rateest_mt_init(void)
+{
+	return xt_register_matches(xt_rateest_match,
+				   ARRAY_SIZE(xt_rateest_match));
+}
+
+static void __exit xt_rateest_mt_fini(void)
+{
+	xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match));
+}
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xtables rate estimator match");
+MODULE_ALIAS("ipt_rateest");
+MODULE_ALIAS("ip6t_rateest");
+module_init(xt_rateest_mt_init);
+module_exit(xt_rateest_mt_fini);
-- 
cgit v1.2.3


From 17dfc93f6d7e5792c9c36dd70c8612721a091ae8 Mon Sep 17 00:00:00 2001
From: Maciej Soltysiak <maciej.soltysiak@ae.poznan.pl>
Date: Tue, 4 Dec 2007 23:50:38 -0800
Subject: [NETFILTER]: {ip,ip6}t_LOG: log GID

Log GID in addition to UID

Signed-off-by: Maciej Soltysiak <maciej.soltysiak@ae.poznan.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_LOG.c  | 4 +++-
 net/ipv6/netfilter/ip6t_LOG.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index fba2155ffa3..f8c613a6eb0 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -337,7 +337,9 @@ static void dump_packet(const struct nf_loginfo *info,
 	if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+			printk("UID=%u GID=%u",
+				skb->sk->sk_socket->file->f_uid,
+				skb->sk->sk_socket->file->f_gid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index cd51c42727f..19523242991 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -362,7 +362,9 @@ static void dump_packet(const struct nf_loginfo *info,
 	if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+			printk("UID=%u GID=%u",
+				skb->sk->sk_socket->file->f_uid,
+				skb->sk->sk_socket->file->f_gid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 }
-- 
cgit v1.2.3


From 193b23c5a0b270f045a4e77545e9020bfe73d5c4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 4 Dec 2007 23:51:48 -0800
Subject: [NETFILTER]: xt_hashlimit: remove ip6tables module dependency

Switch from ipv6_find_hdr to ipv6_skip_exthdr to avoid pulling in ip6_tables
and ipv6 when only using it for IPv4.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_hashlimit.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 49a9e691dc1..951d4c82967 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -21,6 +21,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/ipv6.h>
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -379,7 +380,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 		   const struct sk_buff *skb, unsigned int protoff)
 {
 	__be16 _ports[2], *ports;
-	int nexthdr;
+	u8 nexthdr;
 
 	memset(dst, 0, sizeof(*dst));
 
@@ -407,8 +408,9 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 		if (!(hinfo->cfg.mode &
 		      (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
 			return 0;
-		nexthdr = ipv6_find_hdr(skb, &protoff, -1, NULL);
-		if (nexthdr < 0)
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+		if ((int)protoff < 0)
 			return -1;
 		break;
 #endif
-- 
cgit v1.2.3


From 1841a4c7ae106b7a3e2521db55f4d8bb8a0988d5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:22:05 -0800
Subject: [NETFILTER]: nf_ct_h323: remove ipv6 module dependency

nf_conntrack_h323 needs ip6_route_output for the call forwarding filter.
Add a ->route function to nf_afinfo and use that to avoid pulling in the
ipv6 module.

Fix the #ifdef for the IPv6 code while I'm at it - the IPv6 support is
only needed when IPv6 conntrack is enabled.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter.c                   |  6 ++++++
 net/ipv6/netfilter.c                   |  7 +++++++
 net/netfilter/nf_conntrack_h323_main.c | 19 ++++++++++++-------
 3 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index d9022467e08..599d448ef57 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -182,9 +182,15 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 
 EXPORT_SYMBOL(nf_ip_checksum);
 
+static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
+{
+	return ip_route_output_key((struct rtable **)dst, fl);
+}
+
 static struct nf_afinfo nf_ip_afinfo = {
 	.family		= AF_INET,
 	.checksum	= nf_ip_checksum,
+	.route		= nf_ip_route,
 	.saveroute	= nf_ip_saveroute,
 	.reroute	= nf_ip_reroute,
 	.route_key_size	= sizeof(struct ip_rt_info),
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 175e19f8025..281f732e3c9 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -81,6 +81,12 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
 	return 0;
 }
 
+static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
+{
+	*dst = ip6_route_output(NULL, fl);
+	return (*dst)->error;
+}
+
 __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 			     unsigned int dataoff, u_int8_t protocol)
 {
@@ -118,6 +124,7 @@ EXPORT_SYMBOL(nf_ip6_checksum);
 static struct nf_afinfo nf_ip6_afinfo = {
 	.family		= AF_INET6,
 	.checksum	= nf_ip6_checksum,
+	.route		= nf_ip6_route,
 	.saveroute	= nf_ip6_saveroute,
 	.reroute	= nf_ip6_reroute,
 	.route_key_size	= sizeof(struct ip6_rt_info),
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f23fd9598e1..c550257b546 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -708,9 +708,15 @@ static int callforward_do_filter(union nf_conntrack_address *src,
 				 union nf_conntrack_address *dst,
 				 int family)
 {
+	struct nf_afinfo *afinfo;
 	struct flowi fl1, fl2;
 	int ret = 0;
 
+	/* rcu_read_lock()ed by nf_hook_slow() */
+	afinfo = nf_get_afinfo(family);
+	if (!afinfo)
+		return 0;
+
 	memset(&fl1, 0, sizeof(fl1));
 	memset(&fl2, 0, sizeof(fl2));
 
@@ -720,8 +726,8 @@ static int callforward_do_filter(union nf_conntrack_address *src,
 
 		fl1.fl4_dst = src->ip;
 		fl2.fl4_dst = dst->ip;
-		if (ip_route_output_key(&rt1, &fl1) == 0) {
-			if (ip_route_output_key(&rt2, &fl2) == 0) {
+		if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
+			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
 				    rt1->u.dst.dev  == rt2->u.dst.dev)
 					ret = 1;
@@ -731,16 +737,15 @@ static int callforward_do_filter(union nf_conntrack_address *src,
 		}
 		break;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
+    defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
 	case AF_INET6: {
 		struct rt6_info *rt1, *rt2;
 
 		memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst));
 		memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst));
-		rt1 = (struct rt6_info *)ip6_route_output(NULL, &fl1);
-		if (rt1) {
-			rt2 = (struct rt6_info *)ip6_route_output(NULL, &fl2);
-			if (rt2) {
+		if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
+			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
 				    rt1->u.dst.dev == rt2->u.dst.dev)
-- 
cgit v1.2.3


From 279c2c74b6a26fbd8c3dc100a59c3ac0ff7559fa Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:22:24 -0800
Subject: [NETFILTER]: nf_conntrack_proto_icmp: kill extern declaration in .c
 file

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 0e2c448ea38..4153e049498 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -128,7 +128,6 @@ static int icmp_new(struct nf_conn *conntrack,
 	return 1;
 }
 
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
 static int
 icmp_error_message(struct sk_buff *skb,
-- 
cgit v1.2.3


From 41c5b317036fcb593d14b4dfd12e3318faf3af8a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:22:43 -0800
Subject: [NETFILTER]: Use nf_register_hooks for multiple registrations

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_core.c | 104 +++++++++++++++++----------------------------
 net/sched/sch_ingress.c    |  44 ++++++++-----------
 2 files changed, 57 insertions(+), 91 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 30e8f757152..f5ba606f054 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1025,43 +1025,42 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 }
 
 
-/* After packet filtering, forward packet through VS/DR, VS/TUN,
-   or VS/NAT(change destination), so that filtering rules can be
-   applied to IPVS. */
-static struct nf_hook_ops ip_vs_in_ops = {
-	.hook		= ip_vs_in,
-	.owner		= THIS_MODULE,
-	.pf		= PF_INET,
-	.hooknum        = NF_INET_LOCAL_IN,
-	.priority       = 100,
-};
-
-/* After packet filtering, change source only for VS/NAT */
-static struct nf_hook_ops ip_vs_out_ops = {
-	.hook		= ip_vs_out,
-	.owner		= THIS_MODULE,
-	.pf		= PF_INET,
-	.hooknum        = NF_INET_FORWARD,
-	.priority       = 100,
-};
-
-/* After packet filtering (but before ip_vs_out_icmp), catch icmp
-   destined for 0.0.0.0/0, which is for incoming IPVS connections */
-static struct nf_hook_ops ip_vs_forward_icmp_ops = {
-	.hook		= ip_vs_forward_icmp,
-	.owner		= THIS_MODULE,
-	.pf		= PF_INET,
-	.hooknum        = NF_INET_FORWARD,
-	.priority       = 99,
-};
-
-/* Before the netfilter connection tracking, exit from POST_ROUTING */
-static struct nf_hook_ops ip_vs_post_routing_ops = {
-	.hook		= ip_vs_post_routing,
-	.owner		= THIS_MODULE,
-	.pf		= PF_INET,
-	.hooknum        = NF_INET_POST_ROUTING,
-	.priority       = NF_IP_PRI_NAT_SRC-1,
+static struct nf_hook_ops ip_vs_ops[]  = {
+	/* After packet filtering, forward packet through VS/DR, VS/TUN,
+	 * or VS/NAT(change destination), so that filtering rules can be
+	 * applied to IPVS. */
+	{
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_LOCAL_IN,
+		.priority       = 100,
+	},
+	/* After packet filtering, change source only for VS/NAT */
+	{
+		.hook		= ip_vs_out,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 100,
+	},
+	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+	{
+		.hook		= ip_vs_forward_icmp,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 99,
+	},
+	/* Before the netfilter connection tracking, exit from POST_ROUTING */
+	{
+		.hook		= ip_vs_post_routing,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_POST_ROUTING,
+		.priority       = NF_IP_PRI_NAT_SRC-1,
+	},
 };
 
 
@@ -1092,37 +1091,15 @@ static int __init ip_vs_init(void)
 		goto cleanup_app;
 	}
 
-	ret = nf_register_hook(&ip_vs_in_ops);
+	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
-		IP_VS_ERR("can't register in hook.\n");
+		IP_VS_ERR("can't register hooks.\n");
 		goto cleanup_conn;
 	}
 
-	ret = nf_register_hook(&ip_vs_out_ops);
-	if (ret < 0) {
-		IP_VS_ERR("can't register out hook.\n");
-		goto cleanup_inops;
-	}
-	ret = nf_register_hook(&ip_vs_post_routing_ops);
-	if (ret < 0) {
-		IP_VS_ERR("can't register post_routing hook.\n");
-		goto cleanup_outops;
-	}
-	ret = nf_register_hook(&ip_vs_forward_icmp_ops);
-	if (ret < 0) {
-		IP_VS_ERR("can't register forward_icmp hook.\n");
-		goto cleanup_postroutingops;
-	}
-
 	IP_VS_INFO("ipvs loaded.\n");
 	return ret;
 
-  cleanup_postroutingops:
-	nf_unregister_hook(&ip_vs_post_routing_ops);
-  cleanup_outops:
-	nf_unregister_hook(&ip_vs_out_ops);
-  cleanup_inops:
-	nf_unregister_hook(&ip_vs_in_ops);
   cleanup_conn:
 	ip_vs_conn_cleanup();
   cleanup_app:
@@ -1136,10 +1113,7 @@ static int __init ip_vs_init(void)
 
 static void __exit ip_vs_cleanup(void)
 {
-	nf_unregister_hook(&ip_vs_forward_icmp_ops);
-	nf_unregister_hook(&ip_vs_post_routing_ops);
-	nf_unregister_hook(&ip_vs_out_ops);
-	nf_unregister_hook(&ip_vs_in_ops);
+	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 902d82ea764..f4af9b604f3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -231,20 +231,21 @@ ing_hook(unsigned int hook, struct sk_buff *skb,
 }
 
 /* after ipt_filter */
-static struct nf_hook_ops ing_ops = {
-	.hook           = ing_hook,
-	.owner		= THIS_MODULE,
-	.pf             = PF_INET,
-	.hooknum        = NF_INET_PRE_ROUTING,
-	.priority       = NF_IP_PRI_FILTER + 1,
-};
-
-static struct nf_hook_ops ing6_ops = {
-	.hook           = ing_hook,
-	.owner		= THIS_MODULE,
-	.pf             = PF_INET6,
-	.hooknum        = NF_INET_PRE_ROUTING,
-	.priority       = NF_IP6_PRI_FILTER + 1,
+static struct nf_hook_ops ing_ops[] = {
+	{
+		.hook           = ing_hook,
+		.owner		= THIS_MODULE,
+		.pf             = PF_INET,
+		.hooknum        = NF_INET_PRE_ROUTING,
+		.priority       = NF_IP_PRI_FILTER + 1,
+	},
+	{
+		.hook           = ing_hook,
+		.owner		= THIS_MODULE,
+		.pf             = PF_INET6,
+		.hooknum        = NF_INET_PRE_ROUTING,
+		.priority       = NF_IP6_PRI_FILTER + 1,
+	},
 };
 
 #endif
@@ -268,17 +269,11 @@ static int ingress_init(struct Qdisc *sch,struct rtattr *opt)
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
 	if (!nf_registered) {
-		if (nf_register_hook(&ing_ops) < 0) {
+		if (nf_register_hooks(ing_ops, ARRAY_SIZE(ing_ops)) < 0) {
 			printk("ingress qdisc registration error \n");
 			return -EINVAL;
 		}
 		nf_registered++;
-
-		if (nf_register_hook(&ing6_ops) < 0) {
-			printk("IPv6 ingress qdisc registration error, " \
-			    "disabling IPv6 support.\n");
-		} else
-			nf_registered++;
 	}
 #endif
 #endif
@@ -385,11 +380,8 @@ static void __exit ingress_module_exit(void)
 	unregister_qdisc(&ingress_qdisc_ops);
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
-	if (nf_registered) {
-		nf_unregister_hook(&ing_ops);
-		if (nf_registered > 1)
-			nf_unregister_hook(&ing6_ops);
-	}
+	if (nf_registered)
+		nf_unregister_hooks(ing_ops, ARRAY_SIZE(ing_ops));
 #endif
 #endif
 }
-- 
cgit v1.2.3


From 1999414a4ece2b8cea3fb3c4dc8fe06796256269 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:23:00 -0800
Subject: [NETFILTER]: Mark hooks __read_mostly

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c                      | 2 +-
 net/bridge/netfilter/ebtable_filter.c          | 2 +-
 net/bridge/netfilter/ebtable_nat.c             | 2 +-
 net/decnet/netfilter/dn_rtmsg.c                | 2 +-
 net/ipv4/ipvs/ip_vs_core.c                     | 2 +-
 net/ipv4/netfilter/arptable_filter.c           | 2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c             | 2 +-
 net/ipv4/netfilter/iptable_filter.c            | 2 +-
 net/ipv4/netfilter/iptable_mangle.c            | 2 +-
 net/ipv4/netfilter/iptable_raw.c               | 2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
 net/ipv4/netfilter/nf_nat_standalone.c         | 2 +-
 net/ipv6/netfilter/ip6table_filter.c           | 2 +-
 net/ipv6/netfilter/ip6table_mangle.c           | 2 +-
 net/ipv6/netfilter/ip6table_raw.c              | 2 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +-
 net/sched/sch_ingress.c                        | 2 +-
 17 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f9ef3e58b4c..859fe4d8669 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -871,7 +871,7 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
  * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
  * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
  * ip_refrag() can return NF_STOLEN. */
-static struct nf_hook_ops br_nf_ops[] = {
+static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 	{ .hook = br_nf_pre_routing,
 	  .owner = THIS_MODULE,
 	  .pf = PF_BRIDGE,
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 210493f99bc..fb810908732 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -67,7 +67,7 @@ ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
 	return ebt_do_table(hook, skb, in, out, &frame_filter);
 }
 
-static struct nf_hook_ops ebt_ops_filter[] = {
+static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 	{
 		.hook		= ebt_hook,
 		.owner		= THIS_MODULE,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 3e58c2e5ee2..bc712730c54 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -74,7 +74,7 @@ ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
 	return ebt_do_table(hook, skb, in, out, &frame_nat);
 }
 
-static struct nf_hook_ops ebt_ops_nat[] = {
+static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 	{
 		.hook		= ebt_nat_dst,
 		.owner		= THIS_MODULE,
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 43fcd29046d..96375f2e64f 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 	RCV_SKB_FAIL(-EINVAL);
 }
 
-static struct nf_hook_ops dnrmg_ops = {
+static struct nf_hook_ops dnrmg_ops __read_mostly = {
 	.hook		= dnrmg_hook,
 	.pf		= PF_DECnet,
 	.hooknum	= NF_DN_ROUTE,
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index f5ba606f054..041f5120808 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1025,7 +1025,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 }
 
 
-static struct nf_hook_ops ip_vs_ops[]  = {
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After packet filtering, forward packet through VS/DR, VS/TUN,
 	 * or VS/NAT(change destination), so that filtering rules can be
 	 * applied to IPVS. */
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 302d3da5f69..7201511d54d 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -64,7 +64,7 @@ static unsigned int arpt_hook(unsigned int hook,
 	return arpt_do_table(skb, hook, in, out, &packet_filter);
 }
 
-static struct nf_hook_ops arpt_ops[] = {
+static struct nf_hook_ops arpt_ops[] __read_mostly = {
 	{
 		.hook		= arpt_hook,
 		.owner		= THIS_MODULE,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index b5de6bd3e43..dc1e7b41883 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -551,7 +551,7 @@ arp_mangle(unsigned int hook,
 	return NF_ACCEPT;
 }
 
-static struct nf_hook_ops cip_arp_ops = {
+static struct nf_hook_ops cip_arp_ops __read_mostly = {
 	.hook = arp_mangle,
 	.pf = NF_ARP,
 	.hooknum = NF_ARP_OUT,
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 06ab64e30e8..29bb4f9fbda 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -91,7 +91,7 @@ ipt_local_out_hook(unsigned int hook,
 	return ipt_do_table(skb, hook, in, out, &packet_filter);
 }
 
-static struct nf_hook_ops ipt_ops[] = {
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
 	{
 		.hook		= ipt_hook,
 		.owner		= THIS_MODULE,
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 0335827d3e4..5c4be202430 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -128,7 +128,7 @@ ipt_local_hook(unsigned int hook,
 	return ret;
 }
 
-static struct nf_hook_ops ipt_ops[] = {
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 66be2329559..dc34aa27453 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -74,7 +74,7 @@ ipt_local_hook(unsigned int hook,
 }
 
 /* 'raw' is the very first table. */
-static struct nf_hook_ops ipt_ops[] = {
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
 	{
 		.hook = ipt_hook,
 		.pf = PF_INET,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index c91725a8578..cd2d8451dda 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -185,7 +185,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 
 /* Connection tracking may drop packets, but never alters them, so
    make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] = {
+static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
 	{
 		.hook		= ipv4_conntrack_defrag,
 		.owner		= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 84172e9dcb1..a2b02f01cc5 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -273,7 +273,7 @@ nf_nat_adjust(unsigned int hooknum,
 
 /* We must be after connection tracking and before packet filtering. */
 
-static struct nf_hook_ops nf_nat_ops[] = {
+static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
 	/* Before packet filtering, change destination */
 	{
 		.hook		= nf_nat_in,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 0ae072dd692..87d38d08aad 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -90,7 +90,7 @@ ip6t_local_out_hook(unsigned int hook,
 	return ip6t_do_table(skb, hook, in, out, &packet_filter);
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
 		.hook		= ip6t_hook,
 		.owner		= THIS_MODULE,
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 8e62b231682..d6082600bc5 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -120,7 +120,7 @@ ip6t_local_hook(unsigned int hook,
 	return ret;
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
 		.hook		= ip6t_route_hook,
 		.owner		= THIS_MODULE,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 4fecd8de8cc..eccbaaa104a 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -54,7 +54,7 @@ ip6t_hook(unsigned int hook,
 	return ip6t_do_table(skb, hook, in, out, &packet_raw);
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
 	  .hook = ip6t_hook,
 	  .pf = PF_INET6,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 50f46787fda..97a553036dd 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -258,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 	return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
 }
 
-static struct nf_hook_ops ipv6_conntrack_ops[] = {
+static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
 	{
 		.hook		= ipv6_defrag,
 		.owner		= THIS_MODULE,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f4af9b604f3..89c32a9bcc5 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -231,7 +231,7 @@ ing_hook(unsigned int hook, struct sk_buff *skb,
 }
 
 /* after ipt_filter */
-static struct nf_hook_ops ing_ops[] = {
+static struct nf_hook_ops ing_ops[] __read_mostly = {
 	{
 		.hook           = ing_hook,
 		.owner		= THIS_MODULE,
-- 
cgit v1.2.3


From 8b1cf0db2aced837fcd50072e81e32c5836a1ee1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:23:17 -0800
Subject: [NETFILTER]: nf_queue: minor cleanup

Clean up

if (x) y;

constructs. We've got nothing to hide :)

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_queue.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0cef1433d66..81d010a05b9 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -138,15 +138,18 @@ static int __nf_queue(struct sk_buff *skb,
 	}
 
 	/* Bump dev refs so they don't vanish while packet is out */
-	if (indev) dev_hold(indev);
-	if (outdev) dev_hold(outdev);
-
+	if (indev)
+		dev_hold(indev);
+	if (outdev)
+		dev_hold(outdev);
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge) {
 		physindev = skb->nf_bridge->physindev;
-		if (physindev) dev_hold(physindev);
+		if (physindev)
+			dev_hold(physindev);
 		physoutdev = skb->nf_bridge->physoutdev;
-		if (physoutdev) dev_hold(physoutdev);
+		if (physoutdev)
+			dev_hold(physoutdev);
 	}
 #endif
 	afinfo->saveroute(skb, info);
@@ -156,11 +159,15 @@ static int __nf_queue(struct sk_buff *skb,
 
 	if (status < 0) {
 		/* James M doesn't say fuck enough. */
-		if (indev) dev_put(indev);
-		if (outdev) dev_put(outdev);
+		if (indev)
+			dev_put(indev);
+		if (outdev)
+			dev_put(outdev);
 #ifdef CONFIG_BRIDGE_NETFILTER
-		if (physindev) dev_put(physindev);
-		if (physoutdev) dev_put(physoutdev);
+		if (physindev)
+			dev_put(physindev);
+		if (physoutdev)
+			dev_put(physoutdev);
 #endif
 		module_put(info->elem->owner);
 		kfree(info);
@@ -222,8 +229,10 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 	rcu_read_lock();
 
 	/* Release those devices we held, or Alexey will kill me. */
-	if (info->indev) dev_put(info->indev);
-	if (info->outdev) dev_put(info->outdev);
+	if (info->indev)
+		dev_put(info->indev);
+	if (info->outdev)
+		dev_put(info->outdev);
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge) {
 		if (skb->nf_bridge->physindev)
-- 
cgit v1.2.3


From fb46990dba94866462e90623e183d02ec591cf8f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:23:41 -0800
Subject: [NETFILTER]: nf_queue: remove unnecessary hook existance check

We hold a module reference for each queued packet, so the hook that
queued the packet can't disappear. Also remove an obsolete  comment
stating the opposite.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/core.c     |  5 -----
 net/netfilter/nf_queue.c | 13 -------------
 2 files changed, 18 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e6d3a69b9e9..6819a4113e1 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -51,11 +51,6 @@ void nf_unregister_afinfo(struct nf_afinfo *afinfo)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 
-/* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE.  We could keep a count
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks.  Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 static DEFINE_MUTEX(nf_hook_mutex);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 81d010a05b9..0bea88c30e5 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -223,7 +223,6 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		 unsigned int verdict)
 {
 	struct list_head *elem = &info->elem->list;
-	struct list_head *i;
 	struct nf_afinfo *afinfo;
 
 	rcu_read_lock();
@@ -245,18 +244,6 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 	/* Drop reference to owner of hook which queued us. */
 	module_put(info->elem->owner);
 
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
-		if (i == elem)
-			break;
-	}
-
-	if (i == &nf_hooks[info->pf][info->hook]) {
-		/* The module which sent it to userspace is gone. */
-		NFDEBUG("%s: module disappeared, dropping packet.\n",
-			__FUNCTION__);
-		verdict = NF_DROP;
-	}
-
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT) {
 		elem = elem->prev;
-- 
cgit v1.2.3


From e3ac5298159c5286cef86f0865d4fa6a606bd391 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:23:57 -0800
Subject: [NETFILTER]: nf_queue: make queue_handler const

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c   |  2 +-
 net/ipv6/netfilter/ip6_queue.c  |  2 +-
 net/netfilter/nf_queue.c        | 12 ++++++------
 net/netfilter/nfnetlink_queue.c |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 14d64a383db..062ff196f2c 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -645,7 +645,7 @@ static const struct file_operations ip_queue_proc_fops = {
 	.owner		= THIS_MODULE,
 };
 
-static struct nf_queue_handler nfqh = {
+static const struct nf_queue_handler nfqh = {
 	.name	= "ip_queue",
 	.outfn	= &ipq_enqueue_packet,
 };
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index e273605eef8..d6e971bd9fe 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -634,7 +634,7 @@ static const struct file_operations ip6_queue_proc_fops = {
 	.owner		= THIS_MODULE,
 };
 
-static struct nf_queue_handler nfqh = {
+static const struct nf_queue_handler nfqh = {
 	.name	= "ip6_queue",
 	.outfn	= &ipq_enqueue_packet,
 };
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0bea88c30e5..dd18126a1a6 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -15,13 +15,13 @@
  * long term mutex.  The handler must provide an an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
-static struct nf_queue_handler *queue_handler[NPROTO];
+static const struct nf_queue_handler *queue_handler[NPROTO];
 
 static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
+int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
 {
 	int ret;
 
@@ -44,7 +44,7 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
+int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh)
 {
 	if (pf >= NPROTO)
 		return -EINVAL;
@@ -64,7 +64,7 @@ int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
-void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
+void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 {
 	int pf;
 
@@ -98,7 +98,7 @@ static int __nf_queue(struct sk_buff *skb,
 	struct net_device *physoutdev = NULL;
 #endif
 	struct nf_afinfo *afinfo;
-	struct nf_queue_handler *qh;
+	const struct nf_queue_handler *qh;
 
 	/* QUEUE == DROP if noone is waiting, to be safe. */
 	rcu_read_lock();
@@ -313,7 +313,7 @@ static int seq_show(struct seq_file *s, void *v)
 {
 	int ret;
 	loff_t *pos = v;
-	struct nf_queue_handler *qh;
+	const struct nf_queue_handler *qh;
 
 	rcu_read_lock();
 	qh = rcu_dereference(queue_handler[*pos]);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3ceeffcf6f9..b75091c8ae5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -849,7 +849,7 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
 	[NFQA_CFG_PARAMS]	= { .len = sizeof(struct nfqnl_msg_config_params) },
 };
 
-static struct nf_queue_handler nfqh = {
+static const struct nf_queue_handler nfqh = {
 	.name 	= "nf_queue",
 	.outfn	= &nfqnl_enqueue_packet,
 };
-- 
cgit v1.2.3


From f9d8928f8340ab8e76f1da4799cb19a6ff58b83d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:24:30 -0800
Subject: [NETFILTER]: nf_queue: remove unused data pointer

Remove the data pointer from struct nf_queue_handler. It has never been used
and is useless for the only handler that really matters, nfnetlink_queue,
since the handler is shared between all instances.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c   | 2 +-
 net/ipv6/netfilter/ip6_queue.c  | 2 +-
 net/netfilter/nf_queue.c        | 2 +-
 net/netfilter/nfnetlink_queue.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 062ff196f2c..08e7f8b4e95 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -272,7 +272,7 @@ nlmsg_failure:
 
 static int
 ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		   unsigned int queuenum, void *data)
+		   unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index d6e971bd9fe..5a9ca0d4fb2 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -269,7 +269,7 @@ nlmsg_failure:
 
 static int
 ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		   unsigned int queuenum, void *data)
+		   unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index dd18126a1a6..c098ccbbbce 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -153,7 +153,7 @@ static int __nf_queue(struct sk_buff *skb,
 	}
 #endif
 	afinfo->saveroute(skb, info);
-	status = qh->outfn(skb, info, queuenum, qh->data);
+	status = qh->outfn(skb, info, queuenum);
 
 	rcu_read_unlock();
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index b75091c8ae5..94ec1c263d0 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -534,7 +534,7 @@ nla_put_failure:
 
 static int
 nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		     unsigned int queuenum, void *data)
+		     unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
-- 
cgit v1.2.3


From c01cd429fc118c5db92475c5f08b307718aa4efc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:24:48 -0800
Subject: [NETFILTER]: nf_queue: move queueing related functions/struct to
 seperate header

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter.c            | 1 +
 net/ipv4/netfilter/ip_queue.c   | 1 +
 net/ipv6/netfilter.c            | 1 +
 net/ipv6/netfilter/ip6_queue.c  | 1 +
 net/netfilter/nf_queue.c        | 1 +
 net/netfilter/nfnetlink_queue.c | 1 +
 6 files changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 599d448ef57..f7166084a5a 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -7,6 +7,7 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
+#include <net/netfilter/nf_queue.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 08e7f8b4e95..2966fbddce8 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -28,6 +28,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/route.h>
+#include <net/netfilter/nf_queue.h>
 
 #define IPQ_QMAX_DEFAULT 1024
 #define IPQ_PROC_FS_NAME "ip_queue"
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 281f732e3c9..55ea9c6ec74 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -8,6 +8,7 @@
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
 #include <net/ip6_checksum.h>
+#include <net/netfilter/nf_queue.h>
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 5a9ca0d4fb2..7ff9915750a 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -29,6 +29,7 @@
 #include <net/sock.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/netfilter/nf_queue.h>
 #include <linux/netfilter_ipv4/ip_queue.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index c098ccbbbce..bd71f433b85 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -7,6 +7,7 @@
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
+#include <net/netfilter/nf_queue.h>
 
 #include "nf_internals.h"
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 94ec1c263d0..3a09f021065 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -27,6 +27,7 @@
 #include <linux/netfilter/nfnetlink_queue.h>
 #include <linux/list.h>
 #include <net/sock.h>
+#include <net/netfilter/nf_queue.h>
 
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3


From 0ac41e81462de20f87242caac2b9084c202c33b7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:25:03 -0800
Subject: [NETFILTER]: {nf_netlink,ip,ip6}_queue: use list_for_each_entry

Use list_add_tail/list_for_each_entry instead of list_add and
list_for_each_prev as a preparation for switching to RCU.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c   | 8 +++-----
 net/ipv6/netfilter/ip6_queue.c  | 8 +++-----
 net/netfilter/nfnetlink_queue.c | 8 +++-----
 3 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 2966fbddce8..9e72246ede2 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -73,7 +73,7 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
 static inline void
 __ipq_enqueue_entry(struct ipq_queue_entry *entry)
 {
-       list_add(&entry->list, &queue_list);
+       list_add_tail(&entry->list, &queue_list);
        queue_total++;
 }
 
@@ -84,11 +84,9 @@ __ipq_enqueue_entry(struct ipq_queue_entry *entry)
 static inline struct ipq_queue_entry *
 __ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
 {
-	struct list_head *p;
-
-	list_for_each_prev(p, &queue_list) {
-		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
+	struct ipq_queue_entry *entry;
 
+	list_for_each_entry(entry, &queue_list, list) {
 		if (!cmpfn || cmpfn(entry, data))
 			return entry;
 	}
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 7ff9915750a..243a00bcd3d 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -71,7 +71,7 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
 static inline void
 __ipq_enqueue_entry(struct ipq_queue_entry *entry)
 {
-       list_add(&entry->list, &queue_list);
+       list_add_tail(&entry->list, &queue_list);
        queue_total++;
 }
 
@@ -82,11 +82,9 @@ __ipq_enqueue_entry(struct ipq_queue_entry *entry)
 static inline struct ipq_queue_entry *
 __ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
 {
-	struct list_head *p;
-
-	list_for_each_prev(p, &queue_list) {
-		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
+	struct ipq_queue_entry *entry;
 
+	list_for_each_entry(entry, &queue_list, list) {
 		if (!cmpfn || cmpfn(entry, data))
 			return entry;
 	}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3a09f021065..74d5ed9490a 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -231,7 +231,7 @@ static inline void
 __enqueue_entry(struct nfqnl_instance *queue,
 		      struct nfqnl_queue_entry *entry)
 {
-       list_add(&entry->list, &queue->queue_list);
+       list_add_tail(&entry->list, &queue->queue_list);
        queue->queue_total++;
 }
 
@@ -243,11 +243,9 @@ static inline struct nfqnl_queue_entry *
 __find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
 		   unsigned long data)
 {
-	struct list_head *p;
-
-	list_for_each_prev(p, &queue->queue_list) {
-		struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p;
+	struct nfqnl_queue_entry *entry;
 
+	list_for_each_entry(entry, &queue->queue_list, list) {
 		if (!cmpfn || cmpfn(entry, data))
 			return entry;
 	}
-- 
cgit v1.2.3


From b43d8d85987bf21578e270c9f57c8503114ff399 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:25:30 -0800
Subject: [NETFILTER]: nfnetlink_queue: deobfuscate entry lookups

A queue entry lookup currently looks like this:

find_dequeue_entry -> __find_dequeue_entry ->
	__find_entry -> cmpfn -> id_cmp

Use simple open-coded list walking and kill the cmpfn for
find_dequeue_entry. Instead add it to nfqnl_flush (after
similar cleanups) and use nfqnl_flush for both complete
flushes and flushing entries related to a device.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 100 +++++++++++++---------------------------
 1 file changed, 31 insertions(+), 69 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 74d5ed9490a..cb901cf7577 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -171,7 +171,8 @@ out_unlock:
 	return NULL;
 }
 
-static void nfqnl_flush(struct nfqnl_instance *queue, int verdict);
+static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
+			unsigned long data);
 
 static void
 _instance_destroy2(struct nfqnl_instance *inst, int lock)
@@ -188,7 +189,7 @@ _instance_destroy2(struct nfqnl_instance *inst, int lock)
 		write_unlock_bh(&instances_lock);
 
 	/* then flush all pending skbs from the queue */
-	nfqnl_flush(inst, NF_DROP);
+	nfqnl_flush(inst, NULL, 0);
 
 	/* and finally put the refcount */
 	instance_put(inst);
@@ -235,54 +236,6 @@ __enqueue_entry(struct nfqnl_instance *queue,
        queue->queue_total++;
 }
 
-/*
- * Find and return a queued entry matched by cmpfn, or return the last
- * entry if cmpfn is NULL.
- */
-static inline struct nfqnl_queue_entry *
-__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
-		   unsigned long data)
-{
-	struct nfqnl_queue_entry *entry;
-
-	list_for_each_entry(entry, &queue->queue_list, list) {
-		if (!cmpfn || cmpfn(entry, data))
-			return entry;
-	}
-	return NULL;
-}
-
-static inline void
-__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry)
-{
-	list_del(&entry->list);
-	q->queue_total--;
-}
-
-static inline struct nfqnl_queue_entry *
-__find_dequeue_entry(struct nfqnl_instance *queue,
-		     nfqnl_cmpfn cmpfn, unsigned long data)
-{
-	struct nfqnl_queue_entry *entry;
-
-	entry = __find_entry(queue, cmpfn, data);
-	if (entry == NULL)
-		return NULL;
-
-	__dequeue_entry(queue, entry);
-	return entry;
-}
-
-
-static inline void
-__nfqnl_flush(struct nfqnl_instance *queue, int verdict)
-{
-	struct nfqnl_queue_entry *entry;
-
-	while ((entry = __find_dequeue_entry(queue, NULL, 0)))
-		issue_verdict(entry, verdict);
-}
-
 static inline int
 __nfqnl_set_mode(struct nfqnl_instance *queue,
 		 unsigned char mode, unsigned int range)
@@ -313,23 +266,42 @@ __nfqnl_set_mode(struct nfqnl_instance *queue,
 }
 
 static struct nfqnl_queue_entry *
-find_dequeue_entry(struct nfqnl_instance *queue,
-			 nfqnl_cmpfn cmpfn, unsigned long data)
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 {
-	struct nfqnl_queue_entry *entry;
+	struct nfqnl_queue_entry *entry = NULL, *i;
 
 	spin_lock_bh(&queue->lock);
-	entry = __find_dequeue_entry(queue, cmpfn, data);
+
+	list_for_each_entry(i, &queue->queue_list, list) {
+		if (i->id == id) {
+			entry = i;
+			break;
+		}
+	}
+
+	if (entry) {
+		list_del(&entry->list);
+		queue->queue_total--;
+	}
+
 	spin_unlock_bh(&queue->lock);
 
 	return entry;
 }
 
 static void
-nfqnl_flush(struct nfqnl_instance *queue, int verdict)
+nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 {
+	struct nfqnl_queue_entry *entry, *next;
+
 	spin_lock_bh(&queue->lock);
-	__nfqnl_flush(queue, verdict);
+	list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
+		if (!cmpfn || cmpfn(entry, data)) {
+			list_del(&entry->list);
+			queue->queue_total--;
+			issue_verdict(entry, NF_DROP);
+		}
+	}
 	spin_unlock_bh(&queue->lock);
 }
 
@@ -644,12 +616,6 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 	return 0;
 }
 
-static inline int
-id_cmp(struct nfqnl_queue_entry *e, unsigned long id)
-{
-	return (id == e->id);
-}
-
 static int
 nfqnl_set_mode(struct nfqnl_instance *queue,
 	       unsigned char mode, unsigned int range)
@@ -706,12 +672,8 @@ nfqnl_dev_drop(int ifindex)
 		struct nfqnl_instance *inst;
 		struct hlist_head *head = &instance_table[i];
 
-		hlist_for_each_entry(inst, tmp, head, hlist) {
-			struct nfqnl_queue_entry *entry;
-			while ((entry = find_dequeue_entry(inst, dev_cmp,
-							   ifindex)) != NULL)
-				issue_verdict(entry, NF_DROP);
-		}
+		hlist_for_each_entry(inst, tmp, head, hlist)
+			nfqnl_flush(inst, dev_cmp, ifindex);
 	}
 
 	read_unlock_bh(&instances_lock);
@@ -811,7 +773,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 		goto err_out_put;
 	}
 
-	entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id));
+	entry = find_dequeue_entry(queue, ntohl(vhdr->id));
 	if (entry == NULL) {
 		err = -ENOENT;
 		goto err_out_put;
-- 
cgit v1.2.3


From 9521409265d3bae939ace4c259f765c29339730f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:25:46 -0800
Subject: [NETFILTER]: ip_queue: deobfuscate entry lookups

A queue entry lookup currently looks like this:

ipq_find_dequeue_entry -> __ipq_find_dequeue_entry ->
	__ipq_find_entry -> cmpfn -> id_cmp

Use simple open-coded list walking and kill the cmpfn for
ipq_find_dequeue_entry. Instead add it to ipq_flush (after
similar cleanups) and use ipq_flush for both complete flushes
and flushing entries related to a device.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c | 101 ++++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 9e72246ede2..df2957c5bcb 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -77,52 +77,6 @@ __ipq_enqueue_entry(struct ipq_queue_entry *entry)
        queue_total++;
 }
 
-/*
- * Find and return a queued entry matched by cmpfn, or return the last
- * entry if cmpfn is NULL.
- */
-static inline struct ipq_queue_entry *
-__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-
-	list_for_each_entry(entry, &queue_list, list) {
-		if (!cmpfn || cmpfn(entry, data))
-			return entry;
-	}
-	return NULL;
-}
-
-static inline void
-__ipq_dequeue_entry(struct ipq_queue_entry *entry)
-{
-	list_del(&entry->list);
-	queue_total--;
-}
-
-static inline struct ipq_queue_entry *
-__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-
-	entry = __ipq_find_entry(cmpfn, data);
-	if (entry == NULL)
-		return NULL;
-
-	__ipq_dequeue_entry(entry);
-	return entry;
-}
-
-
-static inline void
-__ipq_flush(int verdict)
-{
-	struct ipq_queue_entry *entry;
-
-	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
-		ipq_issue_verdict(entry, verdict);
-}
-
 static inline int
 __ipq_set_mode(unsigned char mode, unsigned int range)
 {
@@ -149,31 +103,59 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
 	return status;
 }
 
+static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
+
 static inline void
 __ipq_reset(void)
 {
 	peer_pid = 0;
 	net_disable_timestamp();
 	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NF_DROP);
+	__ipq_flush(NULL, 0);
 }
 
 static struct ipq_queue_entry *
-ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+ipq_find_dequeue_entry(unsigned long id)
 {
-	struct ipq_queue_entry *entry;
+	struct ipq_queue_entry *entry = NULL, *i;
 
 	write_lock_bh(&queue_lock);
-	entry = __ipq_find_dequeue_entry(cmpfn, data);
+
+	list_for_each_entry(i, &queue_list, list) {
+		if ((unsigned long)i == id) {
+			entry = i;
+			break;
+		}
+	}
+
+	if (entry) {
+		list_del(&entry->list);
+		queue_total--;
+	}
+
 	write_unlock_bh(&queue_lock);
 	return entry;
 }
 
 static void
-ipq_flush(int verdict)
+__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &queue_list, list) {
+		if (!cmpfn || cmpfn(entry, data)) {
+			list_del(&entry->list);
+			queue_total--;
+			ipq_issue_verdict(entry, NF_DROP);
+		}
+	}
+}
+
+static void
+ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
 	write_lock_bh(&queue_lock);
-	__ipq_flush(verdict);
+	__ipq_flush(cmpfn, data);
 	write_unlock_bh(&queue_lock);
 }
 
@@ -367,12 +349,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	return 0;
 }
 
-static inline int
-id_cmp(struct ipq_queue_entry *e, unsigned long id)
-{
-	return (id == (unsigned long )e);
-}
-
 static int
 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 {
@@ -381,7 +357,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 	if (vmsg->value > NF_MAX_VERDICT)
 		return -EINVAL;
 
-	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	entry = ipq_find_dequeue_entry(vmsg->id);
 	if (entry == NULL)
 		return -ENOENT;
 	else {
@@ -460,10 +436,7 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
 static void
 ipq_dev_drop(int ifindex)
 {
-	struct ipq_queue_entry *entry;
-
-	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
-		ipq_issue_verdict(entry, NF_DROP);
+	ipq_flush(dev_cmp, ifindex);
 }
 
 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -699,7 +672,7 @@ static void __exit ip_queue_fini(void)
 {
 	nf_unregister_queue_handlers(&nfqh);
 	synchronize_net();
-	ipq_flush(NF_DROP);
+	ipq_flush(NULL, 0);
 
 	unregister_sysctl_table(ipq_sysctl_header);
 	unregister_netdevice_notifier(&ipq_dev_notifier);
-- 
cgit v1.2.3


From 171b7fc4fc178a004aec8d06eb745c30ae726fb6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:26:02 -0800
Subject: [NETFILTER]: ip6_queue: deobfuscate entry lookups

A queue entry lookup currently looks like this:

ipq_find_dequeue_entry -> __ipq_find_dequeue_entry ->
	__ipq_find_entry -> cmpfn -> id_cmp

Use simple open-coded list walking and kill the cmpfn for
ipq_find_dequeue_entry. Instead add it to ipq_flush (after
similar cleanups) and use ipq_flush for both complete flushes
and flushing entries related to a device.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_queue.c | 101 +++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 243a00bcd3d..7d0780d02d0 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -75,52 +75,6 @@ __ipq_enqueue_entry(struct ipq_queue_entry *entry)
        queue_total++;
 }
 
-/*
- * Find and return a queued entry matched by cmpfn, or return the last
- * entry if cmpfn is NULL.
- */
-static inline struct ipq_queue_entry *
-__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-
-	list_for_each_entry(entry, &queue_list, list) {
-		if (!cmpfn || cmpfn(entry, data))
-			return entry;
-	}
-	return NULL;
-}
-
-static inline void
-__ipq_dequeue_entry(struct ipq_queue_entry *entry)
-{
-	list_del(&entry->list);
-	queue_total--;
-}
-
-static inline struct ipq_queue_entry *
-__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-
-	entry = __ipq_find_entry(cmpfn, data);
-	if (entry == NULL)
-		return NULL;
-
-	__ipq_dequeue_entry(entry);
-	return entry;
-}
-
-
-static inline void
-__ipq_flush(int verdict)
-{
-	struct ipq_queue_entry *entry;
-
-	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
-		ipq_issue_verdict(entry, verdict);
-}
-
 static inline int
 __ipq_set_mode(unsigned char mode, unsigned int range)
 {
@@ -147,31 +101,59 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
 	return status;
 }
 
+static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
+
 static inline void
 __ipq_reset(void)
 {
 	peer_pid = 0;
 	net_disable_timestamp();
 	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NF_DROP);
+	__ipq_flush(NULL, 0);
 }
 
 static struct ipq_queue_entry *
-ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+ipq_find_dequeue_entry(unsigned long id)
 {
-	struct ipq_queue_entry *entry;
+	struct ipq_queue_entry *entry = NULL, *i;
 
 	write_lock_bh(&queue_lock);
-	entry = __ipq_find_dequeue_entry(cmpfn, data);
+
+	list_for_each_entry(i, &queue_list, list) {
+		if ((unsigned long)i == id) {
+			entry = i;
+			break;
+		}
+	}
+
+	if (entry) {
+		list_del(&entry->list);
+		queue_total--;
+	}
+
 	write_unlock_bh(&queue_lock);
 	return entry;
 }
 
 static void
-ipq_flush(int verdict)
+__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &queue_list, list) {
+		if (!cmpfn || cmpfn(entry, data)) {
+			list_del(&entry->list);
+			queue_total--;
+			ipq_issue_verdict(entry, NF_DROP);
+		}
+	}
+}
+
+static void
+ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
 	write_lock_bh(&queue_lock);
-	__ipq_flush(verdict);
+	__ipq_flush(cmpfn, data);
 	write_unlock_bh(&queue_lock);
 }
 
@@ -364,12 +346,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	return 0;
 }
 
-static inline int
-id_cmp(struct ipq_queue_entry *e, unsigned long id)
-{
-	return (id == (unsigned long )e);
-}
-
 static int
 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 {
@@ -378,7 +354,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 	if (vmsg->value > NF_MAX_VERDICT)
 		return -EINVAL;
 
-	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	entry = ipq_find_dequeue_entry(vmsg->id);
 	if (entry == NULL)
 		return -ENOENT;
 	else {
@@ -449,10 +425,7 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
 static void
 ipq_dev_drop(int ifindex)
 {
-	struct ipq_queue_entry *entry;
-
-	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
-		ipq_issue_verdict(entry, NF_DROP);
+	ipq_flush(dev_cmp, ifindex);
 }
 
 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -689,7 +662,7 @@ static void __exit ip6_queue_fini(void)
 {
 	nf_unregister_queue_handlers(&nfqh);
 	synchronize_net();
-	ipq_flush(NF_DROP);
+	ipq_flush(NULL, 0);
 
 	unregister_sysctl_table(ipq_sysctl_header);
 	unregister_netdevice_notifier(&ipq_dev_notifier);
-- 
cgit v1.2.3


From 7a6c6653b3a977087ec64d76817c7ee6e1df5b60 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:26:18 -0800
Subject: [NETFILTER]: ip6_queue: resync dev-index based flushing

Resync dev_cmp to take bridge devices into account.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_queue.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 7d0780d02d0..9c50cb19b39 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -418,7 +418,16 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
 	if (entry->info->outdev)
 		if (entry->info->outdev->ifindex == ifindex)
 			return 1;
-
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (entry->skb->nf_bridge) {
+		if (entry->skb->nf_bridge->physindev &&
+		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
+			return 1;
+		if (entry->skb->nf_bridge->physoutdev &&
+		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+			return 1;
+	}
+#endif
 	return 0;
 }
 
-- 
cgit v1.2.3


From 02f014d88831f73b895c1fe09badb66c88e932d3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:26:33 -0800
Subject: [NETFILTER]: nf_queue: move list_head/skb/id to struct nf_info

Move common fields for queue management to struct nf_info and rename it
to struct nf_queue_entry. The avoids one allocation/free per packet and
simplifies the code a bit.

Alternatively we could add some private room at the tail, but since
all current users use identical structs this seems easier.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter.c            | 14 ++++----
 net/ipv4/netfilter/ip_queue.c   | 68 +++++++++++++------------------------
 net/ipv6/netfilter.c            | 14 ++++----
 net/ipv6/netfilter/ip6_queue.c  | 67 +++++++++++++------------------------
 net/netfilter/nf_queue.c        | 65 ++++++++++++++++++++----------------
 net/netfilter/nfnetlink_queue.c | 74 +++++++++++++----------------------------
 6 files changed, 121 insertions(+), 181 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f7166084a5a..7bf5e4a199f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -123,11 +123,12 @@ struct ip_rt_info {
 	u_int8_t tos;
 };
 
-static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
+static void nf_ip_saveroute(const struct sk_buff *skb,
+			    struct nf_queue_entry *entry)
 {
-	struct ip_rt_info *rt_info = nf_info_reroute(info);
+	struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (info->hook == NF_INET_LOCAL_OUT) {
+	if (entry->hook == NF_INET_LOCAL_OUT) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		rt_info->tos = iph->tos;
@@ -136,11 +137,12 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	}
 }
 
-static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info)
+static int nf_ip_reroute(struct sk_buff *skb,
+			 const struct nf_queue_entry *entry)
 {
-	const struct ip_rt_info *rt_info = nf_info_reroute(info);
+	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (info->hook == NF_INET_LOCAL_OUT) {
+	if (entry->hook == NF_INET_LOCAL_OUT) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->tos == rt_info->tos
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index df2957c5bcb..f1affd2344a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -35,13 +35,7 @@
 #define NET_IPQ_QMAX 2088
 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
 
-struct ipq_queue_entry {
-	struct list_head list;
-	struct nf_info *info;
-	struct sk_buff *skb;
-};
-
-typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
@@ -56,7 +50,7 @@ static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
 static void
-ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+ipq_issue_verdict(struct nf_queue_entry *entry, int verdict)
 {
 	/* TCP input path (and probably other bits) assume to be called
 	 * from softirq context, not from syscall, like ipq_issue_verdict is
@@ -64,14 +58,12 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
 	 * softirq, e.g.  We therefore emulate this by local_bh_disable() */
 
 	local_bh_disable();
-	nf_reinject(entry->skb, entry->info, verdict);
+	nf_reinject(entry, verdict);
 	local_bh_enable();
-
-	kfree(entry);
 }
 
 static inline void
-__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+__ipq_enqueue_entry(struct nf_queue_entry *entry)
 {
        list_add_tail(&entry->list, &queue_list);
        queue_total++;
@@ -114,10 +106,10 @@ __ipq_reset(void)
 	__ipq_flush(NULL, 0);
 }
 
-static struct ipq_queue_entry *
+static struct nf_queue_entry *
 ipq_find_dequeue_entry(unsigned long id)
 {
-	struct ipq_queue_entry *entry = NULL, *i;
+	struct nf_queue_entry *entry = NULL, *i;
 
 	write_lock_bh(&queue_lock);
 
@@ -140,7 +132,7 @@ ipq_find_dequeue_entry(unsigned long id)
 static void
 __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
-	struct ipq_queue_entry *entry, *next;
+	struct nf_queue_entry *entry, *next;
 
 	list_for_each_entry_safe(entry, next, &queue_list, list) {
 		if (!cmpfn || cmpfn(entry, data)) {
@@ -160,7 +152,7 @@ ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 }
 
 static struct sk_buff *
-ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 {
 	sk_buff_data_t old_tail;
 	size_t size = 0;
@@ -217,20 +209,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	pmsg->timestamp_sec   = tv.tv_sec;
 	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
-	pmsg->hook            = entry->info->hook;
+	pmsg->hook            = entry->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
 
-	if (entry->info->indev)
-		strcpy(pmsg->indev_name, entry->info->indev->name);
+	if (entry->indev)
+		strcpy(pmsg->indev_name, entry->indev->name);
 	else
 		pmsg->indev_name[0] = '\0';
 
-	if (entry->info->outdev)
-		strcpy(pmsg->outdev_name, entry->info->outdev->name);
+	if (entry->outdev)
+		strcpy(pmsg->outdev_name, entry->outdev->name);
 	else
 		pmsg->outdev_name[0] = '\0';
 
-	if (entry->info->indev && entry->skb->dev) {
+	if (entry->indev && entry->skb->dev) {
 		pmsg->hw_type = entry->skb->dev->type;
 		pmsg->hw_addrlen = dev_parse_header(entry->skb,
 						    pmsg->hw_addr);
@@ -252,28 +244,17 @@ nlmsg_failure:
 }
 
 static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		   unsigned int queuenum)
+ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
-	struct ipq_queue_entry *entry;
 
 	if (copy_mode == IPQ_COPY_NONE)
 		return -EAGAIN;
 
-	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	if (entry == NULL) {
-		printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
-		return -ENOMEM;
-	}
-
-	entry->info = info;
-	entry->skb = skb;
-
 	nskb = ipq_build_packet_message(entry, &status);
 	if (nskb == NULL)
-		goto err_out_free;
+		return status;
 
 	write_lock_bh(&queue_lock);
 
@@ -307,14 +288,11 @@ err_out_free_nskb:
 
 err_out_unlock:
 	write_unlock_bh(&queue_lock);
-
-err_out_free:
-	kfree(entry);
 	return status;
 }
 
 static int
-ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
 {
 	int diff;
 	int err;
@@ -352,7 +330,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 static int
 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 {
-	struct ipq_queue_entry *entry;
+	struct nf_queue_entry *entry;
 
 	if (vmsg->value > NF_MAX_VERDICT)
 		return -EINVAL;
@@ -412,13 +390,13 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
 }
 
 static int
-dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
-	if (entry->info->indev)
-		if (entry->info->indev->ifindex == ifindex)
+	if (entry->indev)
+		if (entry->indev->ifindex == ifindex)
 			return 1;
-	if (entry->info->outdev)
-		if (entry->info->outdev->ifindex == ifindex)
+	if (entry->outdev)
+		if (entry->outdev->ifindex == ifindex)
 			return 1;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (entry->skb->nf_bridge) {
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 55ea9c6ec74..945e6ae1956 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -57,11 +57,12 @@ struct ip6_rt_info {
 	struct in6_addr saddr;
 };
 
-static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+			     struct nf_queue_entry *entry)
 {
-	struct ip6_rt_info *rt_info = nf_info_reroute(info);
+	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (info->hook == NF_INET_LOCAL_OUT) {
+	if (entry->hook == NF_INET_LOCAL_OUT) {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		rt_info->daddr = iph->daddr;
@@ -69,11 +70,12 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	}
 }
 
-static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
+static int nf_ip6_reroute(struct sk_buff *skb,
+			  const struct nf_queue_entry *entry)
 {
-	struct ip6_rt_info *rt_info = nf_info_reroute(info);
+	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (info->hook == NF_INET_LOCAL_OUT) {
+	if (entry->hook == NF_INET_LOCAL_OUT) {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 9c50cb19b39..9014adae4fb 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -39,13 +39,7 @@
 #define NET_IPQ_QMAX 2088
 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
 
-struct ipq_queue_entry {
-	struct list_head list;
-	struct nf_info *info;
-	struct sk_buff *skb;
-};
-
-typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
@@ -60,16 +54,15 @@ static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
 static void
-ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+ipq_issue_verdict(struct nf_queue_entry *entry, int verdict)
 {
 	local_bh_disable();
-	nf_reinject(entry->skb, entry->info, verdict);
+	nf_reinject(entry, verdict);
 	local_bh_enable();
-	kfree(entry);
 }
 
 static inline void
-__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+__ipq_enqueue_entry(struct nf_queue_entry *entry)
 {
        list_add_tail(&entry->list, &queue_list);
        queue_total++;
@@ -112,10 +105,10 @@ __ipq_reset(void)
 	__ipq_flush(NULL, 0);
 }
 
-static struct ipq_queue_entry *
+static struct nf_queue_entry *
 ipq_find_dequeue_entry(unsigned long id)
 {
-	struct ipq_queue_entry *entry = NULL, *i;
+	struct nf_queue_entry *entry = NULL, *i;
 
 	write_lock_bh(&queue_lock);
 
@@ -138,7 +131,7 @@ ipq_find_dequeue_entry(unsigned long id)
 static void
 __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
-	struct ipq_queue_entry *entry, *next;
+	struct nf_queue_entry *entry, *next;
 
 	list_for_each_entry_safe(entry, next, &queue_list, list) {
 		if (!cmpfn || cmpfn(entry, data)) {
@@ -158,7 +151,7 @@ ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 }
 
 static struct sk_buff *
-ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 {
 	sk_buff_data_t old_tail;
 	size_t size = 0;
@@ -215,20 +208,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	pmsg->timestamp_sec   = tv.tv_sec;
 	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
-	pmsg->hook            = entry->info->hook;
+	pmsg->hook            = entry->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
 
-	if (entry->info->indev)
-		strcpy(pmsg->indev_name, entry->info->indev->name);
+	if (entry->indev)
+		strcpy(pmsg->indev_name, entry->indev->name);
 	else
 		pmsg->indev_name[0] = '\0';
 
-	if (entry->info->outdev)
-		strcpy(pmsg->outdev_name, entry->info->outdev->name);
+	if (entry->outdev)
+		strcpy(pmsg->outdev_name, entry->outdev->name);
 	else
 		pmsg->outdev_name[0] = '\0';
 
-	if (entry->info->indev && entry->skb->dev) {
+	if (entry->indev && entry->skb->dev) {
 		pmsg->hw_type = entry->skb->dev->type;
 		pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
 	}
@@ -249,28 +242,17 @@ nlmsg_failure:
 }
 
 static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		   unsigned int queuenum)
+ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
-	struct ipq_queue_entry *entry;
 
 	if (copy_mode == IPQ_COPY_NONE)
 		return -EAGAIN;
 
-	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	if (entry == NULL) {
-		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
-		return -ENOMEM;
-	}
-
-	entry->info = info;
-	entry->skb = skb;
-
 	nskb = ipq_build_packet_message(entry, &status);
 	if (nskb == NULL)
-		goto err_out_free;
+		return status;
 
 	write_lock_bh(&queue_lock);
 
@@ -304,14 +286,11 @@ err_out_free_nskb:
 
 err_out_unlock:
 	write_unlock_bh(&queue_lock);
-
-err_out_free:
-	kfree(entry);
 	return status;
 }
 
 static int
-ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
 {
 	int diff;
 	int err;
@@ -349,7 +328,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 static int
 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 {
-	struct ipq_queue_entry *entry;
+	struct nf_queue_entry *entry;
 
 	if (vmsg->value > NF_MAX_VERDICT)
 		return -EINVAL;
@@ -409,14 +388,14 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
 }
 
 static int
-dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
-	if (entry->info->indev)
-		if (entry->info->indev->ifindex == ifindex)
+	if (entry->indev)
+		if (entry->indev->ifindex == ifindex)
 			return 1;
 
-	if (entry->info->outdev)
-		if (entry->info->outdev->ifindex == ifindex)
+	if (entry->outdev)
+		if (entry->outdev->ifindex == ifindex)
 			return 1;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (entry->skb->nf_bridge) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index bd71f433b85..d9d3dc4ce1a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -93,7 +93,7 @@ static int __nf_queue(struct sk_buff *skb,
 		      unsigned int queuenum)
 {
 	int status;
-	struct nf_info *info;
+	struct nf_queue_entry *entry;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct net_device *physindev = NULL;
 	struct net_device *physoutdev = NULL;
@@ -118,8 +118,8 @@ static int __nf_queue(struct sk_buff *skb,
 		return 1;
 	}
 
-	info = kmalloc(sizeof(*info) + afinfo->route_key_size, GFP_ATOMIC);
-	if (!info) {
+	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+	if (!entry) {
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
 			       skb);
@@ -128,13 +128,20 @@ static int __nf_queue(struct sk_buff *skb,
 		return 1;
 	}
 
-	*info = (struct nf_info) {
-		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+	*entry = (struct nf_queue_entry) {
+		.skb	= skb,
+		.elem	= list_entry(elem, struct nf_hook_ops, list),
+		.pf	= pf,
+		.hook	= hook,
+		.indev	= indev,
+		.outdev	= outdev,
+		.okfn	= okfn,
+	};
 
 	/* If it's going away, ignore hook. */
-	if (!try_module_get(info->elem->owner)) {
+	if (!try_module_get(entry->elem->owner)) {
 		rcu_read_unlock();
-		kfree(info);
+		kfree(entry);
 		return 0;
 	}
 
@@ -153,8 +160,8 @@ static int __nf_queue(struct sk_buff *skb,
 			dev_hold(physoutdev);
 	}
 #endif
-	afinfo->saveroute(skb, info);
-	status = qh->outfn(skb, info, queuenum);
+	afinfo->saveroute(skb, entry);
+	status = qh->outfn(entry, queuenum);
 
 	rcu_read_unlock();
 
@@ -170,8 +177,8 @@ static int __nf_queue(struct sk_buff *skb,
 		if (physoutdev)
 			dev_put(physoutdev);
 #endif
-		module_put(info->elem->owner);
-		kfree(info);
+		module_put(entry->elem->owner);
+		kfree(entry);
 		kfree_skb(skb);
 
 		return 1;
@@ -220,19 +227,19 @@ int nf_queue(struct sk_buff *skb,
 	return 1;
 }
 
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
-		 unsigned int verdict)
+void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-	struct list_head *elem = &info->elem->list;
+	struct sk_buff *skb = entry->skb;
+	struct list_head *elem = &entry->elem->list;
 	struct nf_afinfo *afinfo;
 
 	rcu_read_lock();
 
 	/* Release those devices we held, or Alexey will kill me. */
-	if (info->indev)
-		dev_put(info->indev);
-	if (info->outdev)
-		dev_put(info->outdev);
+	if (entry->indev)
+		dev_put(entry->indev);
+	if (entry->outdev)
+		dev_put(entry->outdev);
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge) {
 		if (skb->nf_bridge->physindev)
@@ -243,7 +250,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 #endif
 
 	/* Drop reference to owner of hook which queued us. */
-	module_put(info->elem->owner);
+	module_put(entry->elem->owner);
 
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT) {
@@ -252,28 +259,28 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 	}
 
 	if (verdict == NF_ACCEPT) {
-		afinfo = nf_get_afinfo(info->pf);
-		if (!afinfo || afinfo->reroute(skb, info) < 0)
+		afinfo = nf_get_afinfo(entry->pf);
+		if (!afinfo || afinfo->reroute(skb, entry) < 0)
 			verdict = NF_DROP;
 	}
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-				     skb, info->hook,
-				     info->indev, info->outdev, &elem,
-				     info->okfn, INT_MIN);
+		verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
+				     skb, entry->hook,
+				     entry->indev, entry->outdev, &elem,
+				     entry->okfn, INT_MIN);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_STOP:
-		info->okfn(skb);
+		entry->okfn(skb);
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
-		if (!__nf_queue(skb, elem, info->pf, info->hook,
-				info->indev, info->outdev, info->okfn,
+		if (!__nf_queue(skb, elem, entry->pf, entry->hook,
+				entry->indev, entry->outdev, entry->okfn,
 				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
@@ -281,7 +288,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		kfree_skb(skb);
 	}
 	rcu_read_unlock();
-	kfree(info);
+	kfree(entry);
 	return;
 }
 EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index cb901cf7577..a4937649d00 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -45,13 +45,6 @@
 #define QDEBUG(x, ...)
 #endif
 
-struct nfqnl_queue_entry {
-	struct list_head list;
-	struct nf_info *info;
-	struct sk_buff *skb;
-	unsigned int id;
-};
-
 struct nfqnl_instance {
 	struct hlist_node hlist;		/* global list of queues */
 	atomic_t use;
@@ -73,7 +66,7 @@ struct nfqnl_instance {
 	struct list_head queue_list;		/* packets in queue */
 };
 
-typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long);
+typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static DEFINE_RWLOCK(instances_lock);
 
@@ -212,7 +205,7 @@ instance_destroy(struct nfqnl_instance *inst)
 
 
 static void
-issue_verdict(struct nfqnl_queue_entry *entry, int verdict)
+issue_verdict(struct nf_queue_entry *entry, int verdict)
 {
 	QDEBUG("entering for entry %p, verdict %u\n", entry, verdict);
 
@@ -222,15 +215,12 @@ issue_verdict(struct nfqnl_queue_entry *entry, int verdict)
 	 * softirq, e.g.  We therefore emulate this by local_bh_disable() */
 
 	local_bh_disable();
-	nf_reinject(entry->skb, entry->info, verdict);
+	nf_reinject(entry, verdict);
 	local_bh_enable();
-
-	kfree(entry);
 }
 
 static inline void
-__enqueue_entry(struct nfqnl_instance *queue,
-		      struct nfqnl_queue_entry *entry)
+__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
 {
        list_add_tail(&entry->list, &queue->queue_list);
        queue->queue_total++;
@@ -265,10 +255,10 @@ __nfqnl_set_mode(struct nfqnl_instance *queue,
 	return status;
 }
 
-static struct nfqnl_queue_entry *
+static struct nf_queue_entry *
 find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 {
-	struct nfqnl_queue_entry *entry = NULL, *i;
+	struct nf_queue_entry *entry = NULL, *i;
 
 	spin_lock_bh(&queue->lock);
 
@@ -292,7 +282,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 static void
 nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 {
-	struct nfqnl_queue_entry *entry, *next;
+	struct nf_queue_entry *entry, *next;
 
 	spin_lock_bh(&queue->lock);
 	list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
@@ -307,7 +297,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
-			   struct nfqnl_queue_entry *entry, int *errp)
+			   struct nf_queue_entry *entry, int *errp)
 {
 	sk_buff_data_t old_tail;
 	size_t size;
@@ -316,7 +306,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	struct nfqnl_msg_packet_hdr pmsg;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	struct nf_info *entinf = entry->info;
 	struct sk_buff *entskb = entry->skb;
 	struct net_device *indev;
 	struct net_device *outdev;
@@ -336,7 +325,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
-	outdev = entinf->outdev;
+	outdev = entry->outdev;
 
 	spin_lock_bh(&queue->lock);
 
@@ -379,23 +368,23 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
 	nfmsg = NLMSG_DATA(nlh);
-	nfmsg->nfgen_family = entinf->pf;
+	nfmsg->nfgen_family = entry->pf;
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(queue->queue_num);
 
 	pmsg.packet_id 		= htonl(entry->id);
 	pmsg.hw_protocol	= entskb->protocol;
-	pmsg.hook		= entinf->hook;
+	pmsg.hook		= entry->hook;
 
 	NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
 
-	indev = entinf->indev;
+	indev = entry->indev;
 	if (indev) {
 		tmp_uint = htonl(indev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
 		NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
 #else
-		if (entinf->pf == PF_BRIDGE) {
+		if (entry->pf == PF_BRIDGE) {
 			/* Case 1: indev is physical input device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
@@ -425,7 +414,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #ifndef CONFIG_BRIDGE_NETFILTER
 		NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
 #else
-		if (entinf->pf == PF_BRIDGE) {
+		if (entry->pf == PF_BRIDGE) {
 			/* Case 1: outdev is physical output device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
@@ -504,13 +493,11 @@ nla_put_failure:
 }
 
 static int
-nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		     unsigned int queuenum)
+nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
 	struct nfqnl_instance *queue;
-	struct nfqnl_queue_entry *entry;
 
 	QDEBUG("entered\n");
 
@@ -526,22 +513,11 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
 		goto err_out_put;
 	}
 
-	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	if (entry == NULL) {
-		if (net_ratelimit())
-			printk(KERN_ERR
-				"nf_queue: OOM in nfqnl_enqueue_packet()\n");
-		status = -ENOMEM;
-		goto err_out_put;
-	}
-
-	entry->info = info;
-	entry->skb = skb;
 	entry->id = atomic_inc_return(&queue->id_sequence);
 
 	nskb = nfqnl_build_packet_message(queue, entry, &status);
 	if (nskb == NULL)
-		goto err_out_free;
+		goto err_out_put;
 
 	spin_lock_bh(&queue->lock);
 
@@ -577,15 +553,13 @@ err_out_free_nskb:
 err_out_unlock:
 	spin_unlock_bh(&queue->lock);
 
-err_out_free:
-	kfree(entry);
 err_out_put:
 	instance_put(queue);
 	return status;
 }
 
 static int
-nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
+nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
 {
 	int diff;
 	int err;
@@ -630,15 +604,13 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
 }
 
 static int
-dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex)
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
-	struct nf_info *entinf = entry->info;
-
-	if (entinf->indev)
-		if (entinf->indev->ifindex == ifindex)
+	if (entry->indev)
+		if (entry->indev->ifindex == ifindex)
 			return 1;
-	if (entinf->outdev)
-		if (entinf->outdev->ifindex == ifindex)
+	if (entry->outdev)
+		if (entry->outdev->ifindex == ifindex)
 			return 1;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (entry->skb->nf_bridge) {
@@ -748,7 +720,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	struct nfqnl_msg_verdict_hdr *vhdr;
 	struct nfqnl_instance *queue;
 	unsigned int verdict;
-	struct nfqnl_queue_entry *entry;
+	struct nf_queue_entry *entry;
 	int err;
 
 	queue = instance_lookup_get(queue_num);
-- 
cgit v1.2.3


From 4b3d15ef4a88683d93d1b76351297d2298a02a99 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:27:02 -0800
Subject: [NETFILTER]: {nfnetlink,ip,ip6}_queue: kill issue_verdict

Now that issue_verdict doesn't need to free the queue entries anymore,
all it does is disable local BHs and call nf_reinject. Move the BH
disabling to the okfn invocation in nf_reinject and kill the
issue_verdict functions.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c   | 17 ++---------------
 net/ipv6/netfilter/ip6_queue.c  | 12 ++----------
 net/netfilter/nf_queue.c        |  2 ++
 net/netfilter/nfnetlink_queue.c | 21 ++-------------------
 4 files changed, 8 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index f1affd2344a..68b12ce8ba5 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -49,19 +49,6 @@ static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
-static void
-ipq_issue_verdict(struct nf_queue_entry *entry, int verdict)
-{
-	/* TCP input path (and probably other bits) assume to be called
-	 * from softirq context, not from syscall, like ipq_issue_verdict is
-	 * called.  TCP input path deadlocks with locks taken from timer
-	 * softirq, e.g.  We therefore emulate this by local_bh_disable() */
-
-	local_bh_disable();
-	nf_reinject(entry, verdict);
-	local_bh_enable();
-}
-
 static inline void
 __ipq_enqueue_entry(struct nf_queue_entry *entry)
 {
@@ -138,7 +125,7 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 		if (!cmpfn || cmpfn(entry, data)) {
 			list_del(&entry->list);
 			queue_total--;
-			ipq_issue_verdict(entry, NF_DROP);
+			nf_reinject(entry, NF_DROP);
 		}
 	}
 }
@@ -345,7 +332,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 			if (ipq_mangle_ipv4(vmsg, entry) < 0)
 				verdict = NF_DROP;
 
-		ipq_issue_verdict(entry, verdict);
+		nf_reinject(entry, verdict);
 		return 0;
 	}
 }
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 9014adae4fb..e5b0059582f 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -53,14 +53,6 @@ static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
-static void
-ipq_issue_verdict(struct nf_queue_entry *entry, int verdict)
-{
-	local_bh_disable();
-	nf_reinject(entry, verdict);
-	local_bh_enable();
-}
-
 static inline void
 __ipq_enqueue_entry(struct nf_queue_entry *entry)
 {
@@ -137,7 +129,7 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 		if (!cmpfn || cmpfn(entry, data)) {
 			list_del(&entry->list);
 			queue_total--;
-			ipq_issue_verdict(entry, NF_DROP);
+			nf_reinject(entry, NF_DROP);
 		}
 	}
 }
@@ -343,7 +335,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 			if (ipq_mangle_ipv6(vmsg, entry) < 0)
 				verdict = NF_DROP;
 
-		ipq_issue_verdict(entry, verdict);
+		nf_reinject(entry, verdict);
 		return 0;
 	}
 }
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d9d3dc4ce1a..f0dc7270411 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -275,7 +275,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	switch (verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_STOP:
+		local_bh_disable();
 		entry->okfn(skb);
+		local_bh_enable();
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a4937649d00..d9ce3942af2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -202,23 +202,6 @@ instance_destroy(struct nfqnl_instance *inst)
 	_instance_destroy2(inst, 1);
 }
 
-
-
-static void
-issue_verdict(struct nf_queue_entry *entry, int verdict)
-{
-	QDEBUG("entering for entry %p, verdict %u\n", entry, verdict);
-
-	/* TCP input path (and probably other bits) assume to be called
-	 * from softirq context, not from syscall, like issue_verdict is
-	 * called.  TCP input path deadlocks with locks taken from timer
-	 * softirq, e.g.  We therefore emulate this by local_bh_disable() */
-
-	local_bh_disable();
-	nf_reinject(entry, verdict);
-	local_bh_enable();
-}
-
 static inline void
 __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
 {
@@ -289,7 +272,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 		if (!cmpfn || cmpfn(entry, data)) {
 			list_del(&entry->list);
 			queue->queue_total--;
-			issue_verdict(entry, NF_DROP);
+			nf_reinject(entry, NF_DROP);
 		}
 	}
 	spin_unlock_bh(&queue->lock);
@@ -761,7 +744,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 		entry->skb->mark = ntohl(*(__be32 *)
 					 nla_data(nfqa[NFQA_MARK]));
 
-	issue_verdict(entry, verdict);
+	nf_reinject(entry, verdict);
 	instance_put(queue);
 	return 0;
 
-- 
cgit v1.2.3


From daaa8be2e0ec1c27d11e6724c8ebd8ed53914ae2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:27:19 -0800
Subject: [NETFILTER]: nf_queue: clean up error paths

Move duplicated error handling to end of function and add a helper function
to release the device and module references from the queue entry.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_queue.c | 93 +++++++++++++++++++++---------------------------
 1 file changed, 40 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f0dc7270411..77965114b09 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -80,6 +80,27 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
 
+static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+{
+	/* Release those devices we held, or Alexey will kill me. */
+	if (entry->indev)
+		dev_put(entry->indev);
+	if (entry->outdev)
+		dev_put(entry->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (entry->skb->nf_bridge) {
+		struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+
+		if (nf_bridge->physindev)
+			dev_put(nf_bridge->physindev);
+		if (nf_bridge->physoutdev)
+			dev_put(nf_bridge->physoutdev);
+	}
+#endif
+	/* Drop reference to owner of hook which queued us. */
+	module_put(entry->elem->owner);
+}
+
 /*
  * Any packet that leaves via this function must come back
  * through nf_reinject().
@@ -93,10 +114,10 @@ static int __nf_queue(struct sk_buff *skb,
 		      unsigned int queuenum)
 {
 	int status;
-	struct nf_queue_entry *entry;
+	struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
-	struct net_device *physindev = NULL;
-	struct net_device *physoutdev = NULL;
+	struct net_device *physindev;
+	struct net_device *physoutdev;
 #endif
 	struct nf_afinfo *afinfo;
 	const struct nf_queue_handler *qh;
@@ -105,28 +126,16 @@ static int __nf_queue(struct sk_buff *skb,
 	rcu_read_lock();
 
 	qh = rcu_dereference(queue_handler[pf]);
-	if (!qh) {
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return 1;
-	}
+	if (!qh)
+		goto err_unlock;
 
 	afinfo = nf_get_afinfo(pf);
-	if (!afinfo) {
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return 1;
-	}
+	if (!afinfo)
+		goto err_unlock;
 
 	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
-	if (!entry) {
-		if (net_ratelimit())
-			printk(KERN_ERR "OOM queueing packet %p\n",
-			       skb);
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return 1;
-	}
+	if (!entry)
+		goto err_unlock;
 
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
@@ -166,25 +175,18 @@ static int __nf_queue(struct sk_buff *skb,
 	rcu_read_unlock();
 
 	if (status < 0) {
-		/* James M doesn't say fuck enough. */
-		if (indev)
-			dev_put(indev);
-		if (outdev)
-			dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-		if (physindev)
-			dev_put(physindev);
-		if (physoutdev)
-			dev_put(physoutdev);
-#endif
-		module_put(entry->elem->owner);
-		kfree(entry);
-		kfree_skb(skb);
-
-		return 1;
+		nf_queue_entry_release_refs(entry);
+		goto err;
 	}
 
 	return 1;
+
+err_unlock:
+	rcu_read_unlock();
+err:
+	kfree_skb(skb);
+	kfree(entry);
+	return 1;
 }
 
 int nf_queue(struct sk_buff *skb,
@@ -235,22 +237,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 
 	rcu_read_lock();
 
-	/* Release those devices we held, or Alexey will kill me. */
-	if (entry->indev)
-		dev_put(entry->indev);
-	if (entry->outdev)
-		dev_put(entry->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (skb->nf_bridge) {
-		if (skb->nf_bridge->physindev)
-			dev_put(skb->nf_bridge->physindev);
-		if (skb->nf_bridge->physoutdev)
-			dev_put(skb->nf_bridge->physoutdev);
-	}
-#endif
-
-	/* Drop reference to owner of hook which queued us. */
-	module_put(entry->elem->owner);
+	nf_queue_entry_release_refs(entry);
 
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT) {
-- 
cgit v1.2.3


From f9c639905018967e57ea24b07e82de9bcd76339f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:27:46 -0800
Subject: [NETFILTER]: remove annoying debugging message

Don't log "nf_hook: Verdict = QUEUE." message with NETFILTER_DEBUG=y.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 6819a4113e1..95e18635ce7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -178,8 +178,7 @@ next_hook:
 	} else if (verdict == NF_DROP) {
 		kfree_skb(skb);
 		ret = -EPERM;
-	} else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
-		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
 		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
-- 
cgit v1.2.3


From e48b9b2fb383879a5d758d526b5eb8de4509f467 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:28:10 -0800
Subject: [NETFILTER]: nfnetlink_queue: avoid unnecessary atomic operation

The sequence counter doesn't need to be an atomic_t, just move the increment
inside the locked section.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d9ce3942af2..bd18de72e3c 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -56,7 +56,7 @@ struct nfqnl_instance {
 	unsigned int queue_dropped;
 	unsigned int queue_user_dropped;
 
-	atomic_t id_sequence;			/* 'sequence' of pkt ids */
+	unsigned int id_sequence;		/* 'sequence' of pkt ids */
 
 	u_int16_t queue_num;			/* number of this queue */
 	u_int8_t copy_mode;
@@ -139,7 +139,6 @@ instance_create(u_int16_t queue_num, int pid)
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
 	inst->copy_range = 0xfffff;
 	inst->copy_mode = NFQNL_COPY_NONE;
-	atomic_set(&inst->id_sequence, 0);
 	/* needs to be two, since we _put() after creation */
 	atomic_set(&inst->use, 2);
 	spin_lock_init(&inst->lock);
@@ -340,6 +339,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		return NULL;
 	}
 
+	entry->id = queue->id_sequence++;
+
 	spin_unlock_bh(&queue->lock);
 
 	skb = alloc_skb(size, GFP_ATOMIC);
@@ -496,8 +497,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 		goto err_out_put;
 	}
 
-	entry->id = atomic_inc_return(&queue->id_sequence);
-
 	nskb = nfqnl_build_packet_message(queue, entry, &status);
 	if (nskb == NULL)
 		goto err_out_put;
@@ -948,7 +947,7 @@ static int seq_show(struct seq_file *s, void *v)
 			  inst->peer_pid, inst->queue_total,
 			  inst->copy_mode, inst->copy_range,
 			  inst->queue_dropped, inst->queue_user_dropped,
-			  atomic_read(&inst->id_sequence),
+			  inst->id_sequence,
 			  atomic_read(&inst->use));
 }
 
-- 
cgit v1.2.3


From a3c8e7fd4b36bf6e12fef432cfa8a001dc0b7a26 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:28:30 -0800
Subject: [NETFILTER]: nfnetlink_queue: fix checks in nfqnl_recv_config

The peer_pid must be checked in all cases when a queue exists, currently
it is not checked if for NFQA_CFG_QUEUE_MAXLEN when a NFQA_CFG_CMD
attribute exists in some cases. Same for the queue existance check,
which can cause a NULL pointer dereference.

Also consistently return -ENODEV for "queue not found". -ENOENT would
be better, but that is already used to indicate a queued skb id doesn't
exist.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index bd18de72e3c..4abf62a6c05 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -781,8 +781,14 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
 
 	queue = instance_lookup_get(queue_num);
+	if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
+		ret = -EPERM;
+		goto out_put;
+	}
+
 	if (nfqa[NFQA_CFG_CMD]) {
 		struct nfqnl_msg_config_cmd *cmd;
+
 		cmd = nla_data(nfqa[NFQA_CFG_CMD]);
 		QDEBUG("found CFG_CMD\n");
 
@@ -798,12 +804,6 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		case NFQNL_CFG_CMD_UNBIND:
 			if (!queue)
 				return -ENODEV;
-
-			if (queue->peer_pid != NETLINK_CB(skb).pid) {
-				ret = -EPERM;
-				goto out_put;
-			}
-
 			instance_destroy(queue);
 			break;
 		case NFQNL_CFG_CMD_PF_BIND:
@@ -820,25 +820,13 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			ret = -EINVAL;
 			break;
 		}
-	} else {
-		if (!queue) {
-			QDEBUG("no config command, and no instance ENOENT\n");
-			ret = -ENOENT;
-			goto out_put;
-		}
-
-		if (queue->peer_pid != NETLINK_CB(skb).pid) {
-			QDEBUG("no config command, and wrong pid\n");
-			ret = -EPERM;
-			goto out_put;
-		}
 	}
 
 	if (nfqa[NFQA_CFG_PARAMS]) {
 		struct nfqnl_msg_config_params *params;
 
 		if (!queue) {
-			ret = -ENOENT;
+			ret = -ENODEV;
 			goto out_put;
 		}
 		params = nla_data(nfqa[NFQA_CFG_PARAMS]);
@@ -848,6 +836,11 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 
 	if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
 		__be32 *queue_maxlen;
+
+		if (!queue) {
+			ret = -ENODEV;
+			goto out_put;
+		}
 		queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
 		spin_lock_bh(&queue->lock);
 		queue->queue_maxlen = ntohl(*queue_maxlen);
-- 
cgit v1.2.3


From 9872bec773c2e8503fec480c1e8a0c732517e257 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:28:50 -0800
Subject: [NETFILTER]: nfnetlink: use RCU for queue instances hash

Use RCU for queue instances hash. Avoids multiple atomic operations
for each packet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 205 ++++++++++++++++++----------------------
 1 file changed, 92 insertions(+), 113 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 4abf62a6c05..449b880042a 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -47,7 +47,7 @@
 
 struct nfqnl_instance {
 	struct hlist_node hlist;		/* global list of queues */
-	atomic_t use;
+	struct rcu_head rcu;
 
 	int peer_pid;
 	unsigned int queue_maxlen;
@@ -68,7 +68,7 @@ struct nfqnl_instance {
 
 typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
 
-static DEFINE_RWLOCK(instances_lock);
+static DEFINE_SPINLOCK(instances_lock);
 
 #define INSTANCE_BUCKETS	16
 static struct hlist_head instance_table[INSTANCE_BUCKETS];
@@ -79,52 +79,30 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
 }
 
 static struct nfqnl_instance *
-__instance_lookup(u_int16_t queue_num)
+instance_lookup(u_int16_t queue_num)
 {
 	struct hlist_head *head;
 	struct hlist_node *pos;
 	struct nfqnl_instance *inst;
 
 	head = &instance_table[instance_hashfn(queue_num)];
-	hlist_for_each_entry(inst, pos, head, hlist) {
+	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->queue_num == queue_num)
 			return inst;
 	}
 	return NULL;
 }
 
-static struct nfqnl_instance *
-instance_lookup_get(u_int16_t queue_num)
-{
-	struct nfqnl_instance *inst;
-
-	read_lock_bh(&instances_lock);
-	inst = __instance_lookup(queue_num);
-	if (inst)
-		atomic_inc(&inst->use);
-	read_unlock_bh(&instances_lock);
-
-	return inst;
-}
-
-static void
-instance_put(struct nfqnl_instance *inst)
-{
-	if (inst && atomic_dec_and_test(&inst->use)) {
-		QDEBUG("kfree(inst=%p)\n", inst);
-		kfree(inst);
-	}
-}
-
 static struct nfqnl_instance *
 instance_create(u_int16_t queue_num, int pid)
 {
 	struct nfqnl_instance *inst;
+	unsigned int h;
 
 	QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
 
-	write_lock_bh(&instances_lock);
-	if (__instance_lookup(queue_num)) {
+	spin_lock(&instances_lock);
+	if (instance_lookup(queue_num)) {
 		inst = NULL;
 		QDEBUG("aborting, instance already exists\n");
 		goto out_unlock;
@@ -139,18 +117,17 @@ instance_create(u_int16_t queue_num, int pid)
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
 	inst->copy_range = 0xfffff;
 	inst->copy_mode = NFQNL_COPY_NONE;
-	/* needs to be two, since we _put() after creation */
-	atomic_set(&inst->use, 2);
 	spin_lock_init(&inst->lock);
 	INIT_LIST_HEAD(&inst->queue_list);
+	INIT_RCU_HEAD(&inst->rcu);
 
 	if (!try_module_get(THIS_MODULE))
 		goto out_free;
 
-	hlist_add_head(&inst->hlist,
-		       &instance_table[instance_hashfn(queue_num)]);
+	h = instance_hashfn(queue_num);
+	hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
 
-	write_unlock_bh(&instances_lock);
+	spin_unlock(&instances_lock);
 
 	QDEBUG("successfully created new instance\n");
 
@@ -159,7 +136,7 @@ instance_create(u_int16_t queue_num, int pid)
 out_free:
 	kfree(inst);
 out_unlock:
-	write_unlock_bh(&instances_lock);
+	spin_unlock(&instances_lock);
 	return NULL;
 }
 
@@ -167,38 +144,29 @@ static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
 			unsigned long data);
 
 static void
-_instance_destroy2(struct nfqnl_instance *inst, int lock)
+instance_destroy_rcu(struct rcu_head *head)
 {
-	/* first pull it out of the global list */
-	if (lock)
-		write_lock_bh(&instances_lock);
-
-	QDEBUG("removing instance %p (queuenum=%u) from hash\n",
-		inst, inst->queue_num);
-	hlist_del(&inst->hlist);
+	struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
+						   rcu);
 
-	if (lock)
-		write_unlock_bh(&instances_lock);
-
-	/* then flush all pending skbs from the queue */
 	nfqnl_flush(inst, NULL, 0);
-
-	/* and finally put the refcount */
-	instance_put(inst);
-
+	kfree(inst);
 	module_put(THIS_MODULE);
 }
 
-static inline void
+static void
 __instance_destroy(struct nfqnl_instance *inst)
 {
-	_instance_destroy2(inst, 0);
+	hlist_del_rcu(&inst->hlist);
+	call_rcu(&inst->rcu, instance_destroy_rcu);
 }
 
-static inline void
+static void
 instance_destroy(struct nfqnl_instance *inst)
 {
-	_instance_destroy2(inst, 1);
+	spin_lock(&instances_lock);
+	__instance_destroy(inst);
+	spin_unlock(&instances_lock);
 }
 
 static inline void
@@ -485,7 +453,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	QDEBUG("entered\n");
 
-	queue = instance_lookup_get(queuenum);
+	/* rcu_read_lock()ed by nf_hook_slow() */
+	queue = instance_lookup(queuenum);
 	if (!queue) {
 		QDEBUG("no queue instance matching\n");
 		return -EINVAL;
@@ -493,13 +462,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	if (queue->copy_mode == NFQNL_COPY_NONE) {
 		QDEBUG("mode COPY_NONE, aborting\n");
-		status = -EAGAIN;
-		goto err_out_put;
+		return -EAGAIN;
 	}
 
 	nskb = nfqnl_build_packet_message(queue, entry, &status);
 	if (nskb == NULL)
-		goto err_out_put;
+		return status;
 
 	spin_lock_bh(&queue->lock);
 
@@ -526,7 +494,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	__enqueue_entry(queue, entry);
 
 	spin_unlock_bh(&queue->lock);
-	instance_put(queue);
 	return status;
 
 err_out_free_nskb:
@@ -534,9 +501,6 @@ err_out_free_nskb:
 
 err_out_unlock:
 	spin_unlock_bh(&queue->lock);
-
-err_out_put:
-	instance_put(queue);
 	return status;
 }
 
@@ -616,21 +580,18 @@ nfqnl_dev_drop(int ifindex)
 
 	QDEBUG("entering for ifindex %u\n", ifindex);
 
-	/* this only looks like we have to hold the readlock for a way too long
-	 * time, issue_verdict(),  nf_reinject(), ... - but we always only
-	 * issue NF_DROP, which is processed directly in nf_reinject() */
-	read_lock_bh(&instances_lock);
+	rcu_read_lock();
 
-	for  (i = 0; i < INSTANCE_BUCKETS; i++) {
+	for (i = 0; i < INSTANCE_BUCKETS; i++) {
 		struct hlist_node *tmp;
 		struct nfqnl_instance *inst;
 		struct hlist_head *head = &instance_table[i];
 
-		hlist_for_each_entry(inst, tmp, head, hlist)
+		hlist_for_each_entry_rcu(inst, tmp, head, hlist)
 			nfqnl_flush(inst, dev_cmp, ifindex);
 	}
 
-	read_unlock_bh(&instances_lock);
+	rcu_read_unlock();
 }
 
 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -665,8 +626,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
 		int i;
 
 		/* destroy all instances for this pid */
-		write_lock_bh(&instances_lock);
-		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
+		spin_lock(&instances_lock);
+		for (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfqnl_instance *inst;
 			struct hlist_head *head = &instance_table[i];
@@ -677,7 +638,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
 					__instance_destroy(inst);
 			}
 		}
-		write_unlock_bh(&instances_lock);
+		spin_unlock(&instances_lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -705,18 +666,21 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_queue_entry *entry;
 	int err;
 
-	queue = instance_lookup_get(queue_num);
-	if (!queue)
-		return -ENODEV;
+	rcu_read_lock();
+	queue = instance_lookup(queue_num);
+	if (!queue) {
+		err = -ENODEV;
+		goto err_out_unlock;
+	}
 
 	if (queue->peer_pid != NETLINK_CB(skb).pid) {
 		err = -EPERM;
-		goto err_out_put;
+		goto err_out_unlock;
 	}
 
 	if (!nfqa[NFQA_VERDICT_HDR]) {
 		err = -EINVAL;
-		goto err_out_put;
+		goto err_out_unlock;
 	}
 
 	vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
@@ -724,14 +688,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 
 	if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
 		err = -EINVAL;
-		goto err_out_put;
+		goto err_out_unlock;
 	}
 
 	entry = find_dequeue_entry(queue, ntohl(vhdr->id));
 	if (entry == NULL) {
 		err = -ENOENT;
-		goto err_out_put;
+		goto err_out_unlock;
 	}
+	rcu_read_unlock();
 
 	if (nfqa[NFQA_PAYLOAD]) {
 		if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
@@ -744,11 +709,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 					 nla_data(nfqa[NFQA_MARK]));
 
 	nf_reinject(entry, verdict);
-	instance_put(queue);
 	return 0;
 
-err_out_put:
-	instance_put(queue);
+err_out_unlock:
+	rcu_read_unlock();
 	return err;
 }
 
@@ -776,45 +740,61 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
 	struct nfqnl_instance *queue;
+	struct nfqnl_msg_config_cmd *cmd = NULL;
 	int ret = 0;
 
 	QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
 
-	queue = instance_lookup_get(queue_num);
+	if (nfqa[NFQA_CFG_CMD]) {
+		cmd = nla_data(nfqa[NFQA_CFG_CMD]);
+
+		/* Commands without queue context - might sleep */
+		switch (cmd->command) {
+		case NFQNL_CFG_CMD_PF_BIND:
+			ret = nf_register_queue_handler(ntohs(cmd->pf),
+							&nfqh);
+			break;
+		case NFQNL_CFG_CMD_PF_UNBIND:
+			ret = nf_unregister_queue_handler(ntohs(cmd->pf),
+							  &nfqh);
+			break;
+		default:
+			break;
+		}
+
+		if (ret < 0)
+			return ret;
+	}
+
+	rcu_read_lock();
+	queue = instance_lookup(queue_num);
 	if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
 		ret = -EPERM;
-		goto out_put;
+		goto err_out_unlock;
 	}
 
-	if (nfqa[NFQA_CFG_CMD]) {
-		struct nfqnl_msg_config_cmd *cmd;
-
-		cmd = nla_data(nfqa[NFQA_CFG_CMD]);
-		QDEBUG("found CFG_CMD\n");
-
+	if (cmd != NULL) {
 		switch (cmd->command) {
 		case NFQNL_CFG_CMD_BIND:
-			if (queue)
-				return -EBUSY;
-
+			if (queue) {
+				ret = -EBUSY;
+				goto err_out_unlock;
+			}
 			queue = instance_create(queue_num, NETLINK_CB(skb).pid);
-			if (!queue)
-				return -EINVAL;
+			if (!queue) {
+				ret = -EINVAL;
+				goto err_out_unlock;
+			}
 			break;
 		case NFQNL_CFG_CMD_UNBIND:
-			if (!queue)
-				return -ENODEV;
+			if (!queue) {
+				ret = -ENODEV;
+				goto err_out_unlock;
+			}
 			instance_destroy(queue);
 			break;
 		case NFQNL_CFG_CMD_PF_BIND:
-			QDEBUG("registering queue handler for pf=%u\n",
-				ntohs(cmd->pf));
-			ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
-			break;
 		case NFQNL_CFG_CMD_PF_UNBIND:
-			QDEBUG("unregistering queue handler for pf=%u\n",
-				ntohs(cmd->pf));
-			ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
 			break;
 		default:
 			ret = -EINVAL;
@@ -827,7 +807,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 
 		if (!queue) {
 			ret = -ENODEV;
-			goto out_put;
+			goto err_out_unlock;
 		}
 		params = nla_data(nfqa[NFQA_CFG_PARAMS]);
 		nfqnl_set_mode(queue, params->copy_mode,
@@ -839,7 +819,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 
 		if (!queue) {
 			ret = -ENODEV;
-			goto out_put;
+			goto err_out_unlock;
 		}
 		queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
 		spin_lock_bh(&queue->lock);
@@ -847,8 +827,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		spin_unlock_bh(&queue->lock);
 	}
 
-out_put:
-	instance_put(queue);
+err_out_unlock:
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -916,7 +896,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
 
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock_bh(&instances_lock);
+	spin_lock(&instances_lock);
 	return get_idx(seq, *pos);
 }
 
@@ -928,7 +908,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 
 static void seq_stop(struct seq_file *s, void *v)
 {
-	read_unlock_bh(&instances_lock);
+	spin_unlock(&instances_lock);
 }
 
 static int seq_show(struct seq_file *s, void *v)
@@ -940,8 +920,7 @@ static int seq_show(struct seq_file *s, void *v)
 			  inst->peer_pid, inst->queue_total,
 			  inst->copy_mode, inst->copy_range,
 			  inst->queue_dropped, inst->queue_user_dropped,
-			  inst->id_sequence,
-			  atomic_read(&inst->use));
+			  inst->id_sequence, 1);
 }
 
 static const struct seq_operations nfqnl_seq_ops = {
-- 
cgit v1.2.3


From c5de0dfde8b0c5ea3d8b3040db1967f9c1d9748e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:29:05 -0800
Subject: [NETFILTER]: nfnetlink_queue: kill useless wrapper

nfqnl_set_mode takes the queue lock and calls __nfqnl_set_mode. Just move
the code from __nfqnl_set_mode to nfqnl_set_mode since there is no other
user.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 52 +++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 449b880042a..37b7655df91 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -176,35 +176,6 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
        queue->queue_total++;
 }
 
-static inline int
-__nfqnl_set_mode(struct nfqnl_instance *queue,
-		 unsigned char mode, unsigned int range)
-{
-	int status = 0;
-
-	switch (mode) {
-	case NFQNL_COPY_NONE:
-	case NFQNL_COPY_META:
-		queue->copy_mode = mode;
-		queue->copy_range = 0;
-		break;
-
-	case NFQNL_COPY_PACKET:
-		queue->copy_mode = mode;
-		/* we're using struct nlattr which has 16bit nla_len */
-		if (range > 0xffff)
-			queue->copy_range = 0xffff;
-		else
-			queue->copy_range = range;
-		break;
-
-	default:
-		status = -EINVAL;
-
-	}
-	return status;
-}
-
 static struct nf_queue_entry *
 find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 {
@@ -540,10 +511,29 @@ static int
 nfqnl_set_mode(struct nfqnl_instance *queue,
 	       unsigned char mode, unsigned int range)
 {
-	int status;
+	int status = 0;
 
 	spin_lock_bh(&queue->lock);
-	status = __nfqnl_set_mode(queue, mode, range);
+	switch (mode) {
+	case NFQNL_COPY_NONE:
+	case NFQNL_COPY_META:
+		queue->copy_mode = mode;
+		queue->copy_range = 0;
+		break;
+
+	case NFQNL_COPY_PACKET:
+		queue->copy_mode = mode;
+		/* we're using struct nlattr which has 16bit nla_len */
+		if (range > 0xffff)
+			queue->copy_range = 0xffff;
+		else
+			queue->copy_range = range;
+		break;
+
+	default:
+		status = -EINVAL;
+
+	}
 	spin_unlock_bh(&queue->lock);
 
 	return status;
-- 
cgit v1.2.3


From 2bd0119729cb4eac88c6161b3e1a3c3ebbb4768e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:29:23 -0800
Subject: [NETFILTER]: nfnetlink_queue: remove useless debugging

Originally I wanted to just remove the QDEBUG macro and use pr_debug, but
none of the messages seems worth keeping.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 35 ++++-------------------------------
 1 file changed, 4 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 37b7655df91..c3aba1e8c5c 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -37,14 +37,6 @@
 
 #define NFQNL_QMAX_DEFAULT 1024
 
-#if 0
-#define QDEBUG(x, args ...)	printk(KERN_DEBUG "%s(%d):%s():	" x, 	   \
-					__FILE__, __LINE__, __FUNCTION__,  \
-					## args)
-#else
-#define QDEBUG(x, ...)
-#endif
-
 struct nfqnl_instance {
 	struct hlist_node hlist;		/* global list of queues */
 	struct rcu_head rcu;
@@ -96,17 +88,12 @@ instance_lookup(u_int16_t queue_num)
 static struct nfqnl_instance *
 instance_create(u_int16_t queue_num, int pid)
 {
-	struct nfqnl_instance *inst;
+	struct nfqnl_instance *inst = NULL;
 	unsigned int h;
 
-	QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
-
 	spin_lock(&instances_lock);
-	if (instance_lookup(queue_num)) {
-		inst = NULL;
-		QDEBUG("aborting, instance already exists\n");
+	if (instance_lookup(queue_num))
 		goto out_unlock;
-	}
 
 	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
 	if (!inst)
@@ -129,8 +116,6 @@ instance_create(u_int16_t queue_num, int pid)
 
 	spin_unlock(&instances_lock);
 
-	QDEBUG("successfully created new instance\n");
-
 	return inst;
 
 out_free:
@@ -232,8 +217,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	struct net_device *outdev;
 	__be32 tmp_uint;
 
-	QDEBUG("entered\n");
-
 	size =    NLMSG_ALIGN(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
@@ -422,19 +405,13 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	struct sk_buff *nskb;
 	struct nfqnl_instance *queue;
 
-	QDEBUG("entered\n");
-
 	/* rcu_read_lock()ed by nf_hook_slow() */
 	queue = instance_lookup(queuenum);
-	if (!queue) {
-		QDEBUG("no queue instance matching\n");
+	if (!queue)
 		return -EINVAL;
-	}
 
-	if (queue->copy_mode == NFQNL_COPY_NONE) {
-		QDEBUG("mode COPY_NONE, aborting\n");
+	if (queue->copy_mode == NFQNL_COPY_NONE)
 		return -EAGAIN;
-	}
 
 	nskb = nfqnl_build_packet_message(queue, entry, &status);
 	if (nskb == NULL)
@@ -568,8 +545,6 @@ nfqnl_dev_drop(int ifindex)
 {
 	int i;
 
-	QDEBUG("entering for ifindex %u\n", ifindex);
-
 	rcu_read_lock();
 
 	for (i = 0; i < INSTANCE_BUCKETS; i++) {
@@ -733,8 +708,6 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	struct nfqnl_msg_config_cmd *cmd = NULL;
 	int ret = 0;
 
-	QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
-
 	if (nfqa[NFQA_CFG_CMD]) {
 		cmd = nla_data(nfqa[NFQA_CFG_CMD]);
 
-- 
cgit v1.2.3


From 9d6023ab8b97f5074f9007c92d38fef9ae04e56b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:29:38 -0800
Subject: [NETFILTER]: nfnetlink_queue: mark hash table __read_mostly

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c3aba1e8c5c..de48fd3d8a3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -63,7 +63,7 @@ typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
 static DEFINE_SPINLOCK(instances_lock);
 
 #define INSTANCE_BUCKETS	16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
+static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
 
 static inline u_int8_t instance_hashfn(u_int16_t queue_num)
 {
-- 
cgit v1.2.3


From ea3a66ff5ae41daa1d1aa789140a70415eaf7b46 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:30:02 -0800
Subject: [NETFILTER]: nfnetlink_queue: use endianness-aware attribute
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 60 ++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index de48fd3d8a3..436b442d4ed 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -215,7 +215,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	struct sk_buff *entskb = entry->skb;
 	struct net_device *indev;
 	struct net_device *outdev;
-	__be32 tmp_uint;
 
 	size =    NLMSG_ALIGN(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -286,69 +285,57 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
 	indev = entry->indev;
 	if (indev) {
-		tmp_uint = htonl(indev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
-		NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
+		NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
 #else
 		if (entry->pf == PF_BRIDGE) {
 			/* Case 1: indev is physical input device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
-			NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint),
-				&tmp_uint);
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
+				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
-			tmp_uint = htonl(indev->br_port->br->dev->ifindex);
-			NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
-				&tmp_uint);
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
+				     htonl(indev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
-			NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
-				&tmp_uint);
-			if (entskb->nf_bridge
-			    && entskb->nf_bridge->physindev) {
-				tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex);
-				NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
-					sizeof(tmp_uint), &tmp_uint);
-			}
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
+				     htonl(indev->ifindex));
+			if (entskb->nf_bridge && entskb->nf_bridge->physindev)
+				NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
+					     htonl(entskb->nf_bridge->physindev->ifindex));
 		}
 #endif
 	}
 
 	if (outdev) {
-		tmp_uint = htonl(outdev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
-		NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
+		NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
 #else
 		if (entry->pf == PF_BRIDGE) {
 			/* Case 1: outdev is physical output device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
-			NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
-				&tmp_uint);
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
-			tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
-			NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
-				&tmp_uint);
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
+				     htonl(outdev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: outdev is bridge group, we need to look for
 			 * physical output device (when called from ipv4) */
-			NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
-				&tmp_uint);
-			if (entskb->nf_bridge
-			    && entskb->nf_bridge->physoutdev) {
-				tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex);
-				NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
-					sizeof(tmp_uint), &tmp_uint);
-			}
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
+				     htonl(outdev->ifindex));
+			if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
+				NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+					     htonl(entskb->nf_bridge->physoutdev->ifindex));
 		}
 #endif
 	}
 
-	if (entskb->mark) {
-		tmp_uint = htonl(entskb->mark);
-		NLA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
-	}
+	if (entskb->mark)
+		NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
 
 	if (indev && entskb->dev) {
 		struct nfqnl_msg_packet_hw phw;
@@ -670,8 +657,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	if (nfqa[NFQA_MARK])
-		entry->skb->mark = ntohl(*(__be32 *)
-					 nla_data(nfqa[NFQA_MARK]));
+		entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
 
 	nf_reinject(entry, verdict);
 	return 0;
-- 
cgit v1.2.3


From 861934c7c888973da8bf621b3959e408531539e1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:30:29 -0800
Subject: [NETFILTER]: nfnetlink_queue: eliminate impossible switch case

We don't need a default case in nfqnl_build_packet_message(), the
copy_mode is validated when it is set. Tell the compiler about
the possible types and remove the default case. Saves 80b of
text on x86_64.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 436b442d4ed..abd5ff9b89a 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -232,7 +232,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
 	spin_lock_bh(&queue->lock);
 
-	switch (queue->copy_mode) {
+	switch ((enum nfqnl_config_mode)queue->copy_mode) {
 	case NFQNL_COPY_META:
 	case NFQNL_COPY_NONE:
 		data_len = 0;
@@ -253,11 +253,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
 		size += nla_total_size(data_len);
 		break;
-
-	default:
-		*errp = -EINVAL;
-		spin_unlock_bh(&queue->lock);
-		return NULL;
 	}
 
 	entry->id = queue->id_sequence++;
-- 
cgit v1.2.3


From 0ef0f46580320a7f96c60b20b7a29b0bd820d2e7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:31:01 -0800
Subject: [NETFILTER]: nfnetlink_queue: remove useless enqueue status codes

The queueing core doesn't care about the exact return value from
the queue handler, so there's no need to go through the trouble
of returning a meaningful value as long as we indicate an error.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index abd5ff9b89a..6148a41aa97 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -203,7 +203,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
-			   struct nf_queue_entry *entry, int *errp)
+			   struct nf_queue_entry *entry)
 {
 	sk_buff_data_t old_tail;
 	size_t size;
@@ -241,7 +241,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	case NFQNL_COPY_PACKET:
 		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
 		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
-		    (*errp = skb_checksum_help(entskb))) {
+		    skb_checksum_help(entskb)) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
 		}
@@ -374,7 +374,6 @@ nlmsg_failure:
 nla_put_failure:
 	if (skb)
 		kfree_skb(skb);
-	*errp = -EINVAL;
 	if (net_ratelimit())
 		printk(KERN_ERR "nf_queue: error creating packet message\n");
 	return NULL;
@@ -383,21 +382,21 @@ nla_put_failure:
 static int
 nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
-	int status = -EINVAL;
 	struct sk_buff *nskb;
 	struct nfqnl_instance *queue;
+	int err;
 
 	/* rcu_read_lock()ed by nf_hook_slow() */
 	queue = instance_lookup(queuenum);
 	if (!queue)
-		return -EINVAL;
+		goto err_out;
 
 	if (queue->copy_mode == NFQNL_COPY_NONE)
-		return -EAGAIN;
+		goto err_out;
 
-	nskb = nfqnl_build_packet_message(queue, entry, &status);
+	nskb = nfqnl_build_packet_message(queue, entry);
 	if (nskb == NULL)
-		return status;
+		goto err_out;
 
 	spin_lock_bh(&queue->lock);
 
@@ -406,7 +405,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	if (queue->queue_total >= queue->queue_maxlen) {
 		queue->queue_dropped++;
-		status = -ENOSPC;
 		if (net_ratelimit())
 			  printk(KERN_WARNING "nf_queue: full at %d entries, "
 				 "dropping packets(s). Dropped: %d\n",
@@ -415,8 +413,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	}
 
 	/* nfnetlink_unicast will either free the nskb or add it to a socket */
-	status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
-	if (status < 0) {
+	err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+	if (err < 0) {
 		queue->queue_user_dropped++;
 		goto err_out_unlock;
 	}
@@ -424,14 +422,14 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	__enqueue_entry(queue, entry);
 
 	spin_unlock_bh(&queue->lock);
-	return status;
+	return 0;
 
 err_out_free_nskb:
 	kfree_skb(nskb);
-
 err_out_unlock:
 	spin_unlock_bh(&queue->lock);
-	return status;
+err_out:
+	return -1;
 }
 
 static int
-- 
cgit v1.2.3


From 4ad9d4fa94800573edaca18ba2f34f27137c9bc0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:31:17 -0800
Subject: [NETFILTER]: nfnetlink_queue: update copyright

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 6148a41aa97..d94de48f6cd 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -3,6 +3,7 @@
  * userspace via nfetlink.
  *
  * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2007 by Patrick McHardy <kaber@trash.net>
  *
  * Based on the old ipv4-only ip_queue.c:
  * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
-- 
cgit v1.2.3


From ab4f58c77a189d1100383e0c901cb8e90c3ab4f7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:31:37 -0800
Subject: [NETFILTER]: remove NF_CONNTRACK_ENABLED option

Remove the NF_CONNTRACK_ENABLED option. It was meant for a smoother upgrade
to nf_conntrack, people having reconfigured their kernel at least once since
ip_conntrack was removed will have the NF_CONNTRACK option already set.
People upgrading from older kernels have to reconfigure a lot anyway.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4182393186a..772c58cc2c7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -25,8 +25,7 @@ config NETFILTER_NETLINK_LOG
 	  and is also scheduled to replace the old syslog-based ipt_LOG
 	  and ip6t_LOG modules.
 
-# Rename this to NF_CONNTRACK in a 2.6.25
-config NF_CONNTRACK_ENABLED
+config NF_CONNTRACK
 	tristate "Netfilter connection tracking support"
 	help
 	  Connection tracking keeps a record of what packets have passed
@@ -40,10 +39,6 @@ config NF_CONNTRACK_ENABLED
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NF_CONNTRACK
-	tristate
-	default NF_CONNTRACK_ENABLED
-
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
 	depends on NF_CONNTRACK
-- 
cgit v1.2.3


From 2eeeba390ae28fcd742e44706c363cb36bc614c6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Dec 2007 01:31:52 -0800
Subject: [NETFILTER]: Select CONFIG_NETFILTER_NETLINK when needed

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 772c58cc2c7..bb61f83c7a7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -2,21 +2,18 @@ menu "Core Netfilter Configuration"
 	depends on NET && INET && NETFILTER
 
 config NETFILTER_NETLINK
-       tristate "Netfilter netlink interface"
-       help
-         If this option is enabled, the kernel will include support
-         for the new netfilter netlink interface.
+	tristate
 
 config NETFILTER_NETLINK_QUEUE
 	tristate "Netfilter NFQUEUE over NFNETLINK interface"
-	depends on NETFILTER_NETLINK
+	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support
 	  for queueing packets via NFNETLINK.
 	  
 config NETFILTER_NETLINK_LOG
 	tristate "Netfilter LOG over NFNETLINK interface"
-	depends on NETFILTER_NETLINK
+	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support
 	  for logging packets via NFNETLINK.
@@ -246,8 +243,8 @@ config NF_CONNTRACK_TFTP
 
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
-	depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+	depends on EXPERIMENTAL && NF_CONNTRACK
+	select NETFILTER_NETLINK
 	depends on NF_NAT=n || NF_NAT
 	help
 	  This option enables support for a netlink-based userspace interface
-- 
cgit v1.2.3


From 7e2e109cef0d59abcb9aca8b82993e304ed8970c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:36:23 -0800
Subject: [NET]: Remove unneeded ifdefs from sysctl_net_core.c

This file is already compiled out when the SYSCTL=n, so
these ifdefs, that enclose the whole file, can be removed.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 113cc728dc3..277c8faaf73 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -13,8 +13,6 @@
 #include <net/sock.h>
 #include <net/xfrm.h>
 
-#ifdef CONFIG_SYSCTL
-
 ctl_table core_table[] = {
 #ifdef CONFIG_NET
 	{
@@ -151,5 +149,3 @@ ctl_table core_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
-
-#endif
-- 
cgit v1.2.3


From 33eb9cfc700ae9ce621d47d6ca6d6b4ad7cd97f3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:37:34 -0800
Subject: [NET]: Isolate the net/core/ sysctl table

Using ctl paths we can put all the stuff, related to net/core/
sysctl table, into one file and remove all the references on it.

As a good side effect this hides the "core_table" name from
the global scope :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 19 ++++++++++++++++++-
 net/sysctl_net.c           |  6 ------
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 277c8faaf73..e322713e590 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,10 +10,11 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/netdevice.h>
+#include <linux/init.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 
-ctl_table core_table[] = {
+static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
 		.ctl_name	= NET_CORE_WMEM_MAX,
@@ -149,3 +150,19 @@ ctl_table core_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
+
+static __initdata struct ctl_path net_core_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "core", .ctl_name = NET_CORE, },
+	{ },
+};
+
+static __init int sysctl_core_init(void)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_sysctl_paths(net_core_path, net_core_table);
+	return hdr == NULL ? -ENOMEM : 0;
+}
+
+__initcall(sysctl_core_init);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index c50c793aa7f..747fc55b28f 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -31,12 +31,6 @@
 #endif
 
 struct ctl_table net_table[] = {
-	{
-		.ctl_name	= NET_CORE,
-		.procname	= "core",
-		.mode		= 0555,
-		.child		= core_table,
-	},
 #ifdef CONFIG_INET
 	{
 		.ctl_name	= NET_IPV4,
-- 
cgit v1.2.3


From 9ba639797606acbcd97be886f41fbce163914e7b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:38:23 -0800
Subject: [IPV4]: Cleanup the sysctl_net_ipv4.c file

This includes several cleanups:

 * tune Makefile to compile out this file when SYSCTL=n. Now
   it looks like net/core/sysctl_net_core.c one;
 * move the ipv4_config to af_inet.c to exist all the time;
 * remove additional sysctl_ip_nonlocal_bind declaration
   (it is already declared in net/ip.h);
 * remove no nonger needed ifdefs from this file.

This is a preparation for using ctl paths for net/ipv4/
sysctl table.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Makefile          |  3 ++-
 net/ipv4/af_inet.c         |  4 ++++
 net/ipv4/sysctl_net_ipv4.c | 13 -------------
 3 files changed, 6 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 93fe3966805..ad40ef3f9eb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,9 +10,10 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+	     fib_frontend.o fib_semantics.o \
 	     inet_fragment.o
 
+obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c75f20b4993..0e4b6eba234 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -126,6 +126,10 @@ extern void ip_mc_drop_socket(struct sock *sk);
 static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);
 
+struct ipv4_config ipv4_config;
+
+EXPORT_SYMBOL(ipv4_config);
+
 /* New destruction routine */
 
 void inet_sock_destruct(struct sock *sk)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index bec6fe88065..3546424765f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -21,19 +21,10 @@
 #include <net/cipso_ipv4.h>
 #include <net/inet_frag.h>
 
-/* From af_inet.c */
-extern int sysctl_ip_nonlocal_bind;
-
-#ifdef CONFIG_SYSCTL
 static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
-#endif
-
-struct ipv4_config ipv4_config;
-
-#ifdef CONFIG_SYSCTL
 
 static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
@@ -887,7 +878,3 @@ ctl_table ipv4_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
-
-#endif /* CONFIG_SYSCTL */
-
-EXPORT_SYMBOL(ipv4_config);
-- 
cgit v1.2.3


From 3e37c3f9978839d91188e4ca0cc662a7245f28e4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:41:26 -0800
Subject: [IPV4]: Use ctl paths to register net/ipv4/ table

This is the same as I did for the net/core/ table in the
second patch in his series: use the paths and isolate the
whole table in the .c file.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 19 ++++++++++++++++++-
 net/sysctl_net.c           |  8 --------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3546424765f..bfd0dec6238 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,6 +13,7 @@
 #include <linux/igmp.h>
 #include <linux/inetdevice.h>
 #include <linux/seqlock.h>
+#include <linux/init.h>
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -247,7 +248,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
 
 }
 
-ctl_table ipv4_table[] = {
+static struct ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_TCP_TIMESTAMPS,
 		.procname	= "tcp_timestamps",
@@ -878,3 +879,19 @@ ctl_table ipv4_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
+
+static __initdata struct ctl_path net_ipv4_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ },
+};
+
+static __init int sysctl_ipv4_init(void)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_sysctl_paths(net_ipv4_path, ipv4_table);
+	return hdr == NULL ? -ENOMEM : 0;
+}
+
+__initcall(sysctl_ipv4_init);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 747fc55b28f..a4f0ed8d0e7 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -31,14 +31,6 @@
 #endif
 
 struct ctl_table net_table[] = {
-#ifdef CONFIG_INET
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= ipv4_table
-	},
-#endif
 #ifdef CONFIG_TR
 	{
 		.ctl_name	= NET_TR,
-- 
cgit v1.2.3


From 36f0bebd9865dc7e327777fca34b75e65cbfd1a6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 24 Jan 2008 17:04:49 -0800
Subject: [TR]: Use ctl paths to register net/token-ring/ table

The same thing for token-ring - use ctl paths and get
rid of external references on the tr_table.

Unfortunately, I couldn't split this patch into cleanup and
use-the-paths parts.

As a lame excuse I can say, that the cleanup is just moving
the tr_table from one file to another - closet to a single
variable, that this ctl table tunes. Since the source  file
becomes empty after the move, I remove it.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/Makefile         |  3 +--
 net/802/sysctl_net_802.c | 33 ---------------------------------
 net/802/tr.c             | 25 ++++++++++++++++++++++++-
 net/sysctl_net.c         |  8 --------
 4 files changed, 25 insertions(+), 44 deletions(-)
 delete mode 100644 net/802/sysctl_net_802.c

(limited to 'net')

diff --git a/net/802/Makefile b/net/802/Makefile
index 977704a54f6..68569ffddea 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -3,9 +3,8 @@
 #
 
 # Check the p8022 selections against net/core/Makefile.
-obj-$(CONFIG_SYSCTL)	+= sysctl_net_802.o
 obj-$(CONFIG_LLC)	+= p8022.o psnap.o
-obj-$(CONFIG_TR)	+= p8022.o psnap.o tr.o sysctl_net_802.o
+obj-$(CONFIG_TR)	+= p8022.o psnap.o tr.o
 obj-$(CONFIG_NET_FC)	+=                 fc.o
 obj-$(CONFIG_FDDI)	+=                 fddi.o
 obj-$(CONFIG_HIPPI)	+=                 hippi.o
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
deleted file mode 100644
index ead56037398..00000000000
--- a/net/802/sysctl_net_802.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* -*- linux-c -*-
- *		sysctl_net_802.c: sysctl interface to net 802 subsystem.
- *
- *		Begun April 1, 1996, Mike Shaver.
- *		Added /proc/sys/net/802 directory entry (empty =) ). [MS]
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <linux/mm.h>
-#include <linux/if_tr.h>
-#include <linux/sysctl.h>
-
-#ifdef CONFIG_TR
-extern int sysctl_tr_rif_timeout;
-#endif
-
-struct ctl_table tr_table[] = {
-#ifdef CONFIG_TR
-	{
-		.ctl_name	= NET_TR_RIF_TIMEOUT,
-		.procname	= "rif_timeout",
-		.data		= &sysctl_tr_rif_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-#endif /* CONFIG_TR */
-	{ 0 },
-};
diff --git a/net/802/tr.c b/net/802/tr.c
index 151855dd459..3f16b172055 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -35,6 +35,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/sysctl.h>
 #include <net/arp.h>
 #include <net/net_namespace.h>
 
@@ -634,6 +635,26 @@ struct net_device *alloc_trdev(int sizeof_priv)
 	return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
 }
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tr_table[] = {
+	{
+		.ctl_name	= NET_TR_RIF_TIMEOUT,
+		.procname	= "rif_timeout",
+		.data		= &sysctl_tr_rif_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ 0 },
+};
+
+static __initdata struct ctl_path tr_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "token-ring", .ctl_name = NET_TR, },
+	{ }
+};
+#endif
+
 /*
  *	Called during bootup.  We don't actually have to initialise
  *	too much for this.
@@ -644,7 +665,9 @@ static int __init rif_init(void)
 	rif_timer.expires  = jiffies + sysctl_tr_rif_timeout;
 	setup_timer(&rif_timer, rif_check_expire, 0);
 	add_timer(&rif_timer);
-
+#ifdef CONFIG_SYSCTL
+	register_sysctl_paths(tr_path, tr_table);
+#endif
 	proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
 	return 0;
 }
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index a4f0ed8d0e7..16ad14b5d57 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -31,14 +31,6 @@
 #endif
 
 struct ctl_table net_table[] = {
-#ifdef CONFIG_TR
-	{
-		.ctl_name	= NET_TR,
-		.procname	= "token-ring",
-		.mode		= 0555,
-		.child		= tr_table,
-	},
-#endif
 	{ 0 },
 };
 
-- 
cgit v1.2.3


From 08913681e484f3f0db949dd0809012e089846216 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:42:49 -0800
Subject: [NET]: Remove the empty net_table

I have removed all the entries from this table (core_table,
ipv4_table and tr_table), so now we can safely drop it.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sysctl_net.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 16ad14b5d57..665e856675a 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -30,10 +30,6 @@
 #include <linux/if_tr.h>
 #endif
 
-struct ctl_table net_table[] = {
-	{ 0 },
-};
-
 static struct list_head *
 net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
 {
-- 
cgit v1.2.3


From 4a61b586cd7eaab6242eca58e8e6e3c8ebd88bd2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:43:25 -0800
Subject: [IPV6]: Make the ipv6/sysctl_net_ipv6.c compilation cleaner

Since this file is entirely enclosed with the
#ifdef CONFIG_SYSCTL/#endif pair, it's OK to move this
CONFIG_ into a Makefile.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Makefile          | 4 ++--
 net/ipv6/sysctl_net_ipv6.c | 7 -------
 2 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 5ffa9800305..24f3aa0f2a3 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,9 +8,9 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		addrlabel.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
-		exthdrs.o sysctl_net_ipv6.o datagram.o \
-		ip6_flowlabel.o inet6_connection_sock.o
+		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
 
+ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 68bb2548e46..227efa726ac 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,8 +14,6 @@
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
 
-#ifdef CONFIG_SYSCTL
-
 static ctl_table ipv6_table[] = {
 	{
 		.ctl_name	= NET_IPV6_ROUTE,
@@ -115,8 +113,3 @@ void ipv6_sysctl_unregister(void)
 {
 	unregister_sysctl_table(ipv6_sysctl_header);
 }
-
-#endif /* CONFIG_SYSCTL */
-
-
-
-- 
cgit v1.2.3


From 4d43b78ac27ca50fe42718192ac7c80474417389 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:44:02 -0800
Subject: [IPV6]: Use sysctl paths to register ipv6 sysctl tables

I have already done this for core, ipv4 and tr tables, so repeat this
for the ipv6 ones.

This makes the ipv6.ko smaller and creates the ground needed for net
namespaces support in ipv6.ko ssctls.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 227efa726ac..0b5bec3cb79 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -82,31 +82,17 @@ static ctl_table ipv6_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static struct ctl_table_header *ipv6_sysctl_header;
-
-static ctl_table ipv6_net_table[] = {
-	{
-		.ctl_name	= NET_IPV6,
-		.procname	= "ipv6",
-		.mode		= 0555,
-		.child		= ipv6_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path ipv6_ctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv6", .ctl_name = NET_IPV6, },
+	{ },
 };
 
-static ctl_table ipv6_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipv6_net_table
-	},
-	{ .ctl_name = 0 }
-};
+static struct ctl_table_header *ipv6_sysctl_header;
 
 void ipv6_sysctl_register(void)
 {
-	ipv6_sysctl_header = register_sysctl_table(ipv6_root_table);
+	ipv6_sysctl_header = register_sysctl_paths(ipv6_ctl_path, ipv6_table);
 }
 
 void ipv6_sysctl_unregister(void)
-- 
cgit v1.2.3


From 68dd299bc84dede6aef32e6f4777a676314f5d21 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:44:58 -0800
Subject: [INET]: Merge sys.net.ipv4.ip_forward and
 sys.net.ipv4.conf.all.forwarding

AFAIS these two entries should do the same thing - change the
forwarding state on ipv4_devconf and on all the devices.

I propose to merge the handlers together using ctl paths.

The inet_forward_change() is static after this and I move
it higher to be closer to other "propagation" helpers and
to avoid diff making patches based on { and } matching :)
i.e. - make them easier to read.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c         | 66 ++++++++++++++++++++++++++++++----------------
 net/ipv4/sysctl_net_ipv4.c | 65 ---------------------------------------------
 2 files changed, 44 insertions(+), 87 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9e2747aab25..d1dc0150647 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1263,6 +1263,28 @@ static void devinet_copy_dflt_conf(int i)
 	read_unlock(&dev_base_lock);
 }
 
+static void inet_forward_change(void)
+{
+	struct net_device *dev;
+	int on = IPV4_DEVCONF_ALL(FORWARDING);
+
+	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
+	IPV4_DEVCONF_DFLT(FORWARDING) = on;
+
+	read_lock(&dev_base_lock);
+	for_each_netdev(&init_net, dev) {
+		struct in_device *in_dev;
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(dev);
+		if (in_dev)
+			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
+		rcu_read_unlock();
+	}
+	read_unlock(&dev_base_lock);
+
+	rt_cache_flush(0);
+}
+
 static int devinet_conf_proc(ctl_table *ctl, int write,
 			     struct file* filp, void __user *buffer,
 			     size_t *lenp, loff_t *ppos)
@@ -1332,28 +1354,6 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
 	return 1;
 }
 
-void inet_forward_change(void)
-{
-	struct net_device *dev;
-	int on = IPV4_DEVCONF_ALL(FORWARDING);
-
-	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
-	IPV4_DEVCONF_DFLT(FORWARDING) = on;
-
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
-		struct in_device *in_dev;
-		rcu_read_lock();
-		in_dev = __in_dev_get_rcu(dev);
-		if (in_dev)
-			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
-		rcu_read_unlock();
-	}
-	read_unlock(&dev_base_lock);
-
-	rt_cache_flush(0);
-}
-
 static int devinet_sysctl_forward(ctl_table *ctl, int write,
 				  struct file* filp, void __user *buffer,
 				  size_t *lenp, loff_t *ppos)
@@ -1536,6 +1536,27 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p)
 }
 #endif
 
+static struct ctl_table ctl_forward_entry[] = {
+	{
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.data[
+					NET_IPV4_CONF_FORWARDING - 1],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= devinet_sysctl_forward,
+		.strategy	= devinet_conf_sysctl,
+		.extra1		= &ipv4_devconf,
+	},
+	{ },
+};
+
+static __initdata struct ctl_path net_ipv4_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ },
+};
+
 void __init devinet_init(void)
 {
 	register_gifconf(PF_INET, inet_gifconf);
@@ -1549,6 +1570,7 @@ void __init devinet_init(void)
 			&ipv4_devconf);
 	__devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
 			&ipv4_devconf_dflt);
+	register_sysctl_paths(net_ipv4_path, ctl_forward_entry);
 #endif
 }
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index bfd0dec6238..844f26fab06 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -27,62 +27,6 @@ static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 
-static
-int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int val = IPV4_DEVCONF_ALL(FORWARDING);
-	int ret;
-
-	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-
-	if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
-		inet_forward_change();
-
-	return ret;
-}
-
-static int ipv4_sysctl_forward_strategy(ctl_table *table,
-			 int __user *name, int nlen,
-			 void __user *oldval, size_t __user *oldlenp,
-			 void __user *newval, size_t newlen)
-{
-	int *valp = table->data;
-	int new;
-
-	if (!newval || !newlen)
-		return 0;
-
-	if (newlen != sizeof(int))
-		return -EINVAL;
-
-	if (get_user(new, (int __user *)newval))
-		return -EFAULT;
-
-	if (new == *valp)
-		return 0;
-
-	if (oldval && oldlenp) {
-		size_t len;
-
-		if (get_user(len, oldlenp))
-			return -EFAULT;
-
-		if (len) {
-			if (len > table->maxlen)
-				len = table->maxlen;
-			if (copy_to_user(oldval, valp, len))
-				return -EFAULT;
-			if (put_user(len, oldlenp))
-				return -EFAULT;
-		}
-	}
-
-	*valp = new;
-	inet_forward_change();
-	return 1;
-}
-
 extern seqlock_t sysctl_port_range_lock;
 extern int sysctl_local_port_range[2];
 
@@ -281,15 +225,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-	{
-		.ctl_name	= NET_IPV4_FORWARD,
-		.procname	= "ip_forward",
-		.data		= &IPV4_DEVCONF_ALL(FORWARDING),
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &ipv4_sysctl_forward,
-		.strategy	= &ipv4_sysctl_forward_strategy
-	},
 	{
 		.ctl_name	= NET_IPV4_DEFAULT_TTL,
 		.procname	= "ip_default_ttl",
-- 
cgit v1.2.3


From c8fecf2242a0ab7230210665986b8ef915e1ae9e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 01:50:24 -0800
Subject: [IPV6]: Eliminate difference in actions of sysctl and proc handler
 for conf.all.forwarding

The only difference in this case is that updating all.forwarding
causes the update in default.forwarding when done via proc, but
not via the system call.

Besides, this consolidates a good portion of code.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 56 +++++++++++++++++++----------------------------------
 1 file changed, 20 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dbff389b700..95cf3aa41c9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -476,6 +476,21 @@ static void addrconf_forward_change(void)
 	}
 	read_unlock(&dev_base_lock);
 }
+
+static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
+{
+	if (p == &ipv6_devconf_dflt.forwarding)
+		return;
+
+	if (p == &ipv6_devconf.forwarding) {
+		ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+		addrconf_forward_change();
+	} else if ((!*p) ^ (!old))
+		dev_forward_change((struct inet6_dev *)table->extra1);
+
+	if (*p)
+		rt6_purge_dflt_routers();
+}
 #endif
 
 /* Nobody refers to this ifaddr, destroy it */
@@ -3771,22 +3786,8 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
-	if (write && valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
-			if ((!*valp) ^ (!val)) {
-				struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
-				if (idev == NULL)
-					return ret;
-				dev_forward_change(idev);
-			}
-		} else {
-			ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
-			addrconf_forward_change();
-		}
-		if (*valp)
-			rt6_purge_dflt_routers();
-	}
-
+	if (write)
+		addrconf_fixup_forwarding(ctl, valp, val);
 	return ret;
 }
 
@@ -3797,6 +3798,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
 					    void __user *newval, size_t newlen)
 {
 	int *valp = table->data;
+	int val = *valp;
 	int new;
 
 	if (!newval || !newlen)
@@ -3821,26 +3823,8 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
 		}
 	}
 
-	if (valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
-			struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
-			int changed;
-			if (unlikely(idev == NULL))
-				return -ENODEV;
-			changed = (!*valp) ^ (!new);
-			*valp = new;
-			if (changed)
-				dev_forward_change(idev);
-		} else {
-			*valp = new;
-			addrconf_forward_change();
-		}
-
-		if (*valp)
-			rt6_purge_dflt_routers();
-	} else
-		*valp = new;
-
+	*valp = new;
+	addrconf_fixup_forwarding(table, valp, val);
 	return 1;
 }
 
-- 
cgit v1.2.3


From 27ab2568649d5ba6c5a20212079b7c4f6da4ca0d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 5 Dec 2007 01:51:58 -0800
Subject: [UDP]: Avoid repeated counting of checksum errors due to peeking

Currently it is possible for two processes to peek on the same socket
and end up incrementing the error counter twice for the same packet.

This patch fixes it by making skb_kill_datagram return whether it
succeeded in unlinking the packet and only incrementing the counter
if it did.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 9 ++++++++-
 net/ipv4/udp.c      | 5 ++---
 net/ipv6/udp.c      | 4 ++--
 3 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 029b93e246b..fbd6c76436d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -217,20 +217,27 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
  *	This function currently only disables BH when acquiring the
  *	sk_receive_queue lock.  Therefore it must not be used in a
  *	context where that lock is acquired in an IRQ context.
+ *
+ *	It returns 0 if the packet was removed by us.
  */
 
-void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
+	int err = 0;
+
 	if (flags & MSG_PEEK) {
+		err = -ENOENT;
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			atomic_dec(&skb->users);
+			err = 0;
 		}
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 	}
 
 	kfree_skb(skb);
+	return err;
 }
 
 EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d0283b7fcec..f50de5d5218 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -899,9 +899,8 @@ out:
 	return err;
 
 csum_copy_err:
-	UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
-
-	skb_kill_datagram(sk, skb, flags);
+	if (!skb_kill_datagram(sk, skb, flags))
+		UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
 
 	if (noblock)
 		return -EAGAIN;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 77ab31b9923..87bccec9882 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -207,8 +207,8 @@ out:
 	return err;
 
 csum_copy_err:
-	UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
-	skb_kill_datagram(sk, skb, flags);
+	if (!skb_kill_datagram(sk, skb, flags))
+		UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
-- 
cgit v1.2.3


From 1781f7f5804e52ee2d35328b129602146a8d8254 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 11 Dec 2007 11:30:32 -0800
Subject: [UDP]: Restore missing inDatagrams increments

The previous move of the the UDP inDatagrams counter caused the
counting of encapsulated packets, SUNRPC data (as opposed to call)
packets and RXRPC packets to go missing.

This patch restores all of these.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c        | 6 +++++-
 net/ipv6/proc.c       | 1 +
 net/ipv6/udp.c        | 2 ++
 net/rxrpc/ar-input.c  | 4 ++++
 net/sunrpc/xprtsock.c | 6 +++++-
 5 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f50de5d5218..78cfcb4a1b3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,6 +110,7 @@
  */
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
+EXPORT_SYMBOL(udp_statistics);
 
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
@@ -969,8 +970,11 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			int ret;
 
 			ret = (*up->encap_rcv)(sk, skb);
-			if (ret <= 0)
+			if (ret <= 0) {
+				UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
+						 is_udplite);
 				return -ret;
+			}
 		}
 
 		/* FALLTHROUGH -- it's a UDP Packet */
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 44937616057..41e9980b3e0 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -27,6 +27,7 @@
 #include <net/ip.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/udp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 87bccec9882..36bdcd2e1b5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -34,6 +34,7 @@
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <linux/skbuff.h>
 #include <asm/uaccess.h>
 
@@ -51,6 +52,7 @@
 #include "udp_impl.h"
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
+EXPORT_SYMBOL(udp_stats_in6);
 
 static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 91b5bbb003e..f446d9b9925 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -20,6 +20,7 @@
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
+#include <net/udp.h>
 #include "ar-internal.h"
 
 unsigned long rxrpc_ack_timeout = 1;
@@ -707,10 +708,13 @@ void rxrpc_data_ready(struct sock *sk, int count)
 	if (skb_checksum_complete(skb)) {
 		rxrpc_free_skb(skb);
 		rxrpc_put_local(local);
+		UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0);
 		_leave(" [CSUM failed]");
 		return;
 	}
 
+	UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0);
+
 	/* the socket buffer we have is owned by UDP, with UDP's data all over
 	 * it, but we really want our own */
 	skb_orphan(skb);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2f630a512ab..6fa52f44de0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -838,8 +838,12 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 		copied = repsize;
 
 	/* Suck it into the iovec, verify checksum if not done by hw. */
-	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
+	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
+		UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS);
 		goto out_unlock;
+	}
+
+	UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
 
 	/* Something worked... */
 	dst_confirm(skb->dst);
-- 
cgit v1.2.3


From a59322be07c964e916d15be3df473fb7ba20c41e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 5 Dec 2007 01:53:40 -0800
Subject: [UDP]: Only increment counter on first peek/recv

The previous move of the the UDP inDatagrams counter caused each
peek of the same packet to be counted separately.  This may be
undesirable.

This patch fixes this by adding a bit to sk_buff to record whether
this packet has already been seen through skb_recv_datagram.  We
then only increment the counter when the packet is seen for the
first time.

The only dodgy part is the fact that skb_recv_datagram doesn't have
a good way of returning this new bit of information.  So I've added
a new function __skb_recv_datagram that does return this and made
skb_recv_datagram a wrapper around it.

The plan is to eventually replace all uses of skb_recv_datagram with
this new function at which time it can be renamed its proper name.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 43 +++++++++++++++++++++++++++----------------
 net/ipv4/udp.c      |  7 +++++--
 net/ipv6/udp.c      |  7 +++++--
 3 files changed, 37 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index fbd6c76436d..2d733131d7c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -115,10 +115,10 @@ out_noerr:
 }
 
 /**
- *	skb_recv_datagram - Receive a datagram skbuff
+ *	__skb_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
  *	@flags: MSG_ flags
- *	@noblock: blocking operation?
+ *	@peeked: returns non-zero if this packet has been seen before
  *	@err: error code returned
  *
  *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -143,8 +143,8 @@ out_noerr:
  *	quite explicitly by POSIX 1003.1g, don't change them without having
  *	the standard around please.
  */
-struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-				  int noblock, int *err)
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *err)
 {
 	struct sk_buff *skb;
 	long timeo;
@@ -156,7 +156,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 	if (error)
 		goto no_packet;
 
-	timeo = sock_rcvtimeo(sk, noblock);
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
 		/* Again only user level code calls this function, so nothing
@@ -165,18 +165,19 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 		 * Look at current nfs client by the way...
 		 * However, this function was corrent in any case. 8)
 		 */
-		if (flags & MSG_PEEK) {
-			unsigned long cpu_flags;
-
-			spin_lock_irqsave(&sk->sk_receive_queue.lock,
-					  cpu_flags);
-			skb = skb_peek(&sk->sk_receive_queue);
-			if (skb)
+		unsigned long cpu_flags;
+
+		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb) {
+			*peeked = skb->peeked;
+			if (flags & MSG_PEEK) {
+				skb->peeked = 1;
 				atomic_inc(&skb->users);
-			spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
-					       cpu_flags);
-		} else
-			skb = skb_dequeue(&sk->sk_receive_queue);
+			} else
+				__skb_unlink(skb, &sk->sk_receive_queue);
+		}
+		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
 
 		if (skb)
 			return skb;
@@ -194,6 +195,16 @@ no_packet:
 	*err = error;
 	return NULL;
 }
+EXPORT_SYMBOL(__skb_recv_datagram);
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	int peeked;
+
+	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				   &peeked, err);
+}
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 78cfcb4a1b3..9ed6393c65d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -827,6 +827,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
+	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 
@@ -840,7 +841,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		return ip_recv_error(sk, msg, len);
 
 try_again:
-	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				  &peeked, &err);
 	if (!skb)
 		goto out;
 
@@ -875,7 +877,8 @@ try_again:
 	if (err)
 		goto out_free;
 
-	UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+	if (!peeked)
+		UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
 
 	sock_recv_timestamp(msg, sk, skb);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 36bdcd2e1b5..fa640765385 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -123,6 +123,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
+	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 
@@ -133,7 +134,8 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		return ipv6_recv_error(sk, msg, len);
 
 try_again:
-	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				  &peeked, &err);
 	if (!skb)
 		goto out;
 
@@ -166,7 +168,8 @@ try_again:
 	if (err)
 		goto out_free;
 
-	UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+	if (!peeked)
+		UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
 
 	sock_recv_timestamp(msg, sk, skb);
 
-- 
cgit v1.2.3


From 78c686e9faff05f1194e892d5b1273ce24021ff6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 21:13:48 -0800
Subject: [IPV4] ROUTE: Collect proc-related functions together

The net/ipv4/route.c file declares some entries for proc
to dump some routing info. The reading functions are
scattered over this file - collect them together.

Besides, remove a useless IP_RT_ACCT_CPU macro.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 81 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 39 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fcae074b7ae..9736f689562 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -481,6 +481,45 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
+#ifdef CONFIG_NET_CLS_ROUTE
+static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
+			   int length, int *eof, void *data)
+{
+	unsigned int i;
+
+	if ((offset & 3) || (length & 3))
+		return -EIO;
+
+	if (offset >= sizeof(struct ip_rt_acct) * 256) {
+		*eof = 1;
+		return 0;
+	}
+
+	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
+		length = sizeof(struct ip_rt_acct) * 256 - offset;
+		*eof = 1;
+	}
+
+	offset /= sizeof(u32);
+
+	if (length > 0) {
+		u32 *dst = (u32 *) buffer;
+
+		*start = buffer;
+		memset(dst, 0, length);
+
+		for_each_possible_cpu(i) {
+			unsigned int j;
+			u32 *src;
+
+			src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset;
+			for (j = 0; j < length/4; j++)
+				dst[j] += src[j];
+		}
+	}
+	return length;
+}
+#endif
 #endif /* CONFIG_PROC_FS */
 
 static __inline__ void rt_free(struct rtable *rt)
@@ -2898,48 +2937,6 @@ ctl_table ipv4_route_table[] = {
 
 #ifdef CONFIG_NET_CLS_ROUTE
 struct ip_rt_acct *ip_rt_acct __read_mostly;
-
-/* IP route accounting ptr for this logical cpu number. */
-#define IP_RT_ACCT_CPU(cpu) (per_cpu_ptr(ip_rt_acct, cpu))
-
-#ifdef CONFIG_PROC_FS
-static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
-			   int length, int *eof, void *data)
-{
-	unsigned int i;
-
-	if ((offset & 3) || (length & 3))
-		return -EIO;
-
-	if (offset >= sizeof(struct ip_rt_acct) * 256) {
-		*eof = 1;
-		return 0;
-	}
-
-	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
-		length = sizeof(struct ip_rt_acct) * 256 - offset;
-		*eof = 1;
-	}
-
-	offset /= sizeof(u32);
-
-	if (length > 0) {
-		u32 *dst = (u32 *) buffer;
-
-		*start = buffer;
-		memset(dst, 0, length);
-
-		for_each_possible_cpu(i) {
-			unsigned int j;
-			u32 *src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
-
-			for (j = 0; j < length/4; j++)
-				dst[j] += src[j];
-		}
-	}
-	return length;
-}
-#endif /* CONFIG_PROC_FS */
 #endif /* CONFIG_NET_CLS_ROUTE */
 
 static __initdata unsigned long rhash_entries;
-- 
cgit v1.2.3


From 107f163428b846d7cef68cb6f436788bb2f6c2e1 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 21:14:28 -0800
Subject: [IPV4] ROUTE: Clean up proc files creation.

The rt_cache, stats/rt_cache and rt_acct(optional) files
creation looks a bit messy. Clean this out and join them
to other proc-related functions under the proper ifdef.

The struct net * argument in a new function will help net
namespaces patches look nicer.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 54 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9736f689562..193788381a5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -520,6 +520,44 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
 	return length;
 }
 #endif
+
+static __init int ip_rt_proc_init(struct net *net)
+{
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
+			&rt_cache_seq_fops);
+	if (!pde)
+		goto err1;
+
+	pde = create_proc_entry("rt_cache", S_IRUGO, net->proc_net_stat);
+	if (!pde)
+		goto err2;
+
+	pde->proc_fops = &rt_cpu_seq_fops;
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	pde = create_proc_read_entry("rt_acct", 0, net->proc_net,
+			ip_rt_acct_read, NULL);
+	if (!pde)
+		goto err3;
+#endif
+	return 0;
+
+#ifdef CONFIG_NET_CLS_ROUTE
+err3:
+	remove_proc_entry("rt_cache", net->proc_net_stat);
+#endif
+err2:
+	remove_proc_entry("rt_cache", net->proc_net);
+err1:
+	return -ENOMEM;
+}
+#else
+static inline int ip_rt_proc_init(struct net *net)
+{
+	return 0;
+}
 #endif /* CONFIG_PROC_FS */
 
 static __inline__ void rt_free(struct rtable *rt)
@@ -3000,20 +3038,8 @@ int __init ip_rt_init(void)
 		ip_rt_secret_interval;
 	add_timer(&rt_secret_timer);
 
-#ifdef CONFIG_PROC_FS
-	{
-	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-	if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
-					     init_net.proc_net_stat))) {
-		return -ENOMEM;
-	}
-	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
-	}
-#ifdef CONFIG_NET_CLS_ROUTE
-	create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL);
-#endif
-#endif
+	if (ip_rt_proc_init(&init_net))
+		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
 	xfrm_init();
 	xfrm4_init();
-- 
cgit v1.2.3


From 1ff1cc202e9a7dbd9f54d1bce5adb44283497185 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 21:15:05 -0800
Subject: [IPV4] ROUTE: Convert rt_hash_lock_init() macro into function

There's no need in having this function exist in a form
of macro. Properly formatted function looks much better.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 193788381a5..d95e48e8d65 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -235,16 +235,25 @@ struct rt_hash_bucket {
 
 static spinlock_t	*rt_hash_locks;
 # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
-# define rt_hash_lock_init()	{ \
-		int i; \
-		rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
-		if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
-		for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
-			spin_lock_init(&rt_hash_locks[i]); \
-		}
+
+static __init void rt_hash_lock_init(void)
+{
+	int i;
+
+	rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ,
+			GFP_KERNEL);
+	if (!rt_hash_locks)
+		panic("IP: failed to allocate rt_hash_locks\n");
+
+	for (i = 0; i < RT_HASH_LOCK_SZ; i++)
+		spin_lock_init(&rt_hash_locks[i]);
+}
 #else
 # define rt_hash_lock_addr(slot) NULL
-# define rt_hash_lock_init()
+
+static inline void rt_hash_lock_init(void)
+{
+}
 #endif
 
 static struct rt_hash_bucket 	*rt_hash_table;
-- 
cgit v1.2.3


From 43dc1701172b7f73c495dea6c165c73ab8934f76 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 21:19:44 -0800
Subject: [ARP]: Minus one level of indentation in arp_req_set

The ATF_PUBL requests are handled completely separate from
the others. Emphasize it with a separate function. This also
reduces the indentation level.

The same issue exists with the arp_delete_request, but
when I tried to make it in one patch diff produced completely
unreadable patch. So I split it into two, but they may be
done with one commit.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 58 +++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 54a76b8b803..04e0ecdd07b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -952,37 +952,45 @@ out_of_mem:
  *	Set (create) an ARP cache entry.
  */
 
+static int arp_req_set_public(struct arpreq *r, struct net_device *dev)
+{
+	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+
+	if (mask && mask != htonl(0xFFFFFFFF))
+		return -EINVAL;
+	if (!dev && (r->arp_flags & ATF_COM)) {
+		dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family,
+				r->arp_ha.sa_data);
+		if (!dev)
+			return -ENODEV;
+	}
+	if (mask) {
+		if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
+			return -ENOBUFS;
+		return 0;
+	}
+	if (dev == NULL) {
+		IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
+		return 0;
+	}
+	if (__in_dev_get_rtnl(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
+		return 0;
+	}
+	return -ENXIO;
+}
+
 static int arp_req_set(struct arpreq *r, struct net_device * dev)
 {
-	__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+	__be32 ip;
 	struct neighbour *neigh;
 	int err;
 
-	if (r->arp_flags&ATF_PUBL) {
-		__be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
-		if (mask && mask != htonl(0xFFFFFFFF))
-			return -EINVAL;
-		if (!dev && (r->arp_flags & ATF_COM)) {
-			dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family, r->arp_ha.sa_data);
-			if (!dev)
-				return -ENODEV;
-		}
-		if (mask) {
-			if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
-				return -ENOBUFS;
-			return 0;
-		}
-		if (dev == NULL) {
-			IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
-			return 0;
-		}
-		if (__in_dev_get_rtnl(dev)) {
-			IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
-			return 0;
-		}
-		return -ENXIO;
-	}
+	if (r->arp_flags & ATF_PUBL)
+		return arp_req_set_public(r, dev);
 
+	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (r->arp_flags & ATF_PERM)
 		r->arp_flags |= ATF_COM;
 	if (dev == NULL) {
-- 
cgit v1.2.3


From 46479b432989d61c6f676adf21b4f4d8abd7bc26 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 21:20:18 -0800
Subject: [ARP]: Minus one level of ndentation in arp_req_delete

The same cleanup for deletion requests.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 04e0ecdd07b..e947ad9409b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1074,32 +1074,39 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
+static int arp_req_delete_public(struct arpreq *r, struct net_device *dev)
+{
+	__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+
+	if (mask == htonl(0xFFFFFFFF))
+		return pneigh_delete(&arp_tbl, &ip, dev);
+
+	if (mask == 0) {
+		if (dev == NULL) {
+			IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
+			return 0;
+		}
+		if (__in_dev_get_rtnl(dev)) {
+			IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
+					PROXY_ARP, 0);
+			return 0;
+		}
+		return -ENXIO;
+	}
+	return -EINVAL;
+}
+
 static int arp_req_delete(struct arpreq *r, struct net_device * dev)
 {
 	int err;
-	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+	__be32 ip;
 	struct neighbour *neigh;
 
-	if (r->arp_flags & ATF_PUBL) {
-		__be32 mask =
-		       ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
-		if (mask == htonl(0xFFFFFFFF))
-			return pneigh_delete(&arp_tbl, &ip, dev);
-		if (mask == 0) {
-			if (dev == NULL) {
-				IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
-				return 0;
-			}
-			if (__in_dev_get_rtnl(dev)) {
-				IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
-						PROXY_ARP, 0);
-				return 0;
-			}
-			return -ENXIO;
-		}
-		return -EINVAL;
-	}
+	if (r->arp_flags & ATF_PUBL)
+		return arp_req_delete_public(r, dev);
 
+	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (dev == NULL) {
 		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
 							 .tos = RTO_ONLINK } } };
-- 
cgit v1.2.3


From f8b33fdfafea0f909712a55fbb3d83b89f70f3f5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 5 Dec 2007 21:20:50 -0800
Subject: [ARP]: Consolidate some code in arp_req_set/delete_publc

The PROXY_ARP is set on devconfigs in a similar way in
both calls.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 41 +++++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e947ad9409b..477f3e5fe8d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -952,6 +952,19 @@ out_of_mem:
  *	Set (create) an ARP cache entry.
  */
 
+static int arp_req_set_proxy(struct net_device *dev, int on)
+{
+	if (dev == NULL) {
+		IPV4_DEVCONF_ALL(PROXY_ARP) = on;
+		return 0;
+	}
+	if (__in_dev_get_rtnl(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+		return 0;
+	}
+	return -ENXIO;
+}
+
 static int arp_req_set_public(struct arpreq *r, struct net_device *dev)
 {
 	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
@@ -970,15 +983,8 @@ static int arp_req_set_public(struct arpreq *r, struct net_device *dev)
 			return -ENOBUFS;
 		return 0;
 	}
-	if (dev == NULL) {
-		IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
-		return 0;
-	}
-	if (__in_dev_get_rtnl(dev)) {
-		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
-		return 0;
-	}
-	return -ENXIO;
+
+	return arp_req_set_proxy(dev, 1);
 }
 
 static int arp_req_set(struct arpreq *r, struct net_device * dev)
@@ -1082,19 +1088,10 @@ static int arp_req_delete_public(struct arpreq *r, struct net_device *dev)
 	if (mask == htonl(0xFFFFFFFF))
 		return pneigh_delete(&arp_tbl, &ip, dev);
 
-	if (mask == 0) {
-		if (dev == NULL) {
-			IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
-			return 0;
-		}
-		if (__in_dev_get_rtnl(dev)) {
-			IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
-					PROXY_ARP, 0);
-			return 0;
-		}
-		return -ENXIO;
-	}
-	return -EINVAL;
+	if (mask)
+		return -EINVAL;
+
+	return arp_req_set_proxy(dev, 0);
 }
 
 static int arp_req_delete(struct arpreq *r, struct net_device * dev)
-- 
cgit v1.2.3


From c40616c597bf02a2346cbf2f120283734b436245 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 6 Dec 2007 12:26:38 -0200
Subject: [TFRC]: Provide central source file and debug facility

This patch changes the tfrc_lib module in the following manner:

 (1) a dedicated tfrc source file to call the packet history &
     loss interval init/exit functions.
 (2) a dedicated tfrc_pr_debug macro with toggle switch `tfrc_debug'.

Commiter note: renamed tfrc_module.c to tfrc.c, and made CONFIG_IP_DCCP_CCID3
select IP_DCCP_TFRC_LIB.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/Kconfig              | 13 ++++++----
 net/dccp/ccids/lib/Makefile         |  2 +-
 net/dccp/ccids/lib/packet_history.c | 27 +++------------------
 net/dccp/ccids/lib/packet_history.h |  3 +--
 net/dccp/ccids/lib/tfrc.c           | 48 +++++++++++++++++++++++++++++++++++++
 net/dccp/ccids/lib/tfrc.h           | 17 +++++++++----
 6 files changed, 75 insertions(+), 35 deletions(-)
 create mode 100644 net/dccp/ccids/lib/tfrc.c

(limited to 'net')

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 3d7d867a7c4..12275943eab 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -38,6 +38,7 @@ config IP_DCCP_CCID2_DEBUG
 config IP_DCCP_CCID3
 	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
 	def_tristate IP_DCCP
+	select IP_DCCP_TFRC_LIB
 	---help---
 	  CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
 	  rate-controlled congestion control mechanism.  TFRC is designed to
@@ -63,10 +64,6 @@ config IP_DCCP_CCID3
 
 	  If in doubt, say M.
 
-config IP_DCCP_TFRC_LIB
-	depends on IP_DCCP_CCID3
-	def_tristate IP_DCCP_CCID3
-
 config IP_DCCP_CCID3_DEBUG
 	  bool "CCID3 debugging messages"
 	  depends on IP_DCCP_CCID3
@@ -110,5 +107,13 @@ config IP_DCCP_CCID3_RTO
 	    is serious network congestion: experimenting with larger values should
 	    therefore not be performed on WANs.
 
+config IP_DCCP_TFRC_LIB
+	tristate
+	default n
+
+config IP_DCCP_TFRC_DEBUG
+	bool
+	depends on IP_DCCP_TFRC_LIB
+	default y if IP_DCCP_CCID3_DEBUG
 
 endmenu
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
index 5f940a6cbac..68c93e3d89d 100644
--- a/net/dccp/ccids/lib/Makefile
+++ b/net/dccp/ccids/lib/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
 
-dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
+dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 4805de99656..1d4d6ee9be4 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -35,7 +35,6 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/module.h>
 #include <linux/string.h>
 #include "packet_history.h"
 
@@ -277,39 +276,19 @@ void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
 
-extern int __init dccp_li_init(void);
-extern void dccp_li_exit(void);
-
-static __init int packet_history_init(void)
+__init int packet_history_init(void)
 {
-	if (dccp_li_init() != 0)
-		goto out;
-
 	tfrc_tx_hist = kmem_cache_create("tfrc_tx_hist",
 					 sizeof(struct tfrc_tx_hist_entry), 0,
 					 SLAB_HWCACHE_ALIGN, NULL);
-	if (tfrc_tx_hist == NULL)
-		goto out_li_exit;
 
-	return 0;
-out_li_exit:
-	dccp_li_exit();
-out:
-	return -ENOBUFS;
+	return tfrc_tx_hist == NULL ? -ENOBUFS : 0;
 }
-module_init(packet_history_init);
 
-static __exit void packet_history_exit(void)
+void packet_history_exit(void)
 {
 	if (tfrc_tx_hist != NULL) {
 		kmem_cache_destroy(tfrc_tx_hist);
 		tfrc_tx_hist = NULL;
 	}
-	dccp_li_exit();
 }
-module_exit(packet_history_exit);
-
-MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
-	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
-MODULE_DESCRIPTION("DCCP TFRC library");
-MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 0670f46dd53..9a2642ed684 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -39,8 +39,7 @@
 #include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-
-#include "../../dccp.h"
+#include "tfrc.h"
 
 /* Number of later packets received before one is considered lost */
 #define TFRC_RECV_NUM_LATE_LOSS	 3
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
new file mode 100644
index 00000000000..3a7a1838a64
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -0,0 +1,48 @@
+/*
+ * TFRC: main module holding the pieces of the TFRC library together
+ *
+ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
+ * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include "tfrc.h"
+
+#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+int tfrc_debug;
+module_param(tfrc_debug, bool, 0444);
+MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
+#endif
+
+extern int  dccp_li_init(void);
+extern void dccp_li_exit(void);
+extern int packet_history_init(void);
+extern void packet_history_exit(void);
+
+static int __init tfrc_module_init(void)
+{
+	int rc = dccp_li_init();
+
+	if (rc == 0) {
+		rc = packet_history_init();
+		if (rc != 0)
+			dccp_li_exit();
+	}
+
+	return rc;
+}
+
+static void __exit tfrc_module_exit(void)
+{
+	packet_history_exit();
+	dccp_li_exit();
+}
+
+module_init(tfrc_module_init);
+module_exit(tfrc_module_exit);
+
+MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, "
+	      "Ian McDonald <ian.mcdonald@jandi.co.nz>, "
+	      "Arnaldo Carvalho de Melo <acme@redhat.com>");
+MODULE_DESCRIPTION("DCCP TFRC library");
+MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 5a0ba86df18..ab8848c0f8c 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -3,10 +3,11 @@
 /*
  *  net/dccp/ccids/lib/tfrc.h
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
- *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *  Copyright (c) 2005   Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *  Copyright (c) 2003   Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -15,6 +16,14 @@
  */
 #include <linux/types.h>
 #include <asm/div64.h>
+#include "../../dccp.h"
+
+#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+extern int tfrc_debug;
+#define tfrc_pr_debug(format, a...)	DCCP_PR_DEBUG(tfrc_debug, format, ##a)
+#else
+#define tfrc_pr_debug(format, a...)
+#endif
 
 /* integer-arithmetic divisions of type (a * 1000000)/b */
 static inline u64 scaled_div(u64 a, u32 b)
-- 
cgit v1.2.3


From 2180c41ca5c1a36c67f4140e80154699333109d2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 6 Dec 2007 12:27:15 -0200
Subject: [DCCP]: Introduce generic function to test for `data packets'

as per  RFC 4340, sec. 7.7.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index ee97950d77d..f4a5ea116e3 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -334,6 +334,7 @@ struct dccp_skb_cb {
 
 #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
 
+/* RFC 4340, sec. 7.7 */
 static inline int dccp_non_data_packet(const struct sk_buff *skb)
 {
 	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
@@ -346,6 +347,17 @@ static inline int dccp_non_data_packet(const struct sk_buff *skb)
 	       type == DCCP_PKT_SYNCACK;
 }
 
+/* RFC 4340, sec. 7.7 */
+static inline int dccp_data_packet(const struct sk_buff *skb)
+{
+	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
+
+	return type == DCCP_PKT_DATA	 ||
+	       type == DCCP_PKT_DATAACK  ||
+	       type == DCCP_PKT_REQUEST  ||
+	       type == DCCP_PKT_RESPONSE;
+}
+
 static inline int dccp_packet_without_ack(const struct sk_buff *skb)
 {
 	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
-- 
cgit v1.2.3


From e9c8b24a6ade50315f3c080799da45ddadf42269 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 6 Dec 2007 12:27:49 -0200
Subject: [TFRC]: Rename tfrc_tx_hist to tfrc_tx_hist_slab, for consistency

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 1d4d6ee9be4..b628714fb2a 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -53,7 +53,7 @@ struct tfrc_tx_hist_entry {
 /*
  * Transmitter History Routines
  */
-static struct kmem_cache *tfrc_tx_hist;
+static struct kmem_cache *tfrc_tx_hist_slab;
 
 static struct tfrc_tx_hist_entry *
 	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
@@ -66,7 +66,7 @@ static struct tfrc_tx_hist_entry *
 
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
-	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist, gfp_any());
+	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
 
 	if (entry == NULL)
 		return -ENOBUFS;
@@ -85,7 +85,7 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 	while (head != NULL) {
 		struct tfrc_tx_hist_entry *next = head->next;
 
-		kmem_cache_free(tfrc_tx_hist, head);
+		kmem_cache_free(tfrc_tx_hist_slab, head);
 		head = next;
 	}
 
@@ -278,17 +278,17 @@ EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
 
 __init int packet_history_init(void)
 {
-	tfrc_tx_hist = kmem_cache_create("tfrc_tx_hist",
-					 sizeof(struct tfrc_tx_hist_entry), 0,
-					 SLAB_HWCACHE_ALIGN, NULL);
+	tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
+					      sizeof(struct tfrc_tx_hist_entry), 0,
+					      SLAB_HWCACHE_ALIGN, NULL);
 
-	return tfrc_tx_hist == NULL ? -ENOBUFS : 0;
+	return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
 }
 
 void packet_history_exit(void)
 {
-	if (tfrc_tx_hist != NULL) {
-		kmem_cache_destroy(tfrc_tx_hist);
-		tfrc_tx_hist = NULL;
+	if (tfrc_tx_hist_slab != NULL) {
+		kmem_cache_destroy(tfrc_tx_hist_slab);
+		tfrc_tx_hist_slab = NULL;
 	}
 }
-- 
cgit v1.2.3


From 34a9e7ea91bb4acb45ae5331e7403304029329b2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 6 Dec 2007 12:28:13 -0200
Subject: [TFRC]: Make the rx history slab be global

This is in preparation for merging the new rx history code written by Gerrit Renker.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              | 35 +++-----------
 net/dccp/ccids/lib/packet_history.c | 95 +++++++++++++++++++------------------
 net/dccp/ccids/lib/packet_history.h | 43 ++---------------
 3 files changed, 60 insertions(+), 113 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 49338370eb0..2ba0a7c470d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,8 +49,6 @@ static int ccid3_debug;
 #define ccid3_pr_debug(format, a...)
 #endif
 
-static struct dccp_rx_hist *ccid3_rx_hist;
-
 /*
  *	Transmitter Half-Connection Routines
  */
@@ -807,9 +805,9 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	}
 
 detect_out:
-	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
-		   &hcrx->ccid3hcrx_li_hist, packet,
-		   hcrx->ccid3hcrx_seqno_nonloss);
+	dccp_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
+				&hcrx->ccid3hcrx_li_hist, packet,
+				hcrx->ccid3hcrx_seqno_nonloss);
 	return loss;
 }
 
@@ -852,8 +850,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
-					skb, GFP_ATOMIC);
+	packet = dccp_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
 	if (unlikely(packet == NULL)) {
 		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
 			  "to history, consider it lost!\n", dccp_role(sk), sk);
@@ -936,7 +933,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
 
 	/* Empty packet history */
-	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
+	dccp_rx_hist_purge(&hcrx->ccid3hcrx_hist);
 
 	/* Empty loss interval history */
 	dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
@@ -1013,33 +1010,13 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
 
 static __init int ccid3_module_init(void)
 {
-	int rc = -ENOBUFS;
-
-	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
-	if (ccid3_rx_hist == NULL)
-		goto out;
-
-	rc = ccid_register(&ccid3);
-	if (rc != 0)
-		goto out_free_rx;
-out:
-	return rc;
-
-out_free_rx:
-	dccp_rx_hist_delete(ccid3_rx_hist);
-	ccid3_rx_hist = NULL;
-	goto out;
+	return ccid_register(&ccid3);
 }
 module_init(ccid3_module_init);
 
 static __exit void ccid3_module_exit(void)
 {
 	ccid_unregister(&ccid3);
-
-	if (ccid3_rx_hist != NULL) {
-		dccp_rx_hist_delete(ccid3_rx_hist);
-		ccid3_rx_hist = NULL;
-	}
 }
 module_exit(ccid3_module_exit);
 
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index b628714fb2a..e1ab853c38d 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -114,48 +114,33 @@ EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
 /*
  * 	Receiver History Routines
  */
-struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
+static struct kmem_cache *tfrc_rx_hist_slab;
+
+struct dccp_rx_hist_entry *dccp_rx_hist_entry_new(const u32 ndp,
+						  const struct sk_buff *skb,
+						  const gfp_t prio)
 {
-	struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
-	static const char dccp_rx_hist_mask[] = "rx_hist_%s";
-	char *slab_name;
-
-	if (hist == NULL)
-		goto out;
-
-	slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
-			    GFP_ATOMIC);
-	if (slab_name == NULL)
-		goto out_free_hist;
-
-	sprintf(slab_name, dccp_rx_hist_mask, name);
-	hist->dccprxh_slab = kmem_cache_create(slab_name,
-					     sizeof(struct dccp_rx_hist_entry),
-					     0, SLAB_HWCACHE_ALIGN, NULL);
-	if (hist->dccprxh_slab == NULL)
-		goto out_free_slab_name;
-out:
-	return hist;
-out_free_slab_name:
-	kfree(slab_name);
-out_free_hist:
-	kfree(hist);
-	hist = NULL;
-	goto out;
-}
+	struct dccp_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
+							    prio);
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
+	if (entry != NULL) {
+		const struct dccp_hdr *dh = dccp_hdr(skb);
 
-void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
-{
-	const char* name = kmem_cache_name(hist->dccprxh_slab);
+		entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+		entry->dccphrx_ccval = dh->dccph_ccval;
+		entry->dccphrx_type  = dh->dccph_type;
+		entry->dccphrx_ndp   = ndp;
+		entry->dccphrx_tstamp = ktime_get_real();
+	}
 
-	kmem_cache_destroy(hist->dccprxh_slab);
-	kfree(name);
-	kfree(hist);
+	return entry;
 }
+EXPORT_SYMBOL_GPL(dccp_rx_hist_entry_new);
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
+static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist_entry *entry)
+{
+	kmem_cache_free(tfrc_rx_hist_slab, entry);
+}
 
 int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
 			    u8 *ccval)
@@ -192,11 +177,10 @@ struct dccp_rx_hist_entry *
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
 
-void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
-			    struct list_head *rx_list,
-			    struct list_head *li_list,
-			    struct dccp_rx_hist_entry *packet,
-			    u64 nonloss_seqno)
+void dccp_rx_hist_add_packet(struct list_head *rx_list,
+			     struct list_head *li_list,
+			     struct dccp_rx_hist_entry *packet,
+			     u64 nonloss_seqno)
 {
 	struct dccp_rx_hist_entry *entry, *next;
 	u8 num_later = 0;
@@ -211,7 +195,7 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 				if (after48(nonloss_seqno,
 				   entry->dccphrx_seqno)) {
 					list_del_init(&entry->dccphrx_node);
-					dccp_rx_hist_entry_delete(hist, entry);
+					dccp_rx_hist_entry_delete(entry);
 				}
 			} else if (dccp_rx_hist_entry_data_packet(entry))
 				--num_later;
@@ -253,7 +237,7 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 					break;
 				case 3:
 					list_del_init(&entry->dccphrx_node);
-					dccp_rx_hist_entry_delete(hist, entry);
+					dccp_rx_hist_entry_delete(entry);
 					break;
 				}
 			} else if (dccp_rx_hist_entry_data_packet(entry))
@@ -264,13 +248,13 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
 
-void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
+void dccp_rx_hist_purge(struct list_head *list)
 {
 	struct dccp_rx_hist_entry *entry, *next;
 
 	list_for_each_entry_safe(entry, next, list, dccphrx_node) {
 		list_del_init(&entry->dccphrx_node);
-		kmem_cache_free(hist->dccprxh_slab, entry);
+		dccp_rx_hist_entry_delete(entry);
 	}
 }
 
@@ -281,8 +265,22 @@ __init int packet_history_init(void)
 	tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
 					      sizeof(struct tfrc_tx_hist_entry), 0,
 					      SLAB_HWCACHE_ALIGN, NULL);
+	if (tfrc_tx_hist_slab == NULL)
+		goto out_err;
 
-	return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
+	tfrc_rx_hist_slab = kmem_cache_create("tfrc_rx_hist",
+					      sizeof(struct dccp_rx_hist_entry), 0,
+					      SLAB_HWCACHE_ALIGN, NULL);
+	if (tfrc_rx_hist_slab == NULL)
+		goto out_free_tx;
+
+	return 0;
+
+out_free_tx:
+	kmem_cache_destroy(tfrc_tx_hist_slab);
+	tfrc_tx_hist_slab = NULL;
+out_err:
+	return -ENOBUFS;
 }
 
 void packet_history_exit(void)
@@ -291,4 +289,9 @@ void packet_history_exit(void)
 		kmem_cache_destroy(tfrc_tx_hist_slab);
 		tfrc_tx_hist_slab = NULL;
 	}
+
+	if (tfrc_rx_hist_slab != NULL) {
+		kmem_cache_destroy(tfrc_rx_hist_slab);
+		tfrc_rx_hist_slab = NULL;
+	}
 }
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 9a2642ed684..34b180b0eda 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -66,34 +66,10 @@ struct dccp_rx_hist_entry {
 	ktime_t		 dccphrx_tstamp;
 };
 
-struct dccp_rx_hist {
-	struct kmem_cache *dccprxh_slab;
-};
-
-extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
-extern void 		dccp_rx_hist_delete(struct dccp_rx_hist *hist);
-
-static inline struct dccp_rx_hist_entry *
-			dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
-					       const u32 ndp,
+extern struct dccp_rx_hist_entry *
+			dccp_rx_hist_entry_new(const u32 ndp,
 					       const struct sk_buff *skb,
-					       const gfp_t prio)
-{
-	struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
-							    prio);
-
-	if (entry != NULL) {
-		const struct dccp_hdr *dh = dccp_hdr(skb);
-
-		entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-		entry->dccphrx_ccval = dh->dccph_ccval;
-		entry->dccphrx_type  = dh->dccph_type;
-		entry->dccphrx_ndp   = ndp;
-		entry->dccphrx_tstamp = ktime_get_real();
-	}
-
-	return entry;
-}
+					       const gfp_t prio);
 
 static inline struct dccp_rx_hist_entry *
 			dccp_rx_hist_head(struct list_head *list)
@@ -111,21 +87,12 @@ extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
 extern struct dccp_rx_hist_entry *
 		dccp_rx_hist_find_data_packet(const struct list_head *list);
 
-extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
-				    struct list_head *rx_list,
+extern void dccp_rx_hist_add_packet(struct list_head *rx_list,
 				    struct list_head *li_list,
 				    struct dccp_rx_hist_entry *packet,
 				    u64 nonloss_seqno);
 
-static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
-					     struct dccp_rx_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(hist->dccprxh_slab, entry);
-}
-
-extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
-			       struct list_head *list);
+extern void dccp_rx_hist_purge(struct list_head *list);
 
 static inline int
 	dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
-- 
cgit v1.2.3


From d58d1af03a6a3ddf296ae3aeb4ff234af4b15958 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 6 Dec 2007 12:28:39 -0200
Subject: [TFRC]: Rename dccp_rx_ to tfrc_rx_

This is in preparation for merging the new rx history code written by Gerrit Renker.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              | 32 ++++++-------
 net/dccp/ccids/lib/loss_interval.c  | 14 +++---
 net/dccp/ccids/lib/packet_history.c | 90 ++++++++++++++++++-------------------
 net/dccp/ccids/lib/packet_history.h | 48 ++++++++++----------
 4 files changed, 92 insertions(+), 92 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 2ba0a7c470d..e0f87f22719 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -677,7 +677,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_rx_hist_entry *packet;
+	struct tfrc_rx_hist_entry *packet;
 	ktime_t now;
 	suseconds_t delta;
 
@@ -701,7 +701,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 		return;
 	}
 
-	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
+	packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
 	if (unlikely(packet == NULL)) {
 		DCCP_WARN("%s(%p), no data packet in history!\n",
 			  dccp_role(sk), sk);
@@ -709,7 +709,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 	}
 
 	hcrx->ccid3hcrx_tstamp_last_feedback = now;
-	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
+	hcrx->ccid3hcrx_ccval_last_counter   = packet->tfrchrx_ccval;
 	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
 	if (hcrx->ccid3hcrx_p == 0)
@@ -752,12 +752,12 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 }
 
 static int ccid3_hc_rx_detect_loss(struct sock *sk,
-				    struct dccp_rx_hist_entry *packet)
+				    struct tfrc_rx_hist_entry *packet)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	struct dccp_rx_hist_entry *rx_hist =
-				dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
-	u64 seqno = packet->dccphrx_seqno;
+	struct tfrc_rx_hist_entry *rx_hist =
+				tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist);
+	u64 seqno = packet->tfrchrx_seqno;
 	u64 tmp_seqno;
 	int loss = 0;
 	u8 ccval;
@@ -766,9 +766,9 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
 
 	if (!rx_hist ||
-	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+	   follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
 		hcrx->ccid3hcrx_seqno_nonloss = seqno;
-		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+		hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
 		goto detect_out;
 	}
 
@@ -789,7 +789,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 		dccp_inc_seqno(&tmp_seqno);
 		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
 		dccp_inc_seqno(&tmp_seqno);
-		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
+		while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
 		   tmp_seqno, &ccval)) {
 			hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
 			hcrx->ccid3hcrx_ccval_nonloss = ccval;
@@ -799,13 +799,13 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 
 	/* FIXME - this code could be simplified with above while */
 	/* but works at moment */
-	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+	if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
 		hcrx->ccid3hcrx_seqno_nonloss = seqno;
-		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+		hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
 	}
 
 detect_out:
-	dccp_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
+	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
 				&hcrx->ccid3hcrx_li_hist, packet,
 				hcrx->ccid3hcrx_seqno_nonloss);
 	return loss;
@@ -815,7 +815,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	const struct dccp_options_received *opt_recv;
-	struct dccp_rx_hist_entry *packet;
+	struct tfrc_rx_hist_entry *packet;
 	u32 p_prev, r_sample, rtt_prev;
 	int loss, payload_size;
 	ktime_t now;
@@ -850,7 +850,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	packet = dccp_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
+	packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
 	if (unlikely(packet == NULL)) {
 		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
 			  "to history, consider it lost!\n", dccp_role(sk), sk);
@@ -933,7 +933,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
 
 	/* Empty packet history */
-	dccp_rx_hist_purge(&hcrx->ccid3hcrx_hist);
+	tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
 
 	/* Empty loss interval history */
 	dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index f2ca4eb74dd..a5f59af8df4 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -129,7 +129,7 @@ static u32 dccp_li_calc_first_li(struct sock *sk,
 				 u16 s, u32 bytes_recv,
 				 u32 previous_x_recv)
 {
-	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
+	struct tfrc_rx_hist_entry *entry, *next, *tail = NULL;
 	u32 x_recv, p;
 	suseconds_t rtt, delta;
 	ktime_t tstamp = ktime_set(0, 0);
@@ -138,18 +138,18 @@ static u32 dccp_li_calc_first_li(struct sock *sk,
 	int step = 0;
 	u64 fval;
 
-	list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
-		if (dccp_rx_hist_entry_data_packet(entry)) {
+	list_for_each_entry_safe(entry, next, hist_list, tfrchrx_node) {
+		if (tfrc_rx_hist_entry_data_packet(entry)) {
 			tail = entry;
 
 			switch (step) {
 			case 0:
-				tstamp	  = entry->dccphrx_tstamp;
-				win_count = entry->dccphrx_ccval;
+				tstamp	  = entry->tfrchrx_tstamp;
+				win_count = entry->tfrchrx_ccval;
 				step = 1;
 				break;
 			case 1:
-				interval = win_count - entry->dccphrx_ccval;
+				interval = win_count - entry->tfrchrx_ccval;
 				if (interval < 0)
 					interval += TFRC_WIN_COUNT_LIMIT;
 				if (interval > 4)
@@ -176,7 +176,7 @@ found:
 		return ~0;
 	}
 
-	delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp);
+	delta = ktime_us_delta(tstamp, tail->tfrchrx_tstamp);
 	DCCP_BUG_ON(delta < 0);
 
 	rtt = delta * 4 / interval;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index e1ab853c38d..255cca1ca73 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -116,58 +116,58 @@ EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
  */
 static struct kmem_cache *tfrc_rx_hist_slab;
 
-struct dccp_rx_hist_entry *dccp_rx_hist_entry_new(const u32 ndp,
+struct tfrc_rx_hist_entry *tfrc_rx_hist_entry_new(const u32 ndp,
 						  const struct sk_buff *skb,
 						  const gfp_t prio)
 {
-	struct dccp_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
+	struct tfrc_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
 							    prio);
 
 	if (entry != NULL) {
 		const struct dccp_hdr *dh = dccp_hdr(skb);
 
-		entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-		entry->dccphrx_ccval = dh->dccph_ccval;
-		entry->dccphrx_type  = dh->dccph_type;
-		entry->dccphrx_ndp   = ndp;
-		entry->dccphrx_tstamp = ktime_get_real();
+		entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+		entry->tfrchrx_ccval = dh->dccph_ccval;
+		entry->tfrchrx_type  = dh->dccph_type;
+		entry->tfrchrx_ndp   = ndp;
+		entry->tfrchrx_tstamp = ktime_get_real();
 	}
 
 	return entry;
 }
-EXPORT_SYMBOL_GPL(dccp_rx_hist_entry_new);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_entry_new);
 
-static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist_entry *entry)
+static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
 {
 	kmem_cache_free(tfrc_rx_hist_slab, entry);
 }
 
-int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
 			    u8 *ccval)
 {
-	struct dccp_rx_hist_entry *packet = NULL, *entry;
+	struct tfrc_rx_hist_entry *packet = NULL, *entry;
 
-	list_for_each_entry(entry, list, dccphrx_node)
-		if (entry->dccphrx_seqno == seq) {
+	list_for_each_entry(entry, list, tfrchrx_node)
+		if (entry->tfrchrx_seqno == seq) {
 			packet = entry;
 			break;
 		}
 
 	if (packet)
-		*ccval = packet->dccphrx_ccval;
+		*ccval = packet->tfrchrx_ccval;
 
 	return packet != NULL;
 }
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
-struct dccp_rx_hist_entry *
-		dccp_rx_hist_find_data_packet(const struct list_head *list)
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_entry);
+struct tfrc_rx_hist_entry *
+		tfrc_rx_hist_find_data_packet(const struct list_head *list)
 {
-	struct dccp_rx_hist_entry *entry, *packet = NULL;
+	struct tfrc_rx_hist_entry *entry, *packet = NULL;
 
-	list_for_each_entry(entry, list, dccphrx_node)
-		if (entry->dccphrx_type == DCCP_PKT_DATA ||
-		    entry->dccphrx_type == DCCP_PKT_DATAACK) {
+	list_for_each_entry(entry, list, tfrchrx_node)
+		if (entry->tfrchrx_type == DCCP_PKT_DATA ||
+		    entry->tfrchrx_type == DCCP_PKT_DATAACK) {
 			packet = entry;
 			break;
 		}
@@ -175,29 +175,29 @@ struct dccp_rx_hist_entry *
 	return packet;
 }
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_data_packet);
 
-void dccp_rx_hist_add_packet(struct list_head *rx_list,
+void tfrc_rx_hist_add_packet(struct list_head *rx_list,
 			     struct list_head *li_list,
-			     struct dccp_rx_hist_entry *packet,
+			     struct tfrc_rx_hist_entry *packet,
 			     u64 nonloss_seqno)
 {
-	struct dccp_rx_hist_entry *entry, *next;
+	struct tfrc_rx_hist_entry *entry, *next;
 	u8 num_later = 0;
 
-	list_add(&packet->dccphrx_node, rx_list);
+	list_add(&packet->tfrchrx_node, rx_list);
 
 	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
 
 	if (!list_empty(li_list)) {
-		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+		list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
 			if (num_later == 0) {
 				if (after48(nonloss_seqno,
-				   entry->dccphrx_seqno)) {
-					list_del_init(&entry->dccphrx_node);
-					dccp_rx_hist_entry_delete(entry);
+				   entry->tfrchrx_seqno)) {
+					list_del_init(&entry->tfrchrx_node);
+					tfrc_rx_hist_entry_delete(entry);
 				}
-			} else if (dccp_rx_hist_entry_data_packet(entry))
+			} else if (tfrc_rx_hist_entry_data_packet(entry))
 				--num_later;
 		}
 	} else {
@@ -208,7 +208,7 @@ void dccp_rx_hist_add_packet(struct list_head *rx_list,
 		 * We have no loss interval history so we need at least one
 		 * rtt:s of data packets to approximate rtt.
 		 */
-		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+		list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
 			if (num_later == 0) {
 				switch (step) {
 				case 0:
@@ -220,10 +220,10 @@ void dccp_rx_hist_add_packet(struct list_head *rx_list,
 					step = 2;
 					/* OK, find next data packet */
 					num_later = 1;
-					win_count = entry->dccphrx_ccval;
+					win_count = entry->tfrchrx_ccval;
 					break;
 				case 2:
-					tmp = win_count - entry->dccphrx_ccval;
+					tmp = win_count - entry->tfrchrx_ccval;
 					if (tmp < 0)
 						tmp += TFRC_WIN_COUNT_LIMIT;
 					if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
@@ -236,29 +236,29 @@ void dccp_rx_hist_add_packet(struct list_head *rx_list,
 						num_later = 1;
 					break;
 				case 3:
-					list_del_init(&entry->dccphrx_node);
-					dccp_rx_hist_entry_delete(entry);
+					list_del_init(&entry->tfrchrx_node);
+					tfrc_rx_hist_entry_delete(entry);
 					break;
 				}
-			} else if (dccp_rx_hist_entry_data_packet(entry))
+			} else if (tfrc_rx_hist_entry_data_packet(entry))
 				--num_later;
 		}
 	}
 }
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
-void dccp_rx_hist_purge(struct list_head *list)
+void tfrc_rx_hist_purge(struct list_head *list)
 {
-	struct dccp_rx_hist_entry *entry, *next;
+	struct tfrc_rx_hist_entry *entry, *next;
 
-	list_for_each_entry_safe(entry, next, list, dccphrx_node) {
-		list_del_init(&entry->dccphrx_node);
-		dccp_rx_hist_entry_delete(entry);
+	list_for_each_entry_safe(entry, next, list, tfrchrx_node) {
+		list_del_init(&entry->tfrchrx_node);
+		tfrc_rx_hist_entry_delete(entry);
 	}
 }
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
 __init int packet_history_init(void)
 {
@@ -269,7 +269,7 @@ __init int packet_history_init(void)
 		goto out_err;
 
 	tfrc_rx_hist_slab = kmem_cache_create("tfrc_rx_hist",
-					      sizeof(struct dccp_rx_hist_entry), 0,
+					      sizeof(struct tfrc_rx_hist_entry), 0,
 					      SLAB_HWCACHE_ALIGN, NULL);
 	if (tfrc_rx_hist_slab == NULL)
 		goto out_free_tx;
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 34b180b0eda..5b0b9834340 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -57,51 +57,51 @@ extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
 /*
  * 	Receiver History data structures and declarations
  */
-struct dccp_rx_hist_entry {
-	struct list_head dccphrx_node;
-	u64		 dccphrx_seqno:48,
-			 dccphrx_ccval:4,
-			 dccphrx_type:4;
-	u32		 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
-	ktime_t		 dccphrx_tstamp;
+struct tfrc_rx_hist_entry {
+	struct list_head tfrchrx_node;
+	u64		 tfrchrx_seqno:48,
+			 tfrchrx_ccval:4,
+			 tfrchrx_type:4;
+	u32		 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */
+	ktime_t		 tfrchrx_tstamp;
 };
 
-extern struct dccp_rx_hist_entry *
-			dccp_rx_hist_entry_new(const u32 ndp,
+extern struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_entry_new(const u32 ndp,
 					       const struct sk_buff *skb,
 					       const gfp_t prio);
 
-static inline struct dccp_rx_hist_entry *
-			dccp_rx_hist_head(struct list_head *list)
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_head(struct list_head *list)
 {
-	struct dccp_rx_hist_entry *head = NULL;
+	struct tfrc_rx_hist_entry *head = NULL;
 
 	if (!list_empty(list))
-		head = list_entry(list->next, struct dccp_rx_hist_entry,
-				  dccphrx_node);
+		head = list_entry(list->next, struct tfrc_rx_hist_entry,
+				  tfrchrx_node);
 	return head;
 }
 
-extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+extern int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
 				   u8 *ccval);
-extern struct dccp_rx_hist_entry *
-		dccp_rx_hist_find_data_packet(const struct list_head *list);
+extern struct tfrc_rx_hist_entry *
+		tfrc_rx_hist_find_data_packet(const struct list_head *list);
 
-extern void dccp_rx_hist_add_packet(struct list_head *rx_list,
+extern void tfrc_rx_hist_add_packet(struct list_head *rx_list,
 				    struct list_head *li_list,
-				    struct dccp_rx_hist_entry *packet,
+				    struct tfrc_rx_hist_entry *packet,
 				    u64 nonloss_seqno);
 
-extern void dccp_rx_hist_purge(struct list_head *list);
+extern void tfrc_rx_hist_purge(struct list_head *list);
 
 static inline int
-	dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
+	tfrc_rx_hist_entry_data_packet(const struct tfrc_rx_hist_entry *entry)
 {
-	return entry->dccphrx_type == DCCP_PKT_DATA ||
-	       entry->dccphrx_type == DCCP_PKT_DATAACK;
+	return entry->tfrchrx_type == DCCP_PKT_DATA ||
+	       entry->tfrchrx_type == DCCP_PKT_DATAACK;
 }
 
-extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
+extern u64 tfrc_rx_hist_detect_loss(struct list_head *rx_list,
 				    struct list_head *li_list, u8 *win_loss);
 
 #endif /* _DCCP_PKT_HIST_ */
-- 
cgit v1.2.3


From 30a0eacd479f1c7c15fe0496585ff29f76de3378 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 6 Dec 2007 12:29:07 -0200
Subject: [CCID3]: The receiver of a half-connection does not set window
 counter values

Only the sender sets window counters [RFC 4342, sections 5 and 8.1].

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index e0f87f22719..f5cfc2e2d7b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -733,7 +733,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	hcrx = ccid3_hc_rx_sk(sk);
-	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
 
 	if (dccp_packet_without_ack(skb))
 		return 0;
-- 
cgit v1.2.3


From b84a2189c4e1835c51fd6b974a0497be9bc4ba87 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 6 Dec 2007 13:18:11 -0200
Subject: [TFRC]: New rx history code

Credit here goes to Gerrit Renker, that provided the initial implementation for
this new codebase.

I modified it just to try to make it closer to the existing API, renaming some
functions, add namespacing and fix one bug where the tfrc_rx_hist_alloc was not
freeing the allocated ring entries on the error path.

Original changeset comment from Gerrit:
      -----------
This provides a new, self-contained and generic RX history service for TFRC
based protocols.

Details:
 * new data structure, initialisation and cleanup routines;
 * allocation of dccp_rx_hist entries local to packet_history.c,
   as a service exported by the dccp_tfrc_lib module.
 * interface to automatically track highest-received seqno;
 * receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1);
 * a generic function to test for `data packets' as per  RFC 4340, sec. 7.7.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              | 282 ++++++++++++-----------------------
 net/dccp/ccids/ccid3.h              |  14 +-
 net/dccp/ccids/lib/loss_interval.c  |  13 +-
 net/dccp/ccids/lib/packet_history.c | 290 ++++++++++++++++++++++--------------
 net/dccp/ccids/lib/packet_history.h |  83 +++++------
 5 files changed, 327 insertions(+), 355 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f5cfc2e2d7b..bf95c3292d5 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 /*
  *	Receiver Half-Connection Routines
  */
+
+/* CCID3 feedback types */
+enum ccid3_fback_type {
+	CCID3_FBACK_NONE = 0,
+	CCID3_FBACK_INITIAL,
+	CCID3_FBACK_PERIODIC,
+	CCID3_FBACK_PARAM_CHANGE
+};
+
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
 {
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
 	hcrx->ccid3hcrx_state = state;
 }
 
-static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
-{
-	if (likely(len > 0))	/* don't update on empty packets (e.g. ACKs) */
-		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9);
-}
-
-static void ccid3_hc_rx_send_feedback(struct sock *sk)
+static void ccid3_hc_rx_send_feedback(struct sock *sk,
+				      const struct sk_buff *skb,
+				      enum ccid3_fback_type fbtype)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct tfrc_rx_hist_entry *packet;
 	ktime_t now;
-	suseconds_t delta;
+	s64 delta = 0;
 
 	ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
 
+	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
+		return;
+
 	now = ktime_get_real();
 
-	switch (hcrx->ccid3hcrx_state) {
-	case TFRC_RSTATE_NO_DATA:
+	switch (fbtype) {
+	case CCID3_FBACK_INITIAL:
 		hcrx->ccid3hcrx_x_recv = 0;
+		hcrx->ccid3hcrx_pinv   = ~0U;   /* see RFC 4342, 8.5 */
 		break;
-	case TFRC_RSTATE_DATA:
-		delta = ktime_us_delta(now,
-				       hcrx->ccid3hcrx_tstamp_last_feedback);
-		DCCP_BUG_ON(delta < 0);
-		hcrx->ccid3hcrx_x_recv =
-			scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	case CCID3_FBACK_PARAM_CHANGE:
+		/*
+		 * When parameters change (new loss or p > p_prev), we do not
+		 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
+		 * need to  reuse the previous value of X_recv. However, when
+		 * X_recv was 0 (due to early loss), this would kill X down to
+		 * s/t_mbi (i.e. one packet in 64 seconds).
+		 * To avoid such drastic reduction, we approximate X_recv as
+		 * the number of bytes since last feedback.
+		 * This is a safe fallback, since X is bounded above by X_calc.
+		 */
+		if (hcrx->ccid3hcrx_x_recv > 0)
+			break;
+		/* fall through */
+	case CCID3_FBACK_PERIODIC:
+		delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
+		if (delta <= 0)
+			DCCP_BUG("delta (%ld) <= 0", (long)delta);
+		else
+			hcrx->ccid3hcrx_x_recv =
+				scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
 		break;
-	case TFRC_RSTATE_TERM:
-		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+	default:
 		return;
 	}
 
-	packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
-	if (unlikely(packet == NULL)) {
-		DCCP_WARN("%s(%p), no data packet in history!\n",
-			  dccp_role(sk), sk);
-		return;
-	}
+	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+		       hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
 
 	hcrx->ccid3hcrx_tstamp_last_feedback = now;
-	hcrx->ccid3hcrx_ccval_last_counter   = packet->tfrchrx_ccval;
+	hcrx->ccid3hcrx_last_counter	     = dccp_hdr(skb)->dccph_ccval;
 	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
-	if (hcrx->ccid3hcrx_p == 0)
-		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
-	else if (hcrx->ccid3hcrx_p > 1000000) {
-		DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
-		hcrx->ccid3hcrx_pinv = 1;	/* use 100% in this case */
-	} else
-		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
-
 	dp->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
 }
@@ -750,165 +760,74 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static int ccid3_hc_rx_detect_loss(struct sock *sk,
-				    struct tfrc_rx_hist_entry *packet)
+static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	struct tfrc_rx_hist_entry *rx_hist =
-				tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist);
-	u64 seqno = packet->tfrchrx_seqno;
-	u64 tmp_seqno;
-	int loss = 0;
-	u8 ccval;
-
-
-	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
-
-	if (!rx_hist ||
-	   follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
-		hcrx->ccid3hcrx_seqno_nonloss = seqno;
-		hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
-		goto detect_out;
-	}
-
-
-	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
-	   > TFRC_RECV_NUM_LATE_LOSS) {
-		loss = 1;
-		dccp_li_update_li(sk,
-				  &hcrx->ccid3hcrx_li_hist,
-				  &hcrx->ccid3hcrx_hist,
-				  hcrx->ccid3hcrx_tstamp_last_feedback,
-				  hcrx->ccid3hcrx_s,
-				  hcrx->ccid3hcrx_bytes_recv,
-				  hcrx->ccid3hcrx_x_recv,
-				  hcrx->ccid3hcrx_seqno_nonloss,
-				  hcrx->ccid3hcrx_ccval_nonloss);
-		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
-		dccp_inc_seqno(&tmp_seqno);
-		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
-		dccp_inc_seqno(&tmp_seqno);
-		while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
-		   tmp_seqno, &ccval)) {
-			hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
-			hcrx->ccid3hcrx_ccval_nonloss = ccval;
-			dccp_inc_seqno(&tmp_seqno);
+	enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
+	const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
+	const bool is_data_packet = dccp_data_packet(skb);
+
+	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
+		if (is_data_packet) {
+			const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+			do_feedback = CCID3_FBACK_INITIAL;
+			ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+			hcrx->ccid3hcrx_s = payload;
+			/*
+			 * Not necessary to update ccid3hcrx_bytes_recv here,
+			 * since X_recv = 0 for the first feedback packet (cf.
+			 * RFC 3448, 6.3) -- gerrit
+			 */
 		}
+		goto update_records;
 	}
 
-	/* FIXME - this code could be simplified with above while */
-	/* but works at moment */
-	if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
-		hcrx->ccid3hcrx_seqno_nonloss = seqno;
-		hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
-	}
-
-detect_out:
-	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
-				&hcrx->ccid3hcrx_li_hist, packet,
-				hcrx->ccid3hcrx_seqno_nonloss);
-	return loss;
-}
-
-static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
-{
-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	const struct dccp_options_received *opt_recv;
-	struct tfrc_rx_hist_entry *packet;
-	u32 p_prev, r_sample, rtt_prev;
-	int loss, payload_size;
-	ktime_t now;
-
-	opt_recv = &dccp_sk(sk)->dccps_options_received;
+	if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
+		return; /* done receiving */
 
-	switch (DCCP_SKB_CB(skb)->dccpd_type) {
-	case DCCP_PKT_ACK:
-		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
-			return;
-	case DCCP_PKT_DATAACK:
-		if (opt_recv->dccpor_timestamp_echo == 0)
-			break;
-		r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo;
-		rtt_prev = hcrx->ccid3hcrx_rtt;
-		r_sample = dccp_sample_rtt(sk, 10 * r_sample);
-
-		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
-			hcrx->ccid3hcrx_rtt = r_sample;
-		else
-			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
-					      r_sample / 10;
-
-		if (rtt_prev != hcrx->ccid3hcrx_rtt)
-			ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
-				       dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
-				       opt_recv->dccpor_elapsed_time);
-		break;
-	case DCCP_PKT_DATA:
-		break;
-	default: /* We're not interested in other packet types, move along */
-		return;
-	}
-
-	packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
-	if (unlikely(packet == NULL)) {
-		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
-			  "to history, consider it lost!\n", dccp_role(sk), sk);
-		return;
+	if (is_data_packet) {
+		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+		/*
+		 * Update moving-average of s and the sum of received payload bytes
+		 */
+		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
+		hcrx->ccid3hcrx_bytes_recv += payload;
 	}
 
-	loss = ccid3_hc_rx_detect_loss(sk, packet);
-
-	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
-		return;
-
-	payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
-	ccid3_hc_rx_update_s(hcrx, payload_size);
+	/*
+	 * Handle pending losses and otherwise check for new loss
+	 */
+	if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
+		goto update_records;
 
-	switch (hcrx->ccid3hcrx_state) {
-	case TFRC_RSTATE_NO_DATA:
-		ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
-			       "feedback\n", dccp_role(sk), sk,
-			       dccp_state_name(sk->sk_state), skb);
-		ccid3_hc_rx_send_feedback(sk);
-		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
-		return;
-	case TFRC_RSTATE_DATA:
-		hcrx->ccid3hcrx_bytes_recv += payload_size;
-		if (loss)
-			break;
+	/*
+	 * Handle data packets: RTT sampling and monitoring p
+	 */
+	if (unlikely(!is_data_packet))
+		goto update_records;
 
-		now = ktime_get_real();
-		if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) -
-		     (s64)hcrx->ccid3hcrx_rtt) >= 0) {
-			hcrx->ccid3hcrx_tstamp_last_ack = now;
-			ccid3_hc_rx_send_feedback(sk);
-		}
-		return;
-	case TFRC_RSTATE_TERM:
-		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
-		return;
+	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {  /* no loss so far: p = 0 */
+		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
+		/*
+		 * Empty loss history: no loss so far, hence p stays 0.
+		 * Sample RTT values, since an RTT estimate is required for the
+		 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
+		 */
+		if (sample != 0)
+			hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
 	}
 
-	/* Dealing with packet loss */
-	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
-		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
-
-	p_prev = hcrx->ccid3hcrx_p;
-
-	/* Calculate loss event rate */
-	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
-		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
+	/*
+	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
+	 */
+	if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
+		do_feedback = CCID3_FBACK_PERIODIC;
 
-		/* Scaling up by 1000000 as fixed decimal */
-		if (i_mean != 0)
-			hcrx->ccid3hcrx_p = 1000000 / i_mean;
-	} else
-		DCCP_BUG("empty loss history");
+update_records:
+	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
 
-	if (hcrx->ccid3hcrx_p > p_prev) {
-		ccid3_hc_rx_send_feedback(sk);
-		return;
-	}
+	if (do_feedback)
+		ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
 }
 
 static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
@@ -918,11 +837,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 	ccid3_pr_debug("entry\n");
 
 	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
-	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
 	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
-	hcrx->ccid3hcrx_tstamp_last_feedback =
-		hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
-	return 0;
+	return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
 }
 
 static void ccid3_hc_rx_exit(struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index b842a7dd99d..3c33dc670cf 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,7 +1,8 @@
 /*
  *  net/dccp/ccids/ccid3.h
  *
- *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *
  *  An implementation of the DCCP protocol
  *
@@ -135,9 +136,7 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
  *  @ccid3hcrx_rtt  -  Receiver estimate of rtt (non-standard)
  *  @ccid3hcrx_p  -  current loss event rate (RFC 3448 5.4)
- *  @ccid3hcrx_seqno_nonloss  -  Last received non-loss sequence number
- *  @ccid3hcrx_ccval_nonloss  -  Last received non-loss Window CCVal
- *  @ccid3hcrx_ccval_last_counter  -  Tracks window counter (RFC 4342, 8.1)
+ *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @ccid3hcrx_state  -  receiver state, one of %ccid3_hc_rx_states
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
@@ -152,14 +151,11 @@ struct ccid3_hc_rx_sock {
 #define ccid3hcrx_x_recv		ccid3hcrx_tfrc.tfrcrx_x_recv
 #define ccid3hcrx_rtt			ccid3hcrx_tfrc.tfrcrx_rtt
 #define ccid3hcrx_p			ccid3hcrx_tfrc.tfrcrx_p
-	u64				ccid3hcrx_seqno_nonloss:48,
-					ccid3hcrx_ccval_nonloss:4,
-					ccid3hcrx_ccval_last_counter:4;
+	u8				ccid3hcrx_last_counter:4;
 	enum ccid3_hc_rx_states		ccid3hcrx_state:8;
 	u32				ccid3hcrx_bytes_recv;
 	ktime_t				ccid3hcrx_tstamp_last_feedback;
-	ktime_t				ccid3hcrx_tstamp_last_ack;
-	struct list_head		ccid3hcrx_hist;
+	struct tfrc_rx_hist		ccid3hcrx_hist;
 	struct list_head		ccid3hcrx_li_hist;
 	u16				ccid3hcrx_s;
 	u32				ccid3hcrx_pinv;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index a5f59af8df4..c0a933a1d28 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -129,6 +129,13 @@ static u32 dccp_li_calc_first_li(struct sock *sk,
 				 u16 s, u32 bytes_recv,
 				 u32 previous_x_recv)
 {
+/*
+ * FIXME:
+ * Will be rewritten in the upcoming new loss intervals code.
+ * Has to be commented ou because it relies on the old rx history
+ * data structures
+ */
+#if 0
 	struct tfrc_rx_hist_entry *entry, *next, *tail = NULL;
 	u32 x_recv, p;
 	suseconds_t rtt, delta;
@@ -216,10 +223,10 @@ found:
 	dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
 		      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
 
-	if (p == 0)
-		return ~0;
-	else
+	if (p != 0)
 		return 1000000 / p;
+#endif
+	return ~0;
 }
 
 void dccp_li_update_li(struct sock *sk,
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 255cca1ca73..4eddb2da833 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -36,7 +36,9 @@
  */
 
 #include <linux/string.h>
+#include <linux/slab.h>
 #include "packet_history.h"
+#include "../../dccp.h"
 
 /**
  *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
@@ -111,154 +113,214 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
 
+
+/**
+ * tfrc_rx_hist_index - index to reach n-th entry after loss_start
+ */
+static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
+{
+	return (h->loss_start + n) & TFRC_NDUPACK;
+}
+
+/**
+ * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
+{
+	return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
+}
+
 /*
  * 	Receiver History Routines
  */
 static struct kmem_cache *tfrc_rx_hist_slab;
 
-struct tfrc_rx_hist_entry *tfrc_rx_hist_entry_new(const u32 ndp,
-						  const struct sk_buff *skb,
-						  const gfp_t prio)
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+			     const struct sk_buff *skb,
+			     const u32 ndp)
 {
-	struct tfrc_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
-							    prio);
-
-	if (entry != NULL) {
-		const struct dccp_hdr *dh = dccp_hdr(skb);
-
-		entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-		entry->tfrchrx_ccval = dh->dccph_ccval;
-		entry->tfrchrx_type  = dh->dccph_type;
-		entry->tfrchrx_ndp   = ndp;
-		entry->tfrchrx_tstamp = ktime_get_real();
-	}
-
-	return entry;
+	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+
+	entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	entry->tfrchrx_ccval = dh->dccph_ccval;
+	entry->tfrchrx_type  = dh->dccph_type;
+	entry->tfrchrx_ndp   = ndp;
+	entry->tfrchrx_tstamp = ktime_get_real();
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_entry_new);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
 static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
 {
 	kmem_cache_free(tfrc_rx_hist_slab, entry);
 }
 
-int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
-			    u8 *ccval)
+/**
+ * tfrc_rx_hist_entry - return the n-th history entry after loss_start
+ */
+static inline struct tfrc_rx_hist_entry *
+		tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
 {
-	struct tfrc_rx_hist_entry *packet = NULL, *entry;
+	return h->ring[tfrc_rx_hist_index(h, n)];
+}
 
-	list_for_each_entry(entry, list, tfrchrx_node)
-		if (entry->tfrchrx_seqno == seq) {
-			packet = entry;
-			break;
-		}
+/**
+ * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
+{
+	return h->ring[h->loss_start];
+}
+
+/* has the packet contained in skb been seen before? */
+int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
+{
+	const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
+	int i;
+
+	if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
+		return 1;
 
-	if (packet)
-		*ccval = packet->tfrchrx_ccval;
+	for (i = 1; i <= h->loss_count; i++)
+		if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
+			return 1;
 
-	return packet != NULL;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_entry);
-struct tfrc_rx_hist_entry *
-		tfrc_rx_hist_find_data_packet(const struct list_head *list)
+/* initialise loss detection and disable RTT sampling */
+static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
 {
-	struct tfrc_rx_hist_entry *entry, *packet = NULL;
+	h->loss_count = 1;
+}
 
-	list_for_each_entry(entry, list, tfrchrx_node)
-		if (entry->tfrchrx_type == DCCP_PKT_DATA ||
-		    entry->tfrchrx_type == DCCP_PKT_DATAACK) {
-			packet = entry;
-			break;
-		}
+/* indicate whether previously a packet was detected missing */
+static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
+{
+	return h->loss_count;
+}
+
+/* any data packets missing between last reception and skb ? */
+int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb, u32 ndp)
+{
+	int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
+				     DCCP_SKB_CB(skb)->dccpd_seq);
+
+	if (delta > 1 && ndp < delta)
+		tfrc_rx_hist_loss_indicated(h);
 
-	return packet;
+	return tfrc_rx_hist_loss_pending(h);
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_data_packet);
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+{
+	int i;
+
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL)
+			goto out_free;
+	}
+
+	h->loss_count = h->loss_start = 0;
+	return 0;
+
+out_free:
+	while (i-- != 0) {
+		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+		h->ring[i] = NULL;
+	}
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
 
-void tfrc_rx_hist_add_packet(struct list_head *rx_list,
-			     struct list_head *li_list,
-			     struct tfrc_rx_hist_entry *packet,
-			     u64 nonloss_seqno)
+void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
-	struct tfrc_rx_hist_entry *entry, *next;
-	u8 num_later = 0;
-
-	list_add(&packet->tfrchrx_node, rx_list);
-
-	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
-
-	if (!list_empty(li_list)) {
-		list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
-			if (num_later == 0) {
-				if (after48(nonloss_seqno,
-				   entry->tfrchrx_seqno)) {
-					list_del_init(&entry->tfrchrx_node);
-					tfrc_rx_hist_entry_delete(entry);
-				}
-			} else if (tfrc_rx_hist_entry_data_packet(entry))
-				--num_later;
-		}
-	} else {
-		int step = 0;
-		u8 win_count = 0; /* Not needed, but lets shut up gcc */
-		int tmp;
-		/*
-		 * We have no loss interval history so we need at least one
-		 * rtt:s of data packets to approximate rtt.
-		 */
-		list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
-			if (num_later == 0) {
-				switch (step) {
-				case 0:
-					step = 1;
-					/* OK, find next data packet */
-					num_later = 1;
-					break;
-				case 1:
-					step = 2;
-					/* OK, find next data packet */
-					num_later = 1;
-					win_count = entry->tfrchrx_ccval;
-					break;
-				case 2:
-					tmp = win_count - entry->tfrchrx_ccval;
-					if (tmp < 0)
-						tmp += TFRC_WIN_COUNT_LIMIT;
-					if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
-						/*
-						 * We have found a packet older
-						 * than one rtt remove the rest
-						 */
-						step = 3;
-					} else /* OK, find next data packet */
-						num_later = 1;
-					break;
-				case 3:
-					list_del_init(&entry->tfrchrx_node);
-					tfrc_rx_hist_entry_delete(entry);
-					break;
-				}
-			} else if (tfrc_rx_hist_entry_data_packet(entry))
-				--num_later;
+	int i;
+
+	for (i = 0; i <= TFRC_NDUPACK; ++i)
+		if (h->ring[i] != NULL) {
+			kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+			h->ring[i] = NULL;
 		}
-	}
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
+/**
+ * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+{
+	return h->ring[0];
+}
 
-void tfrc_rx_hist_purge(struct list_head *list)
+/**
+ * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
 {
-	struct tfrc_rx_hist_entry *entry, *next;
+	return h->ring[h->rtt_sample_prev];
+}
 
-	list_for_each_entry_safe(entry, next, list, tfrchrx_node) {
-		list_del_init(&entry->tfrchrx_node);
-		tfrc_rx_hist_entry_delete(entry);
+/**
+ * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
+ * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
+ * to compute a sample with given data - calling function should check this.
+ */
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+{
+	u32 sample = 0,
+	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+			    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+
+	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
+		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
+			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+			if (sample)
+				sample = 4 / sample *
+				         ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
+							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
+			else    /*
+				 * FIXME: This condition is in principle not
+				 * possible but occurs when CCID is used for
+				 * two-way data traffic. I have tried to trace
+				 * it, but the cause does not seem to be here.
+				 */
+				DCCP_BUG("please report to dccp@vger.kernel.org"
+					 " => prev = %u, last = %u",
+					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+		} else if (delta_v < 1) {
+			h->rtt_sample_prev = 1;
+			goto keep_ref_for_next_time;
+		}
+
+	} else if (delta_v == 4) /* optimal match */
+		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
+	else {			 /* suboptimal match */
+		h->rtt_sample_prev = 2;
+		goto keep_ref_for_next_time;
 	}
-}
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
+	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
+		DCCP_WARN("RTT sample %u too large, using max\n", sample);
+		sample = DCCP_SANE_RTT_MAX;
+	}
+
+	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
+keep_ref_for_next_time:
+
+	return sample;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
 
 __init int packet_history_init(void)
 {
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 5b0b9834340..3dfd182b0e6 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -37,15 +37,9 @@
 #define _DCCP_PKT_HIST_
 
 #include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include "tfrc.h"
+#include <linux/types.h>
 
-/* Number of later packets received before one is considered lost */
-#define TFRC_RECV_NUM_LATE_LOSS	 3
-
-#define TFRC_WIN_COUNT_PER_RTT	 4
-#define TFRC_WIN_COUNT_LIMIT	16
+struct sk_buff;
 
 struct tfrc_tx_hist_entry;
 
@@ -54,11 +48,20 @@ extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
 extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
 			     const u64 seqno, const ktime_t now);
 
-/*
- * 	Receiver History data structures and declarations
+/* Subtraction a-b modulo-16, respects circular wrap-around */
+#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
+
+/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
+#define TFRC_NDUPACK 3
+
+/**
+ * tfrc_rx_hist_entry - Store information about a single received packet
+ * @tfrchrx_seqno:	DCCP packet sequence number
+ * @tfrchrx_ccval:	window counter value of packet (RFC 4342, 8.1)
+ * @tfrchrx_ndp:	the NDP count (if any) of the packet
+ * @tfrchrx_tstamp:	actual receive time of packet
  */
 struct tfrc_rx_hist_entry {
-	struct list_head tfrchrx_node;
 	u64		 tfrchrx_seqno:48,
 			 tfrchrx_ccval:4,
 			 tfrchrx_type:4;
@@ -66,42 +69,30 @@ struct tfrc_rx_hist_entry {
 	ktime_t		 tfrchrx_tstamp;
 };
 
-extern struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_entry_new(const u32 ndp,
-					       const struct sk_buff *skb,
-					       const gfp_t prio);
-
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_head(struct list_head *list)
-{
-	struct tfrc_rx_hist_entry *head = NULL;
-
-	if (!list_empty(list))
-		head = list_entry(list->next, struct tfrc_rx_hist_entry,
-				  tfrchrx_node);
-	return head;
-}
-
-extern int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
-				   u8 *ccval);
-extern struct tfrc_rx_hist_entry *
-		tfrc_rx_hist_find_data_packet(const struct list_head *list);
-
-extern void tfrc_rx_hist_add_packet(struct list_head *rx_list,
-				    struct list_head *li_list,
-				    struct tfrc_rx_hist_entry *packet,
-				    u64 nonloss_seqno);
-
-extern void tfrc_rx_hist_purge(struct list_head *list);
+/**
+ * tfrc_rx_hist  -  RX history structure for TFRC-based protocols
+ *
+ * @ring:		Packet history for RTT sampling and loss detection
+ * @loss_count:		Number of entries in circular history
+ * @loss_start:		Movable index (for loss detection)
+ * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
+ */
+struct tfrc_rx_hist {
+	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
+	u8			  loss_count:2,
+				  loss_start:2;
+#define rtt_sample_prev		  loss_start
+};
 
-static inline int
-	tfrc_rx_hist_entry_data_packet(const struct tfrc_rx_hist_entry *entry)
-{
-	return entry->tfrchrx_type == DCCP_PKT_DATA ||
-	       entry->tfrchrx_type == DCCP_PKT_DATAACK;
-}
+extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb, const u32 ndp);
 
-extern u64 tfrc_rx_hist_detect_loss(struct list_head *rx_list,
-				    struct list_head *li_list, u8 *win_loss);
+extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
+extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+					   const struct sk_buff *skb, u32 ndp);
+extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+				   const struct sk_buff *skb);
+extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
-- 
cgit v1.2.3


From 5a3e55d68ec5baac578bf32ba67607088c763657 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Fri, 7 Dec 2007 00:38:10 -0800
Subject: [NET]: Multiple namespaces in the all dst_ifdown routines.

Move dst entries to a namespace loopback to catch refcounting leaks.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c          | 4 ++--
 net/ipv4/route.c        | 5 +++--
 net/ipv4/xfrm4_policy.c | 3 ++-
 net/ipv6/route.c        | 7 +++++--
 net/ipv6/xfrm6_policy.c | 3 ++-
 net/xfrm/xfrm_policy.c  | 2 +-
 6 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index f538061f4b9..5c6cfc4e7fd 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -279,11 +279,11 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	if (!unregister) {
 		dst->input = dst->output = dst_discard;
 	} else {
-		dst->dev = init_net.loopback_dev;
+		dst->dev = dst->dev->nd_net->loopback_dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
 		if (dst->neighbour && dst->neighbour->dev == dev) {
-			dst->neighbour->dev = init_net.loopback_dev;
+			dst->neighbour->dev = dst->dev;
 			dev_put(dev);
 			dev_hold(dst->neighbour->dev);
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d95e48e8d65..b576f8cd401 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1512,8 +1512,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 	struct rtable *rt = (struct rtable *) dst;
 	struct in_device *idev = rt->idev;
-	if (dev != init_net.loopback_dev && idev && idev->dev == dev) {
-		struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev);
+	if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) {
+		struct in_device *loopback_idev =
+			in_dev_get(dev->nd_net->loopback_dev);
 		if (loopback_idev) {
 			rt->idev = loopback_idev;
 			in_dev_put(idev);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b4948c170b3..10b72d185bb 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -214,7 +214,8 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 	xdst = (struct xfrm_dst *)dst;
 	if (xdst->u.rt.idev->dev == dev) {
-		struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev);
+		struct in_device *loopback_idev =
+			in_dev_get(dev->nd_net->loopback_dev);
 		BUG_ON(!loopback_idev);
 
 		do {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e2c980dbe52..452111fa4c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -216,9 +216,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
+	struct net_device *loopback_dev =
+		dev->nd_net->loopback_dev;
 
-	if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
-		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
+	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
+		struct inet6_dev *loopback_idev =
+			in6_dev_get(loopback_dev);
 		if (loopback_idev != NULL) {
 			rt->rt6i_idev = loopback_idev;
 			in6_dev_put(idev);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index a31dd531e19..4d54951cea0 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -229,7 +229,8 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 	xdst = (struct xfrm_dst *)dst;
 	if (xdst->u.rt6.rt6i_idev->dev == dev) {
-		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
+		struct inet6_dev *loopback_idev =
+			in6_dev_get(dev->nd_net->loopback_dev);
 		BUG_ON(!loopback_idev);
 
 		do {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a76280a14e7..95dc581861e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1934,7 +1934,7 @@ static int stale_bundle(struct dst_entry *dst)
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 {
 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
-		dst->dev = init_net.loopback_dev;
+		dst->dev = dev->nd_net->loopback_dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
 	}
-- 
cgit v1.2.3


From d63bddbe90c4fd924b2155ca92a879393d856170 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 7 Dec 2007 00:40:34 -0800
Subject: [IPV6]: Make fib6_init to return an error code.

If there is an error in the initialization function, nothing is
followed up to the caller. So I add a return value to be set for the
init function.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 31b60a02512..c100b44f2b8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1473,16 +1473,24 @@ void fib6_run_gc(unsigned long dummy)
 	spin_unlock_bh(&fib6_gc_lock);
 }
 
-void __init fib6_init(void)
+int __init fib6_init(void)
 {
+	int ret;
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL);
-
 	fib6_tables_init();
 
-	__rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
+	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
+	if (ret)
+		goto out_kmem_cache_create;
+out:
+	return ret;
+
+out_kmem_cache_create:
+	kmem_cache_destroy(fib6_node_kmem);
+	goto out;
 }
 
 void fib6_gc_cleanup(void)
-- 
cgit v1.2.3


From 0013cabab30ec55830ce63d34c0bdd887eb87644 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 7 Dec 2007 00:42:11 -0800
Subject: [IPV6]: Make xfrm6_init to return an error code.

The xfrm initialization function does not return any error code, so if
there is an error, the caller can not be advise of that.  This patch
checks the return code of the different called functions in order to
return a successful or failed initialization.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 22 +++++++++++++++++-----
 net/ipv6/xfrm6_state.c  |  4 ++--
 2 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4d54951cea0..181cf91538f 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -269,9 +269,9 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.fill_dst =		xfrm6_fill_dst,
 };
 
-static void __init xfrm6_policy_init(void)
+static int __init xfrm6_policy_init(void)
 {
-	xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
+	return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
 }
 
 static void xfrm6_policy_fini(void)
@@ -279,10 +279,22 @@ static void xfrm6_policy_fini(void)
 	xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
 }
 
-void __init xfrm6_init(void)
+int __init xfrm6_init(void)
 {
-	xfrm6_policy_init();
-	xfrm6_state_init();
+	int ret;
+
+	ret = xfrm6_policy_init();
+	if (ret)
+		goto out;
+
+	ret = xfrm6_state_init();
+	if (ret)
+		goto out_policy;
+out:
+	return ret;
+out_policy:
+	xfrm6_policy_fini();
+	goto out;
 }
 
 void xfrm6_fini(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a7a7e8fd6a3..dc817e035e2 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -198,9 +198,9 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.transport_finish	= xfrm6_transport_finish,
 };
 
-void __init xfrm6_state_init(void)
+int __init xfrm6_state_init(void)
 {
-	xfrm_state_register_afinfo(&xfrm6_state_afinfo);
+	return xfrm_state_register_afinfo(&xfrm6_state_afinfo);
 }
 
 void xfrm6_state_fini(void)
-- 
cgit v1.2.3


From 9eb87f3f7e0686a256c5bb4f886dede0171245f2 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 7 Dec 2007 00:42:52 -0800
Subject: [IPV6]: Make fib6_rules_init to return an error code.

When the fib_rules initialization finished, no return code is provided
so there is no way to know, for the caller, if the initialization has
been successful or has failed. This patch fix that.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c  |  5 +++--
 net/ipv6/fib6_rules.c | 19 ++++++++++++++++---
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 0af0538343d..fcbf41c0a5d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -102,7 +102,7 @@ errout:
 
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
-static void cleanup_ops(struct fib_rules_ops *ops)
+void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 {
 	struct fib_rule *rule, *tmp;
 
@@ -111,6 +111,7 @@ static void cleanup_ops(struct fib_rules_ops *ops)
 		fib_rule_put(rule);
 	}
 }
+EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
 int fib_rules_unregister(struct fib_rules_ops *ops)
 {
@@ -121,7 +122,7 @@ int fib_rules_unregister(struct fib_rules_ops *ops)
 	list_for_each_entry(o, &rules_ops, list) {
 		if (o == ops) {
 			list_del_rcu(&o->list);
-			cleanup_ops(ops);
+			fib_rules_cleanup_ops(ops);
 			goto out;
 		}
 	}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 428c6b0e26d..9ce2e0a6748 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -265,10 +265,23 @@ static int __init fib6_default_rules_init(void)
 	return 0;
 }
 
-void __init fib6_rules_init(void)
+int __init fib6_rules_init(void)
 {
-	BUG_ON(fib6_default_rules_init());
-	fib_rules_register(&fib6_rules_ops);
+	int ret;
+
+	ret = fib6_default_rules_init();
+	if (ret)
+		goto out;
+
+	ret = fib_rules_register(&fib6_rules_ops);
+	if (ret)
+		goto out_default_rules_init;
+out:
+	return ret;
+
+out_default_rules_init:
+	fib_rules_cleanup_ops(&fib6_rules_ops);
+	goto out;
 }
 
 void fib6_rules_cleanup(void)
-- 
cgit v1.2.3


From 433d49c3bb14b8a2351fe97df8359e4ad0de4c7c Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 7 Dec 2007 00:43:48 -0800
Subject: [IPV6]: Make ip6_route_init to return an error code.

The route initialization function does not return any value to notify
if the initialization is successful or not. This patch checks all
calls made for the initilization in order to return a value for the
caller.

Unfortunately, proc_net_fops_create will return a NULL pointer if
CONFIG_PROC_FS is off, so we can not check the return code without an
ifdef CONFIG_PROC_FS block in the ip6_route_init function.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 452111fa4c5..d7754abf921 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2468,26 +2468,70 @@ ctl_table ipv6_route_table[] = {
 
 #endif
 
-void __init ip6_route_init(void)
+int __init ip6_route_init(void)
 {
+	int ret;
+
 	ip6_dst_ops.kmem_cachep =
 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
 
-	fib6_init();
-	proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops);
-	proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+	ret = fib6_init();
+	if (ret)
+		goto out_kmem_cache;
+
+#ifdef CONFIG_PROC_FS
+	ret = -ENOMEM;
+	if (!proc_net_fops_create(&init_net, "ipv6_route",
+				  0, &ipv6_route_proc_fops))
+		goto out_fib6_init;
+
+	if (!proc_net_fops_create(&init_net, "rt6_stats",
+				  S_IRUGO, &rt6_stats_seq_fops))
+		goto out_proc_ipv6_route;
+#endif
+
 #ifdef CONFIG_XFRM
-	xfrm6_init();
+	ret = xfrm6_init();
+	if (ret)
+		goto out_proc_rt6_stats;
 #endif
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	fib6_rules_init();
+	ret = fib6_rules_init();
+	if (ret)
+		goto xfrm6_init;
 #endif
+	ret = -ENOBUFS;
+	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
+	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
+	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
+		goto fib6_rules_init;
 
-	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
-	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
-	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
+	ret = 0;
+out:
+	return ret;
+
+fib6_rules_init:
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_cleanup();
+xfrm6_init:
+#endif
+#ifdef CONFIG_XFRM
+	xfrm6_fini();
+out_proc_rt6_stats:
+#endif
+#ifdef CONFIG_PROC_FS
+	proc_net_remove(&init_net, "rt6_stats");
+out_proc_ipv6_route:
+	proc_net_remove(&init_net, "ipv6_route");
+out_fib6_init:
+#endif
+	rt6_ifdown(NULL);
+	fib6_gc_cleanup();
+out_kmem_cache:
+	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+	goto out;
 }
 
 void ip6_route_cleanup(void)
@@ -2495,10 +2539,8 @@ void ip6_route_cleanup(void)
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_cleanup();
 #endif
-#ifdef CONFIG_PROC_FS
 	proc_net_remove(&init_net, "ipv6_route");
 	proc_net_remove(&init_net, "rt6_stats");
-#endif
 #ifdef CONFIG_XFRM
 	xfrm6_fini();
 #endif
-- 
cgit v1.2.3


From e2fddf5e96df4ac26f2e9ce63053d51cdf3cfe1e Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 7 Dec 2007 00:44:29 -0800
Subject: [IPV6]: Make af_inet6 to check ip6_route_init return value.

The af_inet6 initialization function does not check the return code of
the route initilization, so if something goes wrong, the protocol
initialization will continue anyway.  This patch takes into account
the modification made in the different route's initialization
subroutines to check the return value and to make the protocol
initialization to fail.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 64135e2a309..5ab8ba7a586 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -849,7 +849,9 @@ static int __init inet6_init(void)
 	if (if6_proc_init())
 		goto proc_if6_fail;
 #endif
-	ip6_route_init();
+	err = ip6_route_init();
+	if (err)
+		goto ip6_route_fail;
 	ip6_flowlabel_init();
 	err = addrconf_init();
 	if (err)
@@ -874,6 +876,7 @@ out:
 addrconf_fail:
 	ip6_flowlabel_cleanup();
 	ip6_route_cleanup();
+ip6_route_fail:
 #ifdef CONFIG_PROC_FS
 	if6_proc_exit();
 proc_if6_fail:
@@ -904,6 +907,7 @@ icmp_fail:
 	cleanup_ipv6_mibs();
 out_unregister_sock:
 	sock_unregister(PF_INET6);
+	rtnl_unregister_all(PF_INET6);
 out_unregister_raw_proto:
 	proto_unregister(&rawv6_prot);
 out_unregister_udplite_proto:
-- 
cgit v1.2.3


From f845ab6b7dd872d027c27146c264e46bc16c656a Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 7 Dec 2007 00:45:16 -0800
Subject: [IPV6] route6/fib6: Don't panic a kmem_cache_create.

If the kmem_cache_creation fails, the kernel will panic. It is
acceptable if the system is booting, but if the ipv6 protocol is
compiled as a module and it is loaded after the system has booted, do
we want to panic instead of just failing to initialize the protocol ?

The init function is now returning an error and this one is checked
for protocol initialization. So the ipv6 protocol will safely fails.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 5 ++++-
 net/ipv6/route.c   | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c100b44f2b8..5fae04506ad 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1478,8 +1478,11 @@ int __init fib6_init(void)
 	int ret;
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					   0, SLAB_HWCACHE_ALIGN,
 					   NULL);
+	if (!fib6_node_kmem)
+		return -ENOMEM;
+
 	fib6_tables_init();
 
 	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d7754abf921..6f833cacfcf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2474,7 +2474,10 @@ int __init ip6_route_init(void)
 
 	ip6_dst_ops.kmem_cachep =
 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
-				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!ip6_dst_ops.kmem_cachep)
+		return -ENOMEM;
+
 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
 
 	ret = fib6_init();
-- 
cgit v1.2.3


From bb803175865dd030420eebbc87298414c7725019 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 7 Dec 2007 00:46:11 -0800
Subject: [IPV4]: Remove ip_fib_local_table and ip_fib_main_table defines.

There are only 2 users and it doesn't hurt to call fib_get_table
instead, and it makes it easier to make the fib network namespace
aware.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index eb1aa8eead0..638185eb315 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -811,7 +811,8 @@ struct fib_iter_state {
 static struct fib_alias *fib_get_first(struct seq_file *seq)
 {
 	struct fib_iter_state *iter = seq->private;
-	struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data;
+	struct fib_table *main_table = fib_get_table(RT_TABLE_MAIN);
+	struct fn_hash *table = (struct fn_hash *)main_table->tb_data;
 
 	iter->bucket    = 0;
 	iter->hash_head = NULL;
@@ -950,7 +951,7 @@ static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 	void *v = NULL;
 
 	read_lock(&fib_hash_lock);
-	if (ip_fib_main_table)
+	if (fib_get_table(RT_TABLE_MAIN))
 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 	return v;
 }
-- 
cgit v1.2.3


From 877a9bff3889512d7326d6bf0ba6ed3ddda6d772 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 7 Dec 2007 00:47:47 -0800
Subject: [IPV4]: Move trie_local and trie_main into the proc iterator.

We only use these variables when displaying the trie in proc so
place them into the iterator to make this explicit.  We should
probably do something smarter to handle the CONFIG_IP_MULTIPLE_TABLES
case but at least this makes it clear that the silliness is limited
to the display in /proc.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1010b469d7d..13a80aa911d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -164,7 +164,6 @@ static struct tnode *halve(struct trie *t, struct tnode *tn);
 static void tnode_free(struct tnode *tn);
 
 static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct trie *trie_local = NULL, *trie_main = NULL;
 
 static inline struct tnode *node_parent(struct node *node)
 {
@@ -2002,11 +2001,6 @@ struct fib_table * __init fib_hash_init(u32 id)
 
 	trie_init(t);
 
-	if (id == RT_TABLE_LOCAL)
-		trie_local = t;
-	else if (id == RT_TABLE_MAIN)
-		trie_main = t;
-
 	if (id == RT_TABLE_LOCAL)
 		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
 
@@ -2016,6 +2010,7 @@ struct fib_table * __init fib_hash_init(u32 id)
 #ifdef CONFIG_PROC_FS
 /* Depth first Trie walk iterator */
 struct fib_trie_iter {
+	struct trie *trie_local, *trie_main;
 	struct tnode *tnode;
 	struct trie *trie;
 	unsigned index;
@@ -2182,7 +2177,20 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 
 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 {
+	struct trie *trie_local, *trie_main;
 	struct trie_stat *stat;
+	struct fib_table *tb;
+
+	trie_local = NULL;
+	tb = fib_get_table(RT_TABLE_LOCAL);
+	if (tb)
+		trie_local = (struct trie *) tb->tb_data;
+
+	trie_main = NULL;
+	tb = fib_get_table(RT_TABLE_MAIN);
+	if (tb)
+		trie_main = (struct trie *) tb->tb_data;
+
 
 	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
 	if (!stat)
@@ -2226,13 +2234,13 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
 	loff_t idx = 0;
 	struct node *n;
 
-	for (n = fib_trie_get_first(iter, trie_local);
+	for (n = fib_trie_get_first(iter, iter->trie_local);
 	     n; ++idx, n = fib_trie_get_next(iter)) {
 		if (pos == idx)
 			return n;
 	}
 
-	for (n = fib_trie_get_first(iter, trie_main);
+	for (n = fib_trie_get_first(iter, iter->trie_main);
 	     n; ++idx, n = fib_trie_get_next(iter)) {
 		if (pos == idx)
 			return n;
@@ -2242,10 +2250,23 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
 
 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct fib_trie_iter *iter = seq->private;
+	struct fib_table *tb;
+
+	if (!iter->trie_local) {
+		tb = fib_get_table(RT_TABLE_LOCAL);
+		if (tb)
+			iter->trie_local = (struct trie *) tb->tb_data;
+	}
+	if (!iter->trie_main) {
+		tb = fib_get_table(RT_TABLE_MAIN);
+		if (tb)
+			iter->trie_main = (struct trie *) tb->tb_data;
+	}
 	rcu_read_lock();
 	if (*pos == 0)
 		return SEQ_START_TOKEN;
-	return fib_trie_get_idx(seq->private, *pos - 1);
+	return fib_trie_get_idx(iter, *pos - 1);
 }
 
 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -2263,8 +2284,8 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return v;
 
 	/* continue scan in next trie */
-	if (iter->trie == trie_local)
-		return fib_trie_get_first(iter, trie_main);
+	if (iter->trie == iter->trie_local)
+		return fib_trie_get_first(iter, iter->trie_main);
 
 	return NULL;
 }
@@ -2330,7 +2351,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		return 0;
 
 	if (!node_parent(n)) {
-		if (iter->trie == trie_local)
+		if (iter->trie == iter->trie_local)
 			seq_puts(seq, "<local>:\n");
 		else
 			seq_puts(seq, "<main>:\n");
@@ -2429,7 +2450,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 	}
 
-	if (iter->trie == trie_local)
+	if (iter->trie == iter->trie_local)
 		return 0;
 	if (IS_TNODE(l))
 		return 0;
-- 
cgit v1.2.3


From 3b5b34fd2b0fdea4d2efbd55daefb1ad3d7d9039 Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 7 Dec 2007 00:49:17 -0800
Subject: [NET] net/core/dev.c: use LIST_HEAD instead of LIST_HEAD_INIT

single list_head variable initialized with LIST_HEAD_INIT could almost
always can be replaced with LIST_HEAD declaration, this shrinks the code
and looks better.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d0d2675aaed..12f66e528a4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3492,7 +3492,7 @@ static int dev_new_index(struct net *net)
 
 /* Delayed registration/unregisteration */
 static DEFINE_SPINLOCK(net_todo_list_lock);
-static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+static LIST_HEAD(net_todo_list);
 
 static void net_set_todo(struct net_device *dev)
 {
-- 
cgit v1.2.3


From 1596c97aa896fdcee49d3bf90c91ed91e0d81492 Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 7 Dec 2007 00:49:47 -0800
Subject: [IPV4] net/ipv4/cipso_ipv4.c: use LIST_HEAD instead of LIST_HEAD_INIT

single list_head variable initialized with LIST_HEAD_INIT could almost
always can be replaced with LIST_HEAD declaration, this shrinks the code
and looks better.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index f18e88bc86e..d4dc4eb48d9 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -63,7 +63,7 @@ struct cipso_v4_domhsh_entry {
  * probably be turned into a hash table or something similar so we
  * can do quick lookups. */
 static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
-static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+static LIST_HEAD(cipso_v4_doi_list);
 
 /* Label mapping cache */
 int cipso_v4_cache_enabled = 1;
-- 
cgit v1.2.3


From 14d0e7b74e05a1983f5b607e90bc9bafa5ce9eb3 Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 7 Dec 2007 00:50:15 -0800
Subject: [LAPB] net/lapb/lapb_iface.c: use LIST_HEAD instead of LIST_HEAD_INIT

single list_head variable initialized with LIST_HEAD_INIT could almost
always can be replaced with LIST_HEAD declaration, this shrinks the code
and looks better.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/lapb/lapb_iface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index a2e7aa63fd8..2ba1bc4f3c3 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -39,7 +39,7 @@
 #include <linux/init.h>
 #include <net/lapb.h>
 
-static struct list_head lapb_list = LIST_HEAD_INIT(lapb_list);
+static LIST_HEAD(lapb_list);
 static DEFINE_RWLOCK(lapb_list_lock);
 
 /*
-- 
cgit v1.2.3


From 0e3cf7e9164048b79e7375bd66c9ef350e5b5bd7 Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 7 Dec 2007 00:50:43 -0800
Subject: [X25]: use LIST_HEAD instead of LIST_HEAD_INIT

single list_head variable initialized with LIST_HEAD_INIT could almost
always can be replaced with LIST_HEAD declaration, this shrinks the code
and looks better.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_forward.c | 2 +-
 net/x25/x25_link.c    | 2 +-
 net/x25/x25_route.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index 34478035e05..056a55f3a87 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <net/x25.h>
 
-struct list_head x25_forward_list = LIST_HEAD_INIT(x25_forward_list);
+LIST_HEAD(x25_forward_list);
 DEFINE_RWLOCK(x25_forward_list_lock);
 
 int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 753f2b64abe..e4e1b6e4953 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -30,7 +30,7 @@
 #include <linux/init.h>
 #include <net/x25.h>
 
-static struct list_head x25_neigh_list = LIST_HEAD_INIT(x25_neigh_list);
+static LIST_HEAD(x25_neigh_list);
 static DEFINE_RWLOCK(x25_neigh_list_lock);
 
 static void x25_t20timer_expiry(unsigned long);
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 86b5b4da097..2c999ccf504 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <net/x25.h>
 
-struct list_head x25_route_list = LIST_HEAD_INIT(x25_route_list);
+LIST_HEAD(x25_route_list);
 DEFINE_RWLOCK(x25_route_list_lock);
 
 /*
-- 
cgit v1.2.3


From df01812eba19834e48abd43246abedfbc4feeb7e Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 7 Dec 2007 00:51:11 -0800
Subject: [XFRM] net/xfrm/xfrm_state.c: use LIST_HEAD instead of LIST_HEAD_INIT

single list_head variable initialized with LIST_HEAD_INIT could almost
always can be replaced with LIST_HEAD declaration, this shrinks the code
and looks better.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 51866b7fab3..b2343d48fe9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1654,7 +1654,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 }
 EXPORT_SYMBOL(xfrm_replay_advance);
 
-static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
+static LIST_HEAD(xfrm_km_list);
 static DEFINE_RWLOCK(xfrm_km_lock);
 
 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
-- 
cgit v1.2.3


From b5e78337b50c0f3adda7faadb92f62bbdd5ffb56 Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 7 Dec 2007 00:51:45 -0800
Subject: [IUCV]: use LIST_HEAD instead of LIST_HEAD_INIT

these three list_head are all local variables, but can also use
LIST_HEAD.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 7698f6c459d..f13fe8821cb 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1492,7 +1492,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
 		[0x08] = iucv_message_pending,
 		[0x09] = iucv_message_pending,
 	};
-	struct list_head task_queue = LIST_HEAD_INIT(task_queue);
+	LIST_HEAD(task_queue);
 	struct iucv_irq_list *p, *n;
 
 	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
@@ -1526,7 +1526,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
 static void iucv_work_fn(struct work_struct *work)
 {
 	typedef void iucv_irq_fn(struct iucv_irq_data *);
-	struct list_head work_queue = LIST_HEAD_INIT(work_queue);
+	LIST_HEAD(work_queue);
 	struct iucv_irq_list *p, *n;
 
 	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
-- 
cgit v1.2.3


From 024626e36d75fc8c6e32d50d4c68bfc3b8df5fdf Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 8 Dec 2007 00:09:24 -0800
Subject: [NET] sysctl: make the sys.net.core sysctls per-namespace

Making them per-namespace is required for the following
two reasons:

 First, some ctl values have a per-namespace meaning.
 Second, making them writable from the sub-namespace
 is an isolation hole.

So I introduce the pernet operations to create these
tables. For init_net I use the existing statically
declared tables, for sub-namespace they are duplicated
and the write bits are removed from the mode.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 50 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index e322713e590..57a7eadb855 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -151,18 +151,58 @@ static struct ctl_table net_core_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static __initdata struct ctl_path net_core_path[] = {
+static __net_initdata struct ctl_path net_core_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "core", .ctl_name = NET_CORE, },
 	{ },
 };
 
-static __init int sysctl_core_init(void)
+static __net_init int sysctl_core_net_init(struct net *net)
 {
-	struct ctl_table_header *hdr;
+	struct ctl_table *tbl, *tmp;
+
+	tbl = net_core_table;
+	if (net != &init_net) {
+		tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+
+		for (tmp = tbl; tmp->procname; tmp++)
+			tmp->mode &= ~0222;
+	}
+
+	net->sysctl_core_hdr = register_net_sysctl_table(net,
+			net_core_path, tbl);
+	if (net->sysctl_core_hdr == NULL)
+		goto err_reg;
 
-	hdr = register_sysctl_paths(net_core_path, net_core_table);
-	return hdr == NULL ? -ENOMEM : 0;
+	return 0;
+
+err_reg:
+	if (tbl != net_core_table)
+		kfree(tbl);
+err_dup:
+	return -ENOMEM;
+}
+
+static __net_exit void sysctl_core_net_exit(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = net->sysctl_core_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->sysctl_core_hdr);
+	BUG_ON(tbl == net_core_table);
+	kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_core_ops = {
+	.init = sysctl_core_net_init,
+	.exit = sysctl_core_net_exit,
+};
+
+static __init int sysctl_core_init(void)
+{
+	return register_pernet_subsys(&sysctl_core_ops);
 }
 
 __initcall(sysctl_core_init);
-- 
cgit v1.2.3


From 790a35328991b01181ff5624bdb084053b6fac54 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 8 Dec 2007 00:11:51 -0800
Subject: [NET] sysctl: prepare core tables to point to netns variables

Some of ctl variables are going to be on the struct
net. Here's the way to adjust the ->data pointer on the
ctl_table-s to point on the right variable.

Since some pointers still point on the global variables,
I keep turning the write bits off on such tables.

This looks to become a common procedure for net sysctls,
so later parts of this code may migrate to some more
generic place.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 57a7eadb855..dc4cf7dda9d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -167,8 +167,13 @@ static __net_init int sysctl_core_net_init(struct net *net)
 		if (tbl == NULL)
 			goto err_dup;
 
-		for (tmp = tbl; tmp->procname; tmp++)
-			tmp->mode &= ~0222;
+		for (tmp = tbl; tmp->procname; tmp++) {
+			if (tmp->data >= (void *)&init_net &&
+					tmp->data < (void *)(&init_net + 1))
+				tmp->data += (char *)net - (char *)&init_net;
+			else
+				tmp->mode &= ~0222;
+		}
 	}
 
 	net->sysctl_core_hdr = register_net_sysctl_table(net,
-- 
cgit v1.2.3


From b8e1f9b5c37e77cc8f978a58859b35fe5edd5542 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 8 Dec 2007 00:12:33 -0800
Subject: [NET] sysctl: make sysctl_somaxconn per-namespace

Just move the variable on the struct net and adjust
its usage.

Others sysctls from sys.net.core table are more
difficult to virtualize (i.e. make them per-namespace),
but I'll look at them as well a bit later.

Signed-off-by: Pavel Emelyanov <xemul@oenvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 4 +++-
 net/socket.c               | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index dc4cf7dda9d..130338f83ae 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -127,7 +127,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_SOMAXCONN,
 		.procname	= "somaxconn",
-		.data		= &sysctl_somaxconn,
+		.data		= &init_net.sysctl_somaxconn,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -161,6 +161,8 @@ static __net_init int sysctl_core_net_init(struct net *net)
 {
 	struct ctl_table *tbl, *tmp;
 
+	net->sysctl_somaxconn = SOMAXCONN;
+
 	tbl = net_core_table;
 	if (net != &init_net) {
 		tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
diff --git a/net/socket.c b/net/socket.c
index 9ebca5c695d..7651de00850 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1365,17 +1365,17 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
  *	ready for listening.
  */
 
-int sysctl_somaxconn __read_mostly = SOMAXCONN;
-
 asmlinkage long sys_listen(int fd, int backlog)
 {
 	struct socket *sock;
 	int err, fput_needed;
+	int somaxconn;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (sock) {
-		if ((unsigned)backlog > sysctl_somaxconn)
-			backlog = sysctl_somaxconn;
+		somaxconn = sock->sk->sk_net->sysctl_somaxconn;
+		if ((unsigned)backlog > somaxconn)
+			backlog = somaxconn;
 
 		err = security_socket_listen(sock, backlog);
 		if (!err)
-- 
cgit v1.2.3


From 75314fb38364c81a573cd222f74d792409a7afba Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Sat, 8 Dec 2007 00:13:32 -0800
Subject: [IPV6]: create route6 proc init-fini functions

Make the proc creation/destruction to be a separate function. That
allows to remove the #ifdef CONFIG_PROC_FS in the init/fini function
and make them more readable.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 58 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6f833cacfcf..dbdae143ef5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2358,6 +2358,40 @@ static const struct file_operations rt6_stats_seq_fops = {
 	.llseek	 = seq_lseek,
 	.release = single_release,
 };
+
+static int ipv6_route_proc_init(struct net *net)
+{
+	int ret = -ENOMEM;
+	if (!proc_net_fops_create(net, "ipv6_route",
+				  0, &ipv6_route_proc_fops))
+		goto out;
+
+	if (!proc_net_fops_create(net, "rt6_stats",
+				  S_IRUGO, &rt6_stats_seq_fops))
+		goto out_ipv6_route;
+
+	ret = 0;
+out:
+	return ret;
+out_ipv6_route:
+	proc_net_remove(net, "ipv6_route");
+	goto out;
+}
+
+static void ipv6_route_proc_fini(struct net *net)
+{
+	proc_net_remove(net, "ipv6_route");
+	proc_net_remove(net, "rt6_stats");
+}
+#else
+static inline int ipv6_route_proc_init(struct net *net)
+{
+	return 0;
+}
+static inline void ipv6_route_proc_fini(struct net *net)
+{
+	return ;
+}
 #endif	/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SYSCTL
@@ -2484,21 +2518,14 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_kmem_cache;
 
-#ifdef CONFIG_PROC_FS
-	ret = -ENOMEM;
-	if (!proc_net_fops_create(&init_net, "ipv6_route",
-				  0, &ipv6_route_proc_fops))
+	ret = ipv6_route_proc_init(&init_net);
+	if (ret)
 		goto out_fib6_init;
 
-	if (!proc_net_fops_create(&init_net, "rt6_stats",
-				  S_IRUGO, &rt6_stats_seq_fops))
-		goto out_proc_ipv6_route;
-#endif
-
 #ifdef CONFIG_XFRM
 	ret = xfrm6_init();
 	if (ret)
-		goto out_proc_rt6_stats;
+		goto out_proc_init;
 #endif
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	ret = fib6_rules_init();
@@ -2522,14 +2549,10 @@ xfrm6_init:
 #endif
 #ifdef CONFIG_XFRM
 	xfrm6_fini();
-out_proc_rt6_stats:
 #endif
-#ifdef CONFIG_PROC_FS
-	proc_net_remove(&init_net, "rt6_stats");
-out_proc_ipv6_route:
-	proc_net_remove(&init_net, "ipv6_route");
+out_proc_init:
+	ipv6_route_proc_fini(&init_net);
 out_fib6_init:
-#endif
 	rt6_ifdown(NULL);
 	fib6_gc_cleanup();
 out_kmem_cache:
@@ -2542,8 +2565,7 @@ void ip6_route_cleanup(void)
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_cleanup();
 #endif
-	proc_net_remove(&init_net, "ipv6_route");
-	proc_net_remove(&init_net, "rt6_stats");
+	ipv6_route_proc_fini(&init_net);
 #ifdef CONFIG_XFRM
 	xfrm6_fini();
 #endif
-- 
cgit v1.2.3


From c35b7e72cd48bc7163b6900fb3689fa54b572bba Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Sat, 8 Dec 2007 00:14:11 -0800
Subject: [IPV6]: remove ifdef in route6 for xfrm6

The following patch create the usual static inline functions to disable
the xfrm6_init and xfrm6_fini function when XFRM is off.
That's allow to remove some ifdef and make the code a little more clear.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dbdae143ef5..c4e890abb2d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2522,11 +2522,10 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_fib6_init;
 
-#ifdef CONFIG_XFRM
 	ret = xfrm6_init();
 	if (ret)
 		goto out_proc_init;
-#endif
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	ret = fib6_rules_init();
 	if (ret)
@@ -2547,9 +2546,7 @@ fib6_rules_init:
 	fib6_rules_cleanup();
 xfrm6_init:
 #endif
-#ifdef CONFIG_XFRM
 	xfrm6_fini();
-#endif
 out_proc_init:
 	ipv6_route_proc_fini(&init_net);
 out_fib6_init:
@@ -2566,9 +2563,7 @@ void ip6_route_cleanup(void)
 	fib6_rules_cleanup();
 #endif
 	ipv6_route_proc_fini(&init_net);
-#ifdef CONFIG_XFRM
 	xfrm6_fini();
-#endif
 	rt6_ifdown(NULL);
 	fib6_gc_cleanup();
 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
-- 
cgit v1.2.3


From 7e5449c21562f1554d2c355db1ec9d3e4f434288 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Sat, 8 Dec 2007 00:14:54 -0800
Subject: [IPV6]: route6 remove ifdef for fib_rules

The patch defines the usual static inline functions when the code is
disabled for fib6_rules. That's allow to remove some ifdef in route.c
file and make the code a little more clear.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c4e890abb2d..11ef456d67c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2526,11 +2526,10 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_proc_init;
 
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	ret = fib6_rules_init();
 	if (ret)
 		goto xfrm6_init;
-#endif
+
 	ret = -ENOBUFS;
 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
@@ -2542,10 +2541,8 @@ out:
 	return ret;
 
 fib6_rules_init:
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_cleanup();
 xfrm6_init:
-#endif
 	xfrm6_fini();
 out_proc_init:
 	ipv6_route_proc_fini(&init_net);
@@ -2559,9 +2556,7 @@ out_kmem_cache:
 
 void ip6_route_cleanup(void)
 {
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_cleanup();
-#endif
 	ipv6_route_proc_fini(&init_net);
 	xfrm6_fini();
 	rt6_ifdown(NULL);
-- 
cgit v1.2.3


From c17860a039bbde134324ad6f9331500635f5799d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 8 Dec 2007 00:22:13 -0800
Subject: [IPV4]: no need pass pointer to a default into fib_detect_death

ipv4: no need pass pointer to a default into fib_detect_death

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c      | 4 ++--
 net/ipv4/fib_lookup.h    | 2 +-
 net/ipv4/fib_semantics.c | 6 +++---
 net/ipv4/fib_trie.c      | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 638185eb315..143a5213a18 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -314,7 +314,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 				if (next_fi != res->fi)
 					break;
 			} else if (!fib_detect_death(fi, order, &last_resort,
-						     &last_idx, &fn_hash_last_dflt)) {
+						     &last_idx, fn_hash_last_dflt)) {
 				if (res->fi)
 					fib_info_put(res->fi);
 				res->fi = fi;
@@ -332,7 +332,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 		goto out;
 	}
 
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
+	if (!fib_detect_death(fi, order, &last_resort, &last_idx, fn_hash_last_dflt)) {
 		if (res->fi)
 			fib_info_put(res->fi);
 		res->fi = fi;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index eef9eec17e0..6c9dd4282db 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -36,6 +36,6 @@ extern struct fib_alias *fib_find_alias(struct list_head *fah,
 					u8 tos, u32 prio);
 extern int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort,
-			    int *last_idx, int *dflt);
+			    int *last_idx, int dflt);
 
 #endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ec9b0dde9f9..bbd4a247b19 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -346,7 +346,7 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
 }
 
 int fib_detect_death(struct fib_info *fi, int order,
-		     struct fib_info **last_resort, int *last_idx, int *dflt)
+		     struct fib_info **last_resort, int *last_idx, int dflt)
 {
 	struct neighbour *n;
 	int state = NUD_NONE;
@@ -358,10 +358,10 @@ int fib_detect_death(struct fib_info *fi, int order,
 	}
 	if (state==NUD_REACHABLE)
 		return 0;
-	if ((state&NUD_VALID) && order != *dflt)
+	if ((state&NUD_VALID) && order != dflt)
 		return 0;
 	if ((state&NUD_VALID) ||
-	    (*last_idx<0 && order > *dflt)) {
+	    (*last_idx<0 && order > dflt)) {
 		*last_resort = fi;
 		*last_idx = order;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 13a80aa911d..d48a9bbcf54 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1830,7 +1830,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 			if (next_fi != res->fi)
 				break;
 		} else if (!fib_detect_death(fi, order, &last_resort,
-					     &last_idx, &trie_last_dflt)) {
+					     &last_idx, trie_last_dflt)) {
 			if (res->fi)
 				fib_info_put(res->fi);
 			res->fi = fi;
@@ -1846,7 +1846,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 		goto out;
 	}
 
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
+	if (!fib_detect_death(fi, order, &last_resort, &last_idx, trie_last_dflt)) {
 		if (res->fi)
 			fib_info_put(res->fi);
 		res->fi = fi;
-- 
cgit v1.2.3


From a2bbe6822f8928e254452765c07cb863633113b8 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 8 Dec 2007 00:31:44 -0800
Subject: [IPV4]: Unify assignment of fi to fib_result

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c   | 19 ++++---------------
 net/ipv4/fib_lookup.h | 10 ++++++++++
 net/ipv4/fib_trie.c   | 19 ++++---------------
 3 files changed, 18 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 143a5213a18..86087d45c64 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -315,10 +315,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 					break;
 			} else if (!fib_detect_death(fi, order, &last_resort,
 						     &last_idx, fn_hash_last_dflt)) {
-				if (res->fi)
-					fib_info_put(res->fi);
-				res->fi = fi;
-				atomic_inc(&fi->fib_clntref);
+				fib_result_assign(res, fi);
 				fn_hash_last_dflt = order;
 				goto out;
 			}
@@ -333,21 +330,13 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 	}
 
 	if (!fib_detect_death(fi, order, &last_resort, &last_idx, fn_hash_last_dflt)) {
-		if (res->fi)
-			fib_info_put(res->fi);
-		res->fi = fi;
-		atomic_inc(&fi->fib_clntref);
+		fib_result_assign(res, fi);
 		fn_hash_last_dflt = order;
 		goto out;
 	}
 
-	if (last_idx >= 0) {
-		if (res->fi)
-			fib_info_put(res->fi);
-		res->fi = last_resort;
-		if (last_resort)
-			atomic_inc(&last_resort->fib_clntref);
-	}
+	if (last_idx >= 0)
+		fib_result_assign(res, last_resort);
 	fn_hash_last_dflt = last_idx;
 out:
 	read_unlock(&fib_hash_lock);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 6c9dd4282db..26ee66d78c1 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -38,4 +38,14 @@ extern int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort,
 			    int *last_idx, int dflt);
 
+static inline void fib_result_assign(struct fib_result *res,
+				     struct fib_info *fi)
+{
+	if (res->fi != NULL)
+		fib_info_put(res->fi);
+	res->fi = fi;
+	if (fi != NULL)
+		atomic_inc(&fi->fib_clntref);
+}
+
 #endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d48a9bbcf54..c7c5c6c802d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1831,10 +1831,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 				break;
 		} else if (!fib_detect_death(fi, order, &last_resort,
 					     &last_idx, trie_last_dflt)) {
-			if (res->fi)
-				fib_info_put(res->fi);
-			res->fi = fi;
-			atomic_inc(&fi->fib_clntref);
+			fib_result_assign(res, fi);
 			trie_last_dflt = order;
 			goto out;
 		}
@@ -1847,20 +1844,12 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 	}
 
 	if (!fib_detect_death(fi, order, &last_resort, &last_idx, trie_last_dflt)) {
-		if (res->fi)
-			fib_info_put(res->fi);
-		res->fi = fi;
-		atomic_inc(&fi->fib_clntref);
+		fib_result_assign(res, fi);
 		trie_last_dflt = order;
 		goto out;
 	}
-	if (last_idx >= 0) {
-		if (res->fi)
-			fib_info_put(res->fi);
-		res->fi = last_resort;
-		if (last_resort)
-			atomic_inc(&last_resort->fib_clntref);
-	}
+	if (last_idx >= 0)
+		fib_result_assign(res, last_resort);
 	trie_last_dflt = last_idx;
  out:;
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 971b893e79db0f7dccfcea15dbdebca3ca64a84d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 8 Dec 2007 00:32:23 -0800
Subject: [IPV4]: last default route is a fib table property

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c | 16 ++++++++--------
 net/ipv4/fib_trie.c | 18 +++++++++---------
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 86087d45c64..9d540415847 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -272,8 +272,6 @@ out:
 	return err;
 }
 
-static int fn_hash_last_dflt=-1;
-
 static void
 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 {
@@ -314,9 +312,9 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 				if (next_fi != res->fi)
 					break;
 			} else if (!fib_detect_death(fi, order, &last_resort,
-						     &last_idx, fn_hash_last_dflt)) {
+						&last_idx, tb->tb_default)) {
 				fib_result_assign(res, fi);
-				fn_hash_last_dflt = order;
+				tb->tb_default = order;
 				goto out;
 			}
 			fi = next_fi;
@@ -325,19 +323,20 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 	}
 
 	if (order <= 0 || fi == NULL) {
-		fn_hash_last_dflt = -1;
+		tb->tb_default = -1;
 		goto out;
 	}
 
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx, fn_hash_last_dflt)) {
+	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
+				tb->tb_default)) {
 		fib_result_assign(res, fi);
-		fn_hash_last_dflt = order;
+		tb->tb_default = order;
 		goto out;
 	}
 
 	if (last_idx >= 0)
 		fib_result_assign(res, last_resort);
-	fn_hash_last_dflt = last_idx;
+	tb->tb_default = last_idx;
 out:
 	read_unlock(&fib_hash_lock);
 }
@@ -773,6 +772,7 @@ struct fib_table * __init fib_hash_init(u32 id)
 		return NULL;
 
 	tb->tb_id = id;
+	tb->tb_default = -1;
 	tb->tb_lookup = fn_hash_lookup;
 	tb->tb_insert = fn_hash_insert;
 	tb->tb_delete = fn_hash_delete;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c7c5c6c802d..9126eea7167 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1782,8 +1782,6 @@ static int fn_trie_flush(struct fib_table *tb)
 	return found;
 }
 
-static int trie_last_dflt = -1;
-
 static void
 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 {
@@ -1830,28 +1828,29 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
 			if (next_fi != res->fi)
 				break;
 		} else if (!fib_detect_death(fi, order, &last_resort,
-					     &last_idx, trie_last_dflt)) {
+					     &last_idx, tb->tb_default)) {
 			fib_result_assign(res, fi);
-			trie_last_dflt = order;
+			tb->tb_default = order;
 			goto out;
 		}
 		fi = next_fi;
 		order++;
 	}
 	if (order <= 0 || fi == NULL) {
-		trie_last_dflt = -1;
+		tb->tb_default = -1;
 		goto out;
 	}
 
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx, trie_last_dflt)) {
+	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
+				tb->tb_default)) {
 		fib_result_assign(res, fi);
-		trie_last_dflt = order;
+		tb->tb_default = order;
 		goto out;
 	}
 	if (last_idx >= 0)
 		fib_result_assign(res, last_resort);
-	trie_last_dflt = last_idx;
- out:;
+	tb->tb_default = last_idx;
+out:
 	rcu_read_unlock();
 }
 
@@ -1978,6 +1977,7 @@ struct fib_table * __init fib_hash_init(u32 id)
 		return NULL;
 
 	tb->tb_id = id;
+	tb->tb_default = -1;
 	tb->tb_lookup = fn_trie_lookup;
 	tb->tb_insert = fn_trie_insert;
 	tb->tb_delete = fn_trie_delete;
-- 
cgit v1.2.3


From c69bce20dda7f79160856a338298d65a284ba303 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Jan 2008 22:31:45 -0800
Subject: [NET]: Remove unused "mibalign" argument for snmp_mib_init().

With fixes from Arnaldo Carvalho de Melo.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c  | 23 ++++++++---------------
 net/ipv6/addrconf.c |  9 +++------
 net/ipv6/af_inet6.c | 17 ++++++++---------
 3 files changed, 19 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0e4b6eba234..7f8b27ff94f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1242,7 +1242,7 @@ unsigned long snmp_fold_field(void *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+int snmp_mib_init(void *ptr[2], size_t mibsize)
 {
 	BUG_ON(ptr == NULL);
 	ptr[0] = __alloc_percpu(mibsize);
@@ -1296,32 +1296,25 @@ static struct net_protocol icmp_protocol = {
 static int __init init_ipv4_mibs(void)
 {
 	if (snmp_mib_init((void **)net_statistics,
-			  sizeof(struct linux_mib),
-			  __alignof__(struct linux_mib)) < 0)
+			  sizeof(struct linux_mib)) < 0)
 		goto err_net_mib;
 	if (snmp_mib_init((void **)ip_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void **)icmp_statistics,
-			  sizeof(struct icmp_mib),
-			  __alignof__(struct icmp_mib)) < 0)
+			  sizeof(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void **)icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib),
-			  __alignof__(struct icmpmsg_mib)) < 0)
+			  sizeof(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 	if (snmp_mib_init((void **)tcp_statistics,
-			  sizeof(struct tcp_mib),
-			  __alignof__(struct tcp_mib)) < 0)
+			  sizeof(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
 	if (snmp_mib_init((void **)udp_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	if (snmp_mib_init((void **)udplite_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 
 	tcp_mib_init();
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 95cf3aa41c9..ba6f7925c17 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -256,16 +256,13 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
 	if (snmp_mib_init((void **)idev->stats.ipv6,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib)) < 0)
 		goto err_ip;
 	if (snmp_mib_init((void **)idev->stats.icmpv6,
-			  sizeof(struct icmpv6_mib),
-			  __alignof__(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib)) < 0)
 		goto err_icmp;
 	if (snmp_mib_init((void **)idev->stats.icmpv6msg,
-			  sizeof(struct icmpv6msg_mib),
-			  __alignof__(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg;
 
 	return 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5ab8ba7a586..90d2f723fc4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -714,20 +714,19 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	if (snmp_mib_init((void **)ipv6_statistics,
+			  sizeof(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
-			  __alignof__(struct icmpv6_mib)) < 0)
+	if (snmp_mib_init((void **)icmpv6_statistics,
+			  sizeof(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void **)icmpv6msg_statistics,
-	    sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udplite_stats_in6,
+			  sizeof (struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	return 0;
 
-- 
cgit v1.2.3


From 78282d2af598a1840934e2049a5c196885647f6a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 8 Dec 2007 15:08:08 -0200
Subject: [TFRC]: Move comment.

Moved up the comment "Receiver routines" above the first occurrence of
RX history routines.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 4eddb2da833..54cd23e7c27 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -114,6 +114,11 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
 
 
+/*
+ * 	Receiver History Routines
+ */
+static struct kmem_cache *tfrc_rx_hist_slab;
+
 /**
  * tfrc_rx_hist_index - index to reach n-th entry after loss_start
  */
@@ -131,11 +136,6 @@ static inline struct tfrc_rx_hist_entry *
 	return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
 }
 
-/*
- * 	Receiver History Routines
- */
-static struct kmem_cache *tfrc_rx_hist_slab;
-
 void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 			     const struct sk_buff *skb,
 			     const u32 ndp)
-- 
cgit v1.2.3


From 797eba424d4332f6aff5b741600b61e3d4b3d3f2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 8 Dec 2007 16:08:41 -0200
Subject: [TFRC]: The function tfrc_rx_hist_entry_delete() is not used anymore

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 54cd23e7c27..af44082f56e 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,11 +151,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
-static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
-{
-	kmem_cache_free(tfrc_rx_hist_slab, entry);
-}
-
 /**
  * tfrc_rx_hist_entry - return the n-th history entry after loss_start
  */
-- 
cgit v1.2.3


From 385ac2e3f226c09cb71733df1899658e33a7850f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 8 Dec 2007 16:26:59 -0200
Subject: [CCID3]: HC-receiver should not insert timestamps as HC-sender
 doesn't uses it

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index bf95c3292d5..f8644bfd88f 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -750,8 +750,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
 	pinv   = htonl(hcrx->ccid3hcrx_pinv);
 
-	if (dccp_insert_option_timestamp(sk, skb) ||
-	    dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
+	if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
 			       &pinv, sizeof(pinv)) ||
 	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
 			       &x_recv, sizeof(x_recv)))
-- 
cgit v1.2.3


From 005011211f559113686938c2c252b8ee1ab855b5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 11 Dec 2007 01:53:43 -0800
Subject: [IPSEC]: Add xfrm_input_state helper

This patch adds the xfrm_input_state helper function which returns the
current xfrm state being processed on the input path given an sk_buff.
This is currently only used by xfrm_input but will be used by ESP upon
asynchronous resumption.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 8b2b1b59133..8624cbdb2a1 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -109,7 +109,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	/* A negative encap_type indicates async resumption. */
 	if (encap_type < 0) {
 		async = 1;
-		x = skb->sp->xvec[skb->sp->len - 1];
+		x = xfrm_input_state(skb);
 		seq = XFRM_SKB_CB(skb)->seq;
 		goto resume;
 	}
-- 
cgit v1.2.3


From 64b7d96167977850f4a24e52dd0a76b03c6542cf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 11 Dec 2007 02:00:30 -0800
Subject: [NET]: dst_ifdown() cleanup

This cleanup shrinks size of net/core/dst.o on i386 from 1299 to 1289 bytes.
(This is because dev_hold()/dev_put() are doing atomic_inc()/atomic_dec() and
force compiler to re-evaluate memory contents.)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 5c6cfc4e7fd..7eceebaabaa 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -284,8 +284,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 		dev_put(dev);
 		if (dst->neighbour && dst->neighbour->dev == dev) {
 			dst->neighbour->dev = dst->dev;
+			dev_hold(dst->dev);
 			dev_put(dev);
-			dev_hold(dst->neighbour->dev);
 		}
 	}
 }
-- 
cgit v1.2.3


From ea72912c888661d1a847f1b1450643d4114097cf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 11 Dec 2007 02:09:47 -0800
Subject: [NETLINK]: kzalloc() conversion

nl_pid_hash_alloc() is renamed to nl_pid_hash_zalloc().
It is now returning zeroed memory to its callers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2e02b19e455..dbd7cad1c9a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -237,13 +237,14 @@ found:
 	return sk;
 }
 
-static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
+static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
 {
 	if (size <= PAGE_SIZE)
-		return kmalloc(size, GFP_ATOMIC);
+		return kzalloc(size, GFP_ATOMIC);
 	else
 		return (struct hlist_head *)
-			__get_free_pages(GFP_ATOMIC, get_order(size));
+			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+					 get_order(size));
 }
 
 static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
@@ -272,11 +273,10 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
 		size *= 2;
 	}
 
-	table = nl_pid_hash_alloc(size);
+	table = nl_pid_hash_zalloc(size);
 	if (!table)
 		return 0;
 
-	memset(table, 0, size);
 	otable = hash->table;
 	hash->table = table;
 	hash->mask = mask;
@@ -1919,7 +1919,7 @@ static int __init netlink_proto_init(void)
 	for (i = 0; i < MAX_LINKS; i++) {
 		struct nl_pid_hash *hash = &nl_table[i].hash;
 
-		hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table));
+		hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
 		if (!hash->table) {
 			while (i-- > 0)
 				nl_pid_hash_free(nl_table[i].hash.table,
@@ -1927,7 +1927,6 @@ static int __init netlink_proto_init(void)
 			kfree(nl_table);
 			goto panic;
 		}
-		memset(hash->table, 0, 1 * sizeof(*hash->table));
 		hash->max_shift = order;
 		hash->shift = 0;
 		hash->mask = 0;
-- 
cgit v1.2.3


From 1f9e636ea21bd648a5cbd2f744a1d39a5e183b20 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 11 Dec 2007 02:12:04 -0800
Subject: [TCP]: Use BUILD_BUG_ON for tcp_skb_cb size checking

The sizeof(struct tcp_skb_cb) should not be less than the
sizeof(skb->cb). This is checked in net/ipv4/tcp.c, but
this check can be made more gracefully.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb77088f305..fdaf965a679 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2563,7 +2563,6 @@ void tcp_done(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_done);
 
-extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
@@ -2582,9 +2581,7 @@ void __init tcp_init(void)
 	unsigned long limit;
 	int order, i, max_share;
 
-	if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
-		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
-					   sizeof(skb->cb));
+	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
-- 
cgit v1.2.3


From 95c9382a345262637d3e5c7da5f09f0f46daa930 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 11 Dec 2007 02:12:36 -0800
Subject: [INET]: Use BUILD_BUG_ON in inet_timewait_sock.c checks

Make the INET_TWDR_TWKILL_SLOTS vs sizeof(twdr->thread_slots)
check nicer.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_timewait_sock.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a60b99e0ebd..d43e787031a 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -194,16 +194,14 @@ out:
 
 EXPORT_SYMBOL_GPL(inet_twdr_hangman);
 
-extern void twkill_slots_invalid(void);
-
 void inet_twdr_twkill_work(struct work_struct *work)
 {
 	struct inet_timewait_death_row *twdr =
 		container_of(work, struct inet_timewait_death_row, twkill_work);
 	int i;
 
-	if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
-		twkill_slots_invalid();
+	BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
+			(sizeof(twdr->thread_slots) * 8));
 
 	while (twdr->thread_slots) {
 		spin_lock_bh(&twdr->death_lock);
-- 
cgit v1.2.3


From 51602b2a5e849babd94311e555bf1ba58151b8d4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 11 Dec 2007 02:17:40 -0800
Subject: [IPV4]: Cleanup sysctl manipulations in devinet.c

This includes:

 * moving neigh_sysctl_(un)register calls inside
   devinet_sysctl_(un)register ones, as they are always
   called in pairs;
 * making __devinet_sysctl_unregister() to unregister
   the ipv4_devconf struct, while original devinet_sysctl_unregister()
   works with the in_device to handle both - devconf and
   neigh sysctls;
 * make stubs for CONFIG_SYSCTL=n case to get rid of
   in-code ifdefs.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 59 +++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d1dc0150647..2dc2f7ed6bb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,7 +99,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
 static void devinet_sysctl_register(struct in_device *idev);
-static void devinet_sysctl_unregister(struct ipv4_devconf *p);
+static void devinet_sysctl_unregister(struct in_device *idev);
+#else
+static inline void devinet_sysctl_register(struct in_device *idev)
+{
+}
+static inline void devinet_sysctl_unregister(struct in_device *idev)
+{
+}
 #endif
 
 /* Locks all the inet devices. */
@@ -163,17 +170,10 @@ static struct in_device *inetdev_init(struct net_device *dev)
 		goto out_kfree;
 	/* Reference in_dev->dev */
 	dev_hold(dev);
-#ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
-			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
-#endif
-
 	/* Account for reference dev->ip_ptr (below) */
 	in_dev_hold(in_dev);
 
-#ifdef CONFIG_SYSCTL
 	devinet_sysctl_register(in_dev);
-#endif
 	ip_mc_init_dev(in_dev);
 	if (dev->flags & IFF_UP)
 		ip_mc_up(in_dev);
@@ -212,15 +212,9 @@ static void inetdev_destroy(struct in_device *in_dev)
 		inet_free_ifa(ifa);
 	}
 
-#ifdef CONFIG_SYSCTL
-	devinet_sysctl_unregister(&in_dev->cnf);
-#endif
-
 	dev->ip_ptr = NULL;
 
-#ifdef CONFIG_SYSCTL
-	neigh_sysctl_unregister(in_dev->arp_parms);
-#endif
+	devinet_sysctl_unregister(in_dev);
 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 	arp_ifdown(dev);
 
@@ -1113,13 +1107,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		 */
 		inetdev_changename(dev, in_dev);
 
-#ifdef CONFIG_SYSCTL
-		devinet_sysctl_unregister(&in_dev->cnf);
-		neigh_sysctl_unregister(in_dev->arp_parms);
-		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
-				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+		devinet_sysctl_unregister(in_dev);
 		devinet_sysctl_register(in_dev);
-#endif
 		break;
 	}
 out:
@@ -1518,21 +1507,31 @@ out:
 	return;
 }
 
+static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
+{
+	struct devinet_sysctl_table *t = cnf->sysctl;
+
+	if (t == NULL)
+		return;
+
+	cnf->sysctl = NULL;
+	unregister_sysctl_table(t->sysctl_header);
+	kfree(t->dev_name);
+	kfree(t);
+}
+
 static void devinet_sysctl_register(struct in_device *idev)
 {
-	return __devinet_sysctl_register(idev->dev->name, idev->dev->ifindex,
+	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
+			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+	__devinet_sysctl_register(idev->dev->name, idev->dev->ifindex,
 			&idev->cnf);
 }
 
-static void devinet_sysctl_unregister(struct ipv4_devconf *p)
+static void devinet_sysctl_unregister(struct in_device *idev)
 {
-	if (p->sysctl) {
-		struct devinet_sysctl_table *t = p->sysctl;
-		p->sysctl = NULL;
-		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->dev_name);
-		kfree(t);
-	}
+	__devinet_sysctl_unregister(&idev->cnf);
+	neigh_sysctl_unregister(idev->arp_parms);
 }
 #endif
 
-- 
cgit v1.2.3


From 0a3e78ac2c555441f5bc00588070058533bc8d6b Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Tue, 11 Dec 2007 02:23:18 -0800
Subject: [IPV6]: make flowlabel to return an error

This patch makes the flowlab subsystem to return an error code and makes
some cleanup with procfs ifdefs.
The af_inet6 will use the flowlabel init return code to check the initialization
was correct.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c      |  5 ++++-
 net/ipv6/ip6_flowlabel.c | 30 +++++++++++++++++++++++-------
 2 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 90d2f723fc4..614f3d905dd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -851,7 +851,9 @@ static int __init inet6_init(void)
 	err = ip6_route_init();
 	if (err)
 		goto ip6_route_fail;
-	ip6_flowlabel_init();
+	err = ip6_flowlabel_init();
+	if (err)
+		goto ip6_flowlabel_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -874,6 +876,7 @@ out:
 
 addrconf_fail:
 	ip6_flowlabel_cleanup();
+ip6_flowlabel_fail:
 	ip6_route_cleanup();
 ip6_route_fail:
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b12cc22e774..d0babea8981 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -692,20 +692,36 @@ static const struct file_operations ip6fl_seq_fops = {
 	.llseek		=	seq_lseek,
 	.release	=	seq_release_private,
 };
-#endif
 
+static int ip6_flowlabel_proc_init(struct net *net)
+{
+	if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
 
-void ip6_flowlabel_init(void)
+static void ip6_flowlabel_proc_fini(struct net *net)
 {
-#ifdef CONFIG_PROC_FS
-	proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
+	proc_net_remove(net, "ip6_flowlabel");
+}
+#else
+static inline int ip6_flowlabel_proc_init(struct net *net)
+{
+	return 0;
+}
+static inline void ip6_flowlabel_proc_fini(struct net *net)
+{
+	return ;
+}
 #endif
+
+int ip6_flowlabel_init(void)
+{
+	return ip6_flowlabel_proc_init(&init_net);
 }
 
 void ip6_flowlabel_cleanup(void)
 {
 	del_timer(&ip6_fl_gc_timer);
-#ifdef CONFIG_PROC_FS
-	proc_net_remove(&init_net, "ip6_flowlabel");
-#endif
+	ip6_flowlabel_proc_fini(&init_net);
 }
-- 
cgit v1.2.3


From 248b238dc960a42aa235057ba0a51a98ae2b0f0d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Tue, 11 Dec 2007 02:23:54 -0800
Subject: [IPV6]: make extended headers to return an error at initialization

This patch factorize the code for the differents init functions for rthdr,
nodata, destopt in a single function exthdrs_init.
This function returns an error so the af_inet6 module can check correctly
the initialization.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 10 ++++++---
 net/ipv6/exthdrs.c  | 64 ++++++++++++++++++++++++++++++++---------------------
 2 files changed, 46 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 614f3d905dd..442c298c1d7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -859,10 +859,11 @@ static int __init inet6_init(void)
 		goto addrconf_fail;
 
 	/* Init v6 extension headers. */
-	ipv6_rthdr_init();
+	err = ipv6_exthdrs_init();
+	if (err)
+		goto ipv6_exthdrs_fail;
+
 	ipv6_frag_init();
-	ipv6_nodata_init();
-	ipv6_destopt_init();
 
 	/* Init v6 transport protocols. */
 	udpv6_init();
@@ -874,6 +875,8 @@ static int __init inet6_init(void)
 out:
 	return err;
 
+ipv6_exthdrs_fail:
+	addrconf_cleanup();
 addrconf_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
@@ -932,6 +935,7 @@ static void __exit inet6_exit(void)
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
 
+	ipv6_exthdrs_exit();
 	addrconf_cleanup();
 	ip6_flowlabel_cleanup();
 	ip6_route_cleanup();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index cee06b1655c..2df34ed276f 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -308,28 +308,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	return -1;
 }
 
-static struct inet6_protocol destopt_protocol = {
-	.handler	=	ipv6_destopt_rcv,
-	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
-};
-
-void __init ipv6_destopt_init(void)
-{
-	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
-		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
-}
-
-static struct inet6_protocol nodata_protocol = {
-	.handler	=	dst_discard,
-	.flags		=	INET6_PROTO_NOPOLICY,
-};
-
-void __init ipv6_nodata_init(void)
-{
-	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
-		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
-}
-
 /********************************
   Routing header.
  ********************************/
@@ -527,12 +505,48 @@ static struct inet6_protocol rthdr_protocol = {
 	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
 };
 
-void __init ipv6_rthdr_init(void)
+static struct inet6_protocol destopt_protocol = {
+	.handler	=	ipv6_destopt_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+};
+
+static struct inet6_protocol nodata_protocol = {
+	.handler	=	dst_discard,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+int __init ipv6_exthdrs_init(void)
 {
-	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
-		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
+	int ret;
+
+	ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+	if (ret)
+		goto out;
+
+	ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	if (ret)
+		goto out_rthdr;
+
+	ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
+	if (ret)
+		goto out_destopt;
+
+out:
+	return ret;
+out_rthdr:
+	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+out_destopt:
+	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	goto out;
 };
 
+void ipv6_exthdrs_exit(void)
+{
+	inet6_del_protocol(&nodata_protocol, IPPROTO_NONE);
+	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+}
+
 /**********************************
   Hop-by-hop options.
  **********************************/
-- 
cgit v1.2.3


From 853cbbaaa4ccdf221be5ab6afe967aa9998546b7 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Tue, 11 Dec 2007 02:24:29 -0800
Subject: [IPV6]: make frag to return an error at initialization

This patch makes the frag_init to return an error code, so the af_inet6
module can handle the error.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c   |  8 ++++++--
 net/ipv6/reassembly.c | 16 +++++++++++++---
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 442c298c1d7..a75c4bc9281 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -863,7 +863,9 @@ static int __init inet6_init(void)
 	if (err)
 		goto ipv6_exthdrs_fail;
 
-	ipv6_frag_init();
+	err = ipv6_frag_init();
+	if (err)
+		goto ipv6_frag_fail;
 
 	/* Init v6 transport protocols. */
 	udpv6_init();
@@ -875,6 +877,8 @@ static int __init inet6_init(void)
 out:
 	return err;
 
+ipv6_frag_fail:
+	ipv6_exthdrs_exit();
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
@@ -934,7 +938,7 @@ static void __exit inet6_exit(void)
 
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
-
+	ipv6_frag_exit();
 	ipv6_exthdrs_exit();
 	addrconf_cleanup();
 	ip6_flowlabel_cleanup();
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 76c88a93b9b..bf4173daecb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -632,11 +632,13 @@ static struct inet6_protocol frag_protocol =
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
-void __init ipv6_frag_init(void)
+int __init ipv6_frag_init(void)
 {
-	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
-		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
+	int ret;
 
+	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	if (ret)
+		goto out;
 	ip6_frags.ctl = &ip6_frags_ctl;
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
@@ -646,4 +648,12 @@ void __init ipv6_frag_init(void)
 	ip6_frags.match = ip6_frag_match;
 	ip6_frags.frag_expire = ip6_frag_expire;
 	inet_frags_init(&ip6_frags);
+out:
+	return ret;
+}
+
+void ipv6_frag_exit(void)
+{
+	inet_frags_fini(&ip6_frags);
+	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 }
-- 
cgit v1.2.3


From 87c3efbfdd1f98af14a1f60ff19f73d9a8d8da98 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Tue, 11 Dec 2007 02:25:01 -0800
Subject: [IPV6]: make inet6_register_protosw to return an error code

This patch makes the inet6_register_protosw to return an error code.
The different protocols can be aware the registration was successful or
not and can pass the error to the initial caller, af_inet6.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a75c4bc9281..53b06de696b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -565,21 +565,23 @@ static struct inet_protosw rawv6_protosw = {
 	.flags		= INET_PROTOSW_REUSE,
 };
 
-void
-inet6_register_protosw(struct inet_protosw *p)
+int inet6_register_protosw(struct inet_protosw *p)
 {
 	struct list_head *lh;
 	struct inet_protosw *answer;
-	int protocol = p->protocol;
 	struct list_head *last_perm;
+	int protocol = p->protocol;
+	int ret;
 
 	spin_lock_bh(&inetsw6_lock);
 
+	ret = -EINVAL;
 	if (p->type >= SOCK_MAX)
 		goto out_illegal;
 
 	/* If we are trying to override a permanent protocol, bail. */
 	answer = NULL;
+	ret = -EPERM;
 	last_perm = &inetsw6[p->type];
 	list_for_each(lh, &inetsw6[p->type]) {
 		answer = list_entry(lh, struct inet_protosw, list);
@@ -603,9 +605,10 @@ inet6_register_protosw(struct inet_protosw *p)
 	 * system automatically returns to the old behavior.
 	 */
 	list_add_rcu(&p->list, last_perm);
+	ret = 0;
 out:
 	spin_unlock_bh(&inetsw6_lock);
-	return;
+	return ret;
 
 out_permanent:
 	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
-- 
cgit v1.2.3


From 7f4e4868f3ce0e946f116c28fa4fe033be5e4ba9 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Tue, 11 Dec 2007 02:25:35 -0800
Subject: [IPV6]: make the protocol initialization to return an error code

This patchset makes the different protocols to return an error code, so
the af_inet6 module can check the initialization was correct or not.

The raw6 was taken into account to be consistent with the rest of the
protocols, but the registration is at the same place.
Because the raw6 has its own init function, the proto and the ops structure
can be moved inside the raw6.c file.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c      | 77 +++++++++++++++++++++---------------------------
 net/ipv6/ipv6_sockglue.c |  3 +-
 net/ipv6/raw.c           | 52 ++++++++++++++++++++++++++++++++
 net/ipv6/tcp_ipv6.c      | 36 +++++++++++++++++-----
 net/ipv6/udp.c           | 26 +++++++++++++---
 net/ipv6/udplite.c       | 25 +++++++++++++---
 6 files changed, 159 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 53b06de696b..34c20533ba5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -529,42 +529,6 @@ static struct net_proto_family inet6_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
-/* Same as inet6_dgram_ops, sans udp_poll.  */
-static const struct proto_ops inet6_sockraw_ops = {
-	.family		   = PF_INET6,
-	.owner		   = THIS_MODULE,
-	.release	   = inet6_release,
-	.bind		   = inet6_bind,
-	.connect	   = inet_dgram_connect,	/* ok		*/
-	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
-	.accept		   = sock_no_accept,		/* a do nothing	*/
-	.getname	   = inet6_getname,
-	.poll		   = datagram_poll,		/* ok		*/
-	.ioctl		   = inet6_ioctl,		/* must change  */
-	.listen		   = sock_no_listen,		/* ok		*/
-	.shutdown	   = inet_shutdown,		/* ok		*/
-	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
-	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
-	.mmap		   = sock_no_mmap,
-	.sendpage	   = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
-	.compat_setsockopt = compat_sock_common_setsockopt,
-	.compat_getsockopt = compat_sock_common_getsockopt,
-#endif
-};
-
-static struct inet_protosw rawv6_protosw = {
-	.type		= SOCK_RAW,
-	.protocol	= IPPROTO_IP,	/* wild card */
-	.prot		= &rawv6_prot,
-	.ops		= &inet6_sockraw_ops,
-	.capability	= CAP_NET_RAW,
-	.no_check	= UDP_CSUM_DEFAULT,
-	.flags		= INET_PROTOSW_REUSE,
-};
-
 int inet6_register_protosw(struct inet_protosw *p)
 {
 	struct list_head *lh;
@@ -771,7 +735,6 @@ static int __init inet6_init(void)
 	__this_module.can_unload = &ipv6_unload;
 #endif
 #endif
-
 	err = proto_register(&tcpv6_prot, 1);
 	if (err)
 		goto out;
@@ -796,14 +759,16 @@ static int __init inet6_init(void)
 	/* We MUST register RAW sockets before we create the ICMP6,
 	 * IGMP6, or NDISC control sockets.
 	 */
-	inet6_register_protosw(&rawv6_protosw);
+	err = rawv6_init();
+	if (err)
+		goto out_unregister_raw_proto;
 
 	/* Register the family here so that the init calls below will
 	 * be able to create sockets. (?? is this dangerous ??)
 	 */
 	err = sock_register(&inet6_family_ops);
 	if (err)
-		goto out_unregister_raw_proto;
+		goto out_sock_register_fail;
 
 	/* Initialise ipv6 mibs */
 	err = init_ipv6_mibs();
@@ -871,15 +836,32 @@ static int __init inet6_init(void)
 		goto ipv6_frag_fail;
 
 	/* Init v6 transport protocols. */
-	udpv6_init();
-	udplitev6_init();
-	tcpv6_init();
+	err = udpv6_init();
+	if (err)
+		goto udpv6_fail;
 
-	ipv6_packet_init();
-	err = 0;
+	err = udplitev6_init();
+	if (err)
+		goto udplitev6_fail;
+
+	err = tcpv6_init();
+	if (err)
+		goto tcpv6_fail;
+
+	err = ipv6_packet_init();
+	if (err)
+		goto ipv6_packet_fail;
 out:
 	return err;
 
+ipv6_packet_fail:
+	tcpv6_exit();
+tcpv6_fail:
+	udplitev6_exit();
+udplitev6_fail:
+	udpv6_exit();
+udpv6_fail:
+	ipv6_frag_exit();
 ipv6_frag_fail:
 	ipv6_exthdrs_exit();
 ipv6_exthdrs_fail:
@@ -920,6 +902,8 @@ icmp_fail:
 out_unregister_sock:
 	sock_unregister(PF_INET6);
 	rtnl_unregister_all(PF_INET6);
+out_sock_register_fail:
+	rawv6_exit();
 out_unregister_raw_proto:
 	proto_unregister(&rawv6_prot);
 out_unregister_udplite_proto:
@@ -939,6 +923,10 @@ static void __exit inet6_exit(void)
 	/* Disallow any further netlink messages */
 	rtnl_unregister_all(PF_INET6);
 
+	udpv6_exit();
+	udplitev6_exit();
+	tcpv6_exit();
+
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
 	ipv6_frag_exit();
@@ -961,6 +949,7 @@ static void __exit inet6_exit(void)
 	igmp6_cleanup();
 	ndisc_cleanup();
 	icmpv6_cleanup();
+	rawv6_exit();
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
 #endif
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 8c5f80fd03a..20fece4ad3d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1128,9 +1128,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_ipv6_getsockopt);
 #endif
 
-void __init ipv6_packet_init(void)
+int __init ipv6_packet_init(void)
 {
 	dev_add_pack(&ipv6_packet_type);
+	return 0;
 }
 
 void ipv6_packet_cleanup(void)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b34631e1b01..850b83e430b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1273,3 +1273,55 @@ void raw6_proc_exit(void)
 	proc_net_remove(&init_net, "raw6");
 }
 #endif	/* CONFIG_PROC_FS */
+
+/* Same as inet6_dgram_ops, sans udp_poll.  */
+static const struct proto_ops inet6_sockraw_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = inet6_release,
+	.bind		   = inet6_bind,
+	.connect	   = inet_dgram_connect,	/* ok		*/
+	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
+	.accept		   = sock_no_accept,		/* a do nothing	*/
+	.getname	   = inet6_getname,
+	.poll		   = datagram_poll,		/* ok		*/
+	.ioctl		   = inet6_ioctl,		/* must change  */
+	.listen		   = sock_no_listen,		/* ok		*/
+	.shutdown	   = inet_shutdown,		/* ok		*/
+	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
+	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw rawv6_protosw = {
+	.type		= SOCK_RAW,
+	.protocol	= IPPROTO_IP,	/* wild card */
+	.prot		= &rawv6_prot,
+	.ops		= &inet6_sockraw_ops,
+	.capability	= CAP_NET_RAW,
+	.no_check	= UDP_CSUM_DEFAULT,
+	.flags		= INET_PROTOSW_REUSE,
+};
+
+int __init rawv6_init(void)
+{
+	int ret;
+
+	ret = inet6_register_protosw(&rawv6_protosw);
+	if (ret)
+		goto out;
+out:
+	return ret;
+}
+
+void __exit rawv6_exit(void)
+{
+	inet6_unregister_protosw(&rawv6_protosw);
+}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93980c3b83e..9544beb6d1c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2166,14 +2166,36 @@ static struct inet_protosw tcpv6_protosw = {
 				INET_PROTOSW_ICSK,
 };
 
-void __init tcpv6_init(void)
+int __init tcpv6_init(void)
 {
+	int ret;
+
+	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+	if (ret)
+		goto out;
+
 	/* register inet6 protocol */
-	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
-		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
-	inet6_register_protosw(&tcpv6_protosw);
+	ret = inet6_register_protosw(&tcpv6_protosw);
+	if (ret)
+		goto out_tcpv6_protocol;
+
+	ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6,
+				       SOCK_RAW, IPPROTO_TCP);
+	if (ret)
+		goto out_tcpv6_protosw;
+out:
+	return ret;
 
-	if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW,
-				     IPPROTO_TCP) < 0)
-		panic("Failed to create the TCPv6 control socket.\n");
+out_tcpv6_protocol:
+	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+out_tcpv6_protosw:
+	inet6_unregister_protosw(&tcpv6_protosw);
+	goto out;
+}
+
+void __exit tcpv6_exit(void)
+{
+	sock_release(tcp6_socket);
+	inet6_unregister_protosw(&tcpv6_protosw);
+	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fa640765385..1e3bd39f54e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1016,9 +1016,27 @@ static struct inet_protosw udpv6_protosw = {
 };
 
 
-void __init udpv6_init(void)
+int __init udpv6_init(void)
 {
-	if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0)
-		printk(KERN_ERR "udpv6_init: Could not register protocol\n");
-	inet6_register_protosw(&udpv6_protosw);
+	int ret;
+
+	ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+	if (ret)
+		goto out;
+
+	ret = inet6_register_protosw(&udpv6_protosw);
+	if (ret)
+		goto out_udpv6_protocol;
+out:
+	return ret;
+
+out_udpv6_protocol:
+	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+	goto out;
+}
+
+void __exit udpv6_exit(void)
+{
+	inet6_unregister_protosw(&udpv6_protosw);
+	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
 }
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5a0379f7141..f20b376689f 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -77,12 +77,29 @@ static struct inet_protosw udplite6_protosw = {
 	.flags		= INET_PROTOSW_PERMANENT,
 };
 
-void __init udplitev6_init(void)
+int __init udplitev6_init(void)
 {
-	if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0)
-		printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__);
+	int ret;
 
-	inet6_register_protosw(&udplite6_protosw);
+	ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+	if (ret)
+		goto out;
+
+	ret = inet6_register_protosw(&udplite6_protosw);
+	if (ret)
+		goto out_udplitev6_protocol;
+out:
+	return ret;
+
+out_udplitev6_protocol:
+	inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+	goto out;
+}
+
+void __exit udplitev6_exit(void)
+{
+	inet6_unregister_protosw(&udplite6_protosw);
+	inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
 }
 
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From a0a53c8ba95451feef6c1975016f0a1eb3044ad4 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 11 Dec 2007 04:19:17 -0800
Subject: [NETNS]: struct net content re-work (v3)

Recently David Miller and Herbert Xu pointed out that struct net becomes
overbloated and un-maintainable. There are two solutions:
- provide a pointer to a network subsystem definition from struct net.
  This costs an additional dereferrence
- place sub-system definition into the structure itself. This will speedup
  run-time access at the cost of recompilation time

The second approach looks better for us. Other sub-systems will follow.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c         |  4 ++--
 net/unix/sysctl_net_unix.c | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b8a2189fb5c..63a9239571a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -592,7 +592,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
 				&af_unix_sk_receive_queue_lock_key);
 
 	sk->sk_write_space	= unix_write_space;
-	sk->sk_max_ack_backlog	= net->sysctl_unix_max_dgram_qlen;
+	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
 	sk->sk_destruct		= unix_sock_destructor;
 	u	  = unix_sk(sk);
 	u->dentry = NULL;
@@ -2138,7 +2138,7 @@ static int unix_net_init(struct net *net)
 {
 	int error = -ENOMEM;
 
-	net->sysctl_unix_max_dgram_qlen = 10;
+	net->unx.sysctl_max_dgram_qlen = 10;
 	if (unix_sysctl_register(net))
 		goto out;
 
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 553ef6a487d..77513d7e35f 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -18,7 +18,7 @@ static ctl_table unix_table[] = {
 	{
 		.ctl_name	= NET_UNIX_MAX_DGRAM_QLEN,
 		.procname	= "max_dgram_qlen",
-		.data		= &init_net.sysctl_unix_max_dgram_qlen,
+		.data		= &init_net.unx.sysctl_max_dgram_qlen,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -40,9 +40,9 @@ int unix_sysctl_register(struct net *net)
 	if (table == NULL)
 		goto err_alloc;
 
-	table[0].data = &net->sysctl_unix_max_dgram_qlen;
-	net->unix_ctl = register_net_sysctl_table(net, unix_path, table);
-	if (net->unix_ctl == NULL)
+	table[0].data = &net->unx.sysctl_max_dgram_qlen;
+	net->unx.ctl = register_net_sysctl_table(net, unix_path, table);
+	if (net->unx.ctl == NULL)
 		goto err_reg;
 
 	return 0;
@@ -57,8 +57,8 @@ void unix_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
 
-	table = net->unix_ctl->ctl_table_arg;
-	unregister_sysctl_table(net->unix_ctl);
+	table = net->unx.ctl->ctl_table_arg;
+	unregister_sysctl_table(net->unx.ctl);
 	kfree(table);
 }
 
-- 
cgit v1.2.3


From 2aaef4e47fef8a6c0bc7fc5d9d3eea4af290e04c Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 11 Dec 2007 04:19:54 -0800
Subject: [NETNS]: separate af_packet netns data

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ace29f1c4c5..485af5691d6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -803,9 +803,9 @@ static int packet_release(struct socket *sock)
 	net = sk->sk_net;
 	po = pkt_sk(sk);
 
-	write_lock_bh(&net->packet_sklist_lock);
+	write_lock_bh(&net->packet.sklist_lock);
 	sk_del_node_init(sk);
-	write_unlock_bh(&net->packet_sklist_lock);
+	write_unlock_bh(&net->packet.sklist_lock);
 
 	/*
 	 *	Unhook packet receive handler.
@@ -1015,9 +1015,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
 		po->running = 1;
 	}
 
-	write_lock_bh(&net->packet_sklist_lock);
-	sk_add_node(sk, &net->packet_sklist);
-	write_unlock_bh(&net->packet_sklist_lock);
+	write_lock_bh(&net->packet.sklist_lock);
+	sk_add_node(sk, &net->packet.sklist);
+	write_unlock_bh(&net->packet.sklist_lock);
 	return(0);
 out:
 	return err;
@@ -1452,8 +1452,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
 	struct net_device *dev = data;
 	struct net *net = dev->nd_net;
 
-	read_lock(&net->packet_sklist_lock);
-	sk_for_each(sk, node, &net->packet_sklist) {
+	read_lock(&net->packet.sklist_lock);
+	sk_for_each(sk, node, &net->packet.sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
 		switch (msg) {
@@ -1492,7 +1492,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
 			break;
 		}
 	}
-	read_unlock(&net->packet_sklist_lock);
+	read_unlock(&net->packet.sklist_lock);
 	return NOTIFY_DONE;
 }
 
@@ -1862,7 +1862,7 @@ static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
 	struct sock *s;
 	struct hlist_node *node;
 
-	sk_for_each(s, node, &net->packet_sklist) {
+	sk_for_each(s, node, &net->packet.sklist) {
 		if (!off--)
 			return s;
 	}
@@ -1872,7 +1872,7 @@ static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
-	read_lock(&net->packet_sklist_lock);
+	read_lock(&net->packet.sklist_lock);
 	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -1881,14 +1881,14 @@ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct net *net = seq->private;
 	++*pos;
 	return  (v == SEQ_START_TOKEN)
-		? sk_head(&net->packet_sklist)
+		? sk_head(&net->packet.sklist)
 		: sk_next((struct sock*)v) ;
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
-	read_unlock(&net->packet_sklist_lock);
+	read_unlock(&net->packet.sklist_lock);
 }
 
 static int packet_seq_show(struct seq_file *seq, void *v)
@@ -1940,8 +1940,8 @@ static const struct file_operations packet_seq_fops = {
 
 static int packet_net_init(struct net *net)
 {
-	rwlock_init(&net->packet_sklist_lock);
-	INIT_HLIST_HEAD(&net->packet_sklist);
+	rwlock_init(&net->packet.sklist_lock);
+	INIT_HLIST_HEAD(&net->packet.sklist);
 
 	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
 		return -ENOMEM;
-- 
cgit v1.2.3


From df8f83fdd6369e1ba85f089fd6fe26bb2ddcb36f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 12:24:49 -0200
Subject: [TFRC]: Put RX/TX initialisation into tfrc.c

This separates RX/TX initialisation and puts all packet history / loss intervals
initialisation into tfrc.c.
The organisation is uniform: slab declaration -> {rx,tx}_init() -> {rx,tx}_exit()

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 68 +++++++++++++++++--------------------
 net/dccp/ccids/lib/tfrc.c           | 31 ++++++++++++-----
 2 files changed, 55 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index af44082f56e..727b17d3bd0 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -57,6 +57,22 @@ struct tfrc_tx_hist_entry {
  */
 static struct kmem_cache *tfrc_tx_hist_slab;
 
+int __init tfrc_tx_packet_history_init(void)
+{
+	tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
+					      sizeof(struct tfrc_tx_hist_entry),
+					      0, SLAB_HWCACHE_ALIGN, NULL);
+	return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
+}
+
+void tfrc_tx_packet_history_exit(void)
+{
+	if (tfrc_tx_hist_slab != NULL) {
+		kmem_cache_destroy(tfrc_tx_hist_slab);
+		tfrc_tx_hist_slab = NULL;
+	}
+}
+
 static struct tfrc_tx_hist_entry *
 	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
 {
@@ -119,6 +135,22 @@ EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
  */
 static struct kmem_cache *tfrc_rx_hist_slab;
 
+int __init tfrc_rx_packet_history_init(void)
+{
+	tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
+					      sizeof(struct tfrc_rx_hist_entry),
+					      0, SLAB_HWCACHE_ALIGN, NULL);
+	return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
+}
+
+void tfrc_rx_packet_history_exit(void)
+{
+	if (tfrc_rx_hist_slab != NULL) {
+		kmem_cache_destroy(tfrc_rx_hist_slab);
+		tfrc_rx_hist_slab = NULL;
+	}
+}
+
 /**
  * tfrc_rx_hist_index - index to reach n-th entry after loss_start
  */
@@ -316,39 +348,3 @@ keep_ref_for_next_time:
 	return sample;
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
-
-__init int packet_history_init(void)
-{
-	tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
-					      sizeof(struct tfrc_tx_hist_entry), 0,
-					      SLAB_HWCACHE_ALIGN, NULL);
-	if (tfrc_tx_hist_slab == NULL)
-		goto out_err;
-
-	tfrc_rx_hist_slab = kmem_cache_create("tfrc_rx_hist",
-					      sizeof(struct tfrc_rx_hist_entry), 0,
-					      SLAB_HWCACHE_ALIGN, NULL);
-	if (tfrc_rx_hist_slab == NULL)
-		goto out_free_tx;
-
-	return 0;
-
-out_free_tx:
-	kmem_cache_destroy(tfrc_tx_hist_slab);
-	tfrc_tx_hist_slab = NULL;
-out_err:
-	return -ENOBUFS;
-}
-
-void packet_history_exit(void)
-{
-	if (tfrc_tx_hist_slab != NULL) {
-		kmem_cache_destroy(tfrc_tx_hist_slab);
-		tfrc_tx_hist_slab = NULL;
-	}
-
-	if (tfrc_rx_hist_slab != NULL) {
-		kmem_cache_destroy(tfrc_rx_hist_slab);
-		tfrc_rx_hist_slab = NULL;
-	}
-}
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 3a7a1838a64..20763fa75d4 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -14,27 +14,42 @@ module_param(tfrc_debug, bool, 0444);
 MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
 #endif
 
+extern int  tfrc_tx_packet_history_init(void);
+extern void tfrc_tx_packet_history_exit(void);
+extern int  tfrc_rx_packet_history_init(void);
+extern void tfrc_rx_packet_history_exit(void);
+
 extern int  dccp_li_init(void);
 extern void dccp_li_exit(void);
-extern int packet_history_init(void);
-extern void packet_history_exit(void);
 
 static int __init tfrc_module_init(void)
 {
 	int rc = dccp_li_init();
 
-	if (rc == 0) {
-		rc = packet_history_init();
-		if (rc != 0)
-			dccp_li_exit();
-	}
+	if (rc)
+		goto out;
+
+	rc = tfrc_tx_packet_history_init();
+	if (rc)
+		goto out_free_loss_intervals;
 
+	rc = tfrc_rx_packet_history_init();
+	if (rc)
+		goto out_free_tx_history;
+	return 0;
+
+out_free_tx_history:
+	tfrc_tx_packet_history_exit();
+out_free_loss_intervals:
+	dccp_li_exit();
+out:
 	return rc;
 }
 
 static void __exit tfrc_module_exit(void)
 {
-	packet_history_exit();
+	tfrc_rx_packet_history_exit();
+	tfrc_tx_packet_history_exit();
 	dccp_li_exit();
 }
 
-- 
cgit v1.2.3


From 8995a238ef6869bc5c80240440bc58452c7af283 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 12:28:40 -0200
Subject: [TFRC]: Loss interval code needs the macros/inlines that were moved

This moves the inlines (which were previously declared as macros) back into
packet_history.h since the loss detection code needs to be able to read entries
from the RX history in order to create the relevant loss entries: it needs at
least tfrc_rx_hist_loss_prev() and tfrc_rx_hist_last_rcv(), which in turn
require the definition of the other inlines (macros).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 35 -----------------------------------
 net/dccp/ccids/lib/packet_history.h | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 727b17d3bd0..dd2cf2d6b8f 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,23 +151,6 @@ void tfrc_rx_packet_history_exit(void)
 	}
 }
 
-/**
- * tfrc_rx_hist_index - index to reach n-th entry after loss_start
- */
-static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
-{
-	return (h->loss_start + n) & TFRC_NDUPACK;
-}
-
-/**
- * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
- */
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
-{
-	return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
-}
-
 void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 			     const struct sk_buff *skb,
 			     const u32 ndp)
@@ -183,24 +166,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
-/**
- * tfrc_rx_hist_entry - return the n-th history entry after loss_start
- */
-static inline struct tfrc_rx_hist_entry *
-		tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
-{
-	return h->ring[tfrc_rx_hist_index(h, n)];
-}
-
-/**
- * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
- */
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
-{
-	return h->ring[h->loss_start];
-}
-
 /* has the packet contained in skb been seen before? */
 int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 {
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 3dfd182b0e6..e58b0fcdc6a 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -84,6 +84,41 @@ struct tfrc_rx_hist {
 #define rtt_sample_prev		  loss_start
 };
 
+/**
+ * tfrc_rx_hist_index - index to reach n-th entry after loss_start
+ */
+static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
+{
+	return (h->loss_start + n) & TFRC_NDUPACK;
+}
+
+/**
+ * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
+{
+	return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
+}
+
+/**
+ * tfrc_rx_hist_entry - return the n-th history entry after loss_start
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
+{
+	return h->ring[tfrc_rx_hist_index(h, n)];
+}
+
+/**
+ * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
+{
+	return h->ring[h->loss_start];
+}
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u32 ndp);
 
-- 
cgit v1.2.3


From 8a9c7e92e0ca97632126feee32ba2698b4eb6c8f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 13:50:51 -0200
Subject: [TFRC]: Ringbuffer to track loss interval history

A ringbuffer-based implementation of loss interval history is easier to
maintain, allocate, and update.

The `swap' routine to keep the RX history sorted is due to and was written
by Arnaldo Carvalho de Melo, simplifying an earlier macro-based variant.

Details:
 * access to the Loss Interval Records via macro wrappers (with safety checks);
 * simplified, on-demand allocation of entries (no extra memory consumption on
   lossless links); cache allocation is local to the module / exported as service;
 * provision of RFC-compliant algorithm to re-compute average loss interval;
 * provision of comprehensive, new loss detection algorithm
 	- support for all cases of loss, including re-ordered/duplicate packets;
 	- waiting for NDUPACK=3 packets to fill the hole;
	- updating loss records when a late-arriving packet fills a hole.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/loss_interval.c  | 161 +++++++++++++++++++++++++-
 net/dccp/ccids/lib/loss_interval.h  |  56 ++++++++-
 net/dccp/ccids/lib/packet_history.c | 218 +++++++++++++++++++++++++++++++++++-
 net/dccp/ccids/lib/packet_history.h |  11 +-
 net/dccp/ccids/lib/tfrc.h           |   3 +
 5 files changed, 435 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index c0a933a1d28..39980d1f535 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -1,6 +1,7 @@
 /*
  *  net/dccp/ccids/lib/loss_interval.c
  *
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
@@ -10,12 +11,7 @@
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  */
-
-#include <linux/module.h>
 #include <net/sock.h>
-#include "../../dccp.h"
-#include "loss_interval.h"
-#include "packet_history.h"
 #include "tfrc.h"
 
 #define DCCP_LI_HIST_IVAL_F_LENGTH  8
@@ -27,6 +23,54 @@ struct dccp_li_hist_entry {
 	u32		 dccplih_interval;
 };
 
+static struct kmem_cache  *tfrc_lh_slab  __read_mostly;
+/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
+static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
+
+/* implements LIFO semantics on the array */
+static inline u8 LIH_INDEX(const u8 ctr)
+{
+	return (LIH_SIZE - 1 - (ctr % LIH_SIZE));
+}
+
+/* the `counter' index always points at the next entry to be populated */
+static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh)
+{
+	return lh->counter ? lh->ring[LIH_INDEX(lh->counter - 1)] : NULL;
+}
+
+/* given i with 0 <= i <= k, return I_i as per the rfc3448bis notation */
+static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i)
+{
+	BUG_ON(i >= lh->counter);
+	return lh->ring[LIH_INDEX(lh->counter - i - 1)]->li_length;
+}
+
+/*
+ *	On-demand allocation and de-allocation of entries
+ */
+static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh)
+{
+	if (lh->ring[LIH_INDEX(lh->counter)] == NULL)
+		lh->ring[LIH_INDEX(lh->counter)] = kmem_cache_alloc(tfrc_lh_slab,
+								    GFP_ATOMIC);
+	return lh->ring[LIH_INDEX(lh->counter)];
+}
+
+void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
+{
+	if (!tfrc_lh_is_initialised(lh))
+		return;
+
+	for (lh->counter = 0; lh->counter < LIH_SIZE; lh->counter++)
+		if (lh->ring[LIH_INDEX(lh->counter)] != NULL) {
+			kmem_cache_free(tfrc_lh_slab,
+					lh->ring[LIH_INDEX(lh->counter)]);
+			lh->ring[LIH_INDEX(lh->counter)] = NULL;
+		}
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
+
 static struct kmem_cache *dccp_li_cachep __read_mostly;
 
 static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
@@ -98,6 +142,65 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
 
+static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
+{
+	u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
+	int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
+
+	for (i=0; i <= k; i++) {
+		i_i = tfrc_lh_get_interval(lh, i);
+
+		if (i < k) {
+			i_tot0 += i_i * tfrc_lh_weights[i];
+			w_tot  += tfrc_lh_weights[i];
+		}
+		if (i > 0)
+			i_tot1 += i_i * tfrc_lh_weights[i-1];
+	}
+
+	BUG_ON(w_tot == 0);
+	lh->i_mean = max(i_tot0, i_tot1) / w_tot;
+}
+
+/**
+ * tfrc_lh_update_i_mean  -  Update the `open' loss interval I_0
+ * For recomputing p: returns `true' if p > p_prev  <=>  1/p < 1/p_prev
+ */
+u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+{
+	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
+	u32 old_i_mean = lh->i_mean;
+	s64 length;
+
+	if (cur == NULL)			/* not initialised */
+		return 0;
+
+	length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq);
+
+	if (length - cur->li_length <= 0)	/* duplicate or reordered */
+		return 0;
+
+	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
+		/*
+		 * Implements RFC 4342, 10.2:
+		 * If a packet S (skb) exists whose seqno comes `after' the one
+		 * starting the current loss interval (cur) and if the modulo-16
+		 * distance from C(cur) to C(S) is greater than 4, consider all
+		 * subsequent packets as belonging to a new loss interval. This
+		 * test is necessary since CCVal may wrap between intervals.
+		 */
+		cur->li_is_closed = 1;
+
+	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
+		return 0;
+
+	cur->li_length = length;
+	tfrc_lh_calc_i_mean(lh);
+
+	return (lh->i_mean < old_i_mean);
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
+
 static int dccp_li_hist_interval_new(struct list_head *list,
 				     const u64 seq_loss, const u8 win_loss)
 {
@@ -284,6 +387,54 @@ void dccp_li_update_li(struct sock *sk,
 
 EXPORT_SYMBOL_GPL(dccp_li_update_li);
 
+/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
+static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
+				     struct tfrc_rx_hist_entry *new_loss)
+{
+	return	dccp_delta_seqno(cur->li_seqno, new_loss->tfrchrx_seqno) > 0 &&
+		(cur->li_is_closed || SUB16(new_loss->tfrchrx_ccval, cur->li_ccval) > 4);
+}
+
+/** tfrc_lh_interval_add  -  Insert new record into the Loss Interval database
+ * @lh:		   Loss Interval database
+ * @rh:		   Receive history containing a fresh loss event
+ * @calc_first_li: Caller-dependent routine to compute length of first interval
+ * @sk:		   Used by @calc_first_li in caller-specific way (subtyping)
+ * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
+ */
+int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+			 u32 (*calc_first_li)(struct sock *), struct sock *sk)
+{
+	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
+
+	if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
+		return 0;
+
+	new = tfrc_lh_demand_next(lh);
+	if (unlikely(new == NULL)) {
+		DCCP_CRIT("Cannot allocate/add loss record.");
+		return 0;
+	}
+
+	new->li_seqno	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
+	new->li_ccval	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_ccval;
+	new->li_is_closed = 0;
+
+	if (++lh->counter == 1)
+		lh->i_mean = new->li_length = (*calc_first_li)(sk);
+	else {
+		cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno);
+		new->li_length = dccp_delta_seqno(new->li_seqno,
+				  tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno);
+		if (lh->counter > (2*LIH_SIZE))
+			lh->counter -= LIH_SIZE;
+
+		tfrc_lh_calc_i_mean(lh);
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
+
 int __init dccp_li_init(void)
 {
 	dccp_li_cachep = kmem_cache_create("dccp_li_hist",
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 27bee92dae1..5e3c5c54a49 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -3,6 +3,7 @@
 /*
  *  net/dccp/ccids/lib/loss_interval.h
  *
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
@@ -12,11 +13,64 @@
  *  Software Foundation; either version 2 of the License, or (at your option)
  *  any later version.
  */
-
 #include <linux/ktime.h>
 #include <linux/list.h>
+#include <linux/slab.h>
+
+/*
+ * Number of loss intervals (RFC 4342, 8.6.1). The history size is one more than
+ * NINTERVAL, since the `open' interval I_0 is always stored as the first entry.
+ */
+#define NINTERVAL	8
+#define LIH_SIZE	(NINTERVAL + 1)
+
+/**
+ *  tfrc_loss_interval  -  Loss history record for TFRC-based protocols
+ *  @li_seqno:		Highest received seqno before the start of loss
+ *  @li_ccval:		The CCVal belonging to @li_seqno
+ *  @li_is_closed:	Whether @li_seqno is older than 1 RTT
+ *  @li_length:		Loss interval sequence length
+ */
+struct tfrc_loss_interval {
+	u64		 li_seqno:48,
+			 li_ccval:4,
+			 li_is_closed:1;
+	u32		 li_length;
+};
+
+/**
+ *  tfrc_loss_hist  -  Loss record database
+ *  @ring:	Circular queue managed in LIFO manner
+ *  @counter:	Current count of entries (can be more than %LIH_SIZE)
+ *  @i_mean:	Current Average Loss Interval [RFC 3448, 5.4]
+ */
+struct tfrc_loss_hist {
+	struct tfrc_loss_interval	*ring[LIH_SIZE];
+	u8				counter;
+	u32				i_mean;
+};
+
+static inline void tfrc_lh_init(struct tfrc_loss_hist *lh)
+{
+	memset(lh, 0, sizeof(struct tfrc_loss_hist));
+}
+
+static inline u8 tfrc_lh_is_initialised(struct tfrc_loss_hist *lh)
+{
+	return lh->counter > 0;
+}
+
+static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
+{
+	return min(lh->counter, (u8)LIH_SIZE);
+}
 
 extern void dccp_li_hist_purge(struct list_head *list);
+struct tfrc_rx_hist;
+extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+				 u32 (*first_li)(struct sock *), struct sock *);
+extern u8   tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
 extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
 
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index dd2cf2d6b8f..5b10a1ecf13 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,11 +151,10 @@ void tfrc_rx_packet_history_exit(void)
 	}
 }
 
-void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
-			     const struct sk_buff *skb,
-			     const u32 ndp)
+static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
+					       const struct sk_buff *skb,
+					       const u32 ndp)
 {
-	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 
 	entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
@@ -164,6 +163,15 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 	entry->tfrchrx_ndp   = ndp;
 	entry->tfrchrx_tstamp = ktime_get_real();
 }
+
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+			     const struct sk_buff *skb,
+			     const u32 ndp)
+{
+	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
+
+	tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
+}
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
 /* has the packet contained in skb been seen before? */
@@ -209,6 +217,208 @@ int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
 
+static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
+{
+	const u8 idx_a = tfrc_rx_hist_index(h, a),
+		 idx_b = tfrc_rx_hist_index(h, b);
+	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+
+	h->ring[idx_a] = h->ring[idx_b];
+	h->ring[idx_b] = tmp;
+}
+
+/*
+ * Private helper functions for loss detection.
+ *
+ * In the descriptions, `Si' refers to the sequence number of entry number i,
+ * whose NDP count is `Ni' (lower case is used for variables).
+ * Note: All __after_loss functions expect that a test against duplicates has
+ *       been performed already: the seqno of the skb must not be less than the
+ *       seqno of loss_prev; and it must not equal that of any valid hist_entry.
+ */
+static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
+{
+	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
+	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
+	    s2 = DCCP_SKB_CB(skb)->dccpd_seq;
+	int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
+	   d12 = dccp_delta_seqno(s1, s2), d2;
+
+	if (d12 > 0) {			/* S1  <  S2 */
+		h->loss_count = 2;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
+		return;
+	}
+
+	/* S0  <  S2  <  S1 */
+	d2 = dccp_delta_seqno(s0, s2);
+
+	if (d2 == 1 || n2 >= d2) {	/* S2 is direct successor of S0 */
+		int d21 = -d12;
+
+		if (d21 == 1 || n1 >= d21) {
+			/* hole is filled: S0, S2, and S1 are consecutive */
+			h->loss_count = 0;
+			h->loss_start = tfrc_rx_hist_index(h, 1);
+		} else
+			/* gap between S2 and S1: just update loss_prev */
+			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
+
+	} else {			/* hole between S0 and S2 */
+		/*
+		 * Reorder history to insert S2 between S0 and s1
+		 */
+		tfrc_rx_hist_swap(h, 0, 3);
+		h->loss_start = tfrc_rx_hist_index(h, 3);
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
+		h->loss_count = 2;
+	}
+}
+
+/* return 1 if a new loss event has been identified */
+static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
+{
+	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
+	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
+	    s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
+	    s3 = DCCP_SKB_CB(skb)->dccpd_seq;
+	int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
+	   d23 = dccp_delta_seqno(s2, s3), d13, d3, d31;
+
+	if (d23 > 0) {			/* S2  <  S3 */
+		h->loss_count = 3;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
+		return 1;
+	}
+
+	/* S3  <  S2 */
+	d13 = dccp_delta_seqno(s1, s3);
+
+	if (d13 > 0) {
+		/*
+		 * The sequence number order is S1, S3, S2
+		 * Reorder history to insert entry between S1 and S2
+		 */
+		tfrc_rx_hist_swap(h, 2, 3);
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
+		h->loss_count = 3;
+		return 1;
+	}
+
+	/* S0  <  S3  <  S1 */
+	d31 = -d13;
+	d3  = dccp_delta_seqno(s0, s3);
+
+	if (d3 == 1 || n3 >= d3) {	/* S3 is a successor of S0 */
+
+		if (d31 == 1 || n1 >= d31) {
+			/* hole between S0 and S1 filled by S3 */
+			int  d2 = dccp_delta_seqno(s1, s2),
+			     n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
+
+			if (d2 == 1 || n2 >= d2) {
+				/* entire hole filled by S0, S3, S1, S2 */
+				h->loss_start = tfrc_rx_hist_index(h, 2);
+				h->loss_count = 0;
+			} else {
+				/* gap remains between S1 and S2 */
+				h->loss_start = tfrc_rx_hist_index(h, 1);
+				h->loss_count = 1;
+			}
+
+		} else /* gap exists between S3 and S1, loss_count stays at 2 */
+			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);
+
+		return 0;
+	}
+
+	/*
+	 * The remaining case: S3 is not a successor of S0.
+	 * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1
+	 */
+	tfrc_rx_hist_swap(h, 0, 3);
+	h->loss_start = tfrc_rx_hist_index(h, 3);
+	tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
+	h->loss_count = 3;
+
+	return 1;
+}
+
+/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */
+static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2)
+{
+	DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count);
+
+	return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno,
+				tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno);
+}
+
+/* recycle RX history records to continue loss detection if necessary */
+static void __three_after_loss(struct tfrc_rx_hist *h)
+{
+	/*
+	 * The distance between S0 and S1 is always greater than 1 and the NDP
+	 * count of S1 is smaller than this distance. Otherwise there would
+	 * have been no loss. Hence it is only necessary to see whether there
+	 * are further missing data packets between S1/S2 and S2/S3.
+	 */
+	int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2),
+	    d3 = tfrc_rx_hist_delta_seqno(h, 2, 3),
+	    n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
+	    n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
+
+	if (d2 == 1 || n2 >= d2) {	/* S2 is successor to S1 */
+
+		if (d3 == 1 || n3 >= d3) {
+			/* S3 is successor of S2: entire hole is filled */
+			h->loss_start = tfrc_rx_hist_index(h, 3);
+			h->loss_count = 0;
+		} else {
+			/* gap between S2 and S3 */
+			h->loss_start = tfrc_rx_hist_index(h, 2);
+			h->loss_count = 1;
+		}
+
+	} else {			/* gap between S1 and S2 */
+		h->loss_start = tfrc_rx_hist_index(h, 1);
+		h->loss_count = 2;
+	}
+}
+
+/**
+ *  tfrc_rx_handle_loss  -  Loss detection and further processing
+ *  @h:		    The non-empty RX history object
+ *  @lh:	    Loss Intervals database to update
+ *  @skb:	    Currently received packet
+ *  @ndp:	    The NDP count belonging to @skb
+ *  @calc_first_li: Caller-dependent computation of first loss interval in @lh
+ *  @sk:	    Used by @calc_first_li (see tfrc_lh_interval_add)
+ *  Chooses action according to pending loss, updates LI database when a new
+ *  loss was detected, and does required post-processing. Returns 1 when caller
+ *  should send feedback, 0 otherwise.
+ */
+int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+			struct tfrc_loss_hist *lh,
+			struct sk_buff *skb, u32 ndp,
+			u32 (*calc_first_li)(struct sock *), struct sock *sk)
+{
+	int is_new_loss = 0;
+
+	if (h->loss_count == 1) {
+		__one_after_loss(h, skb, ndp);
+	} else if (h->loss_count != 2) {
+		DCCP_BUG("invalid loss_count %d", h->loss_count);
+	} else if (__two_after_loss(h, skb, ndp)) {
+		/*
+		 * Update Loss Interval database and recycle RX records
+		 */
+		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
+		__three_after_loss(h);
+	}
+	return is_new_loss;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
+
 int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
 {
 	int i;
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index e58b0fcdc6a..24edd8df54b 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -36,10 +36,9 @@
 #ifndef _DCCP_PKT_HIST_
 #define _DCCP_PKT_HIST_
 
-#include <linux/ktime.h>
-#include <linux/types.h>
-
-struct sk_buff;
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "tfrc.h"
 
 struct tfrc_tx_hist_entry;
 
@@ -125,6 +124,10 @@ extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
 extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
 					   const struct sk_buff *skb, u32 ndp);
+struct tfrc_loss_hist;
+extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *, struct tfrc_loss_hist *,
+				struct sk_buff *skb, u32 ndp,
+				u32 (*first_li)(struct sock *), struct sock *);
 extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
 				   const struct sk_buff *skb);
 extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ab8848c0f8c..1fb1187bbf1 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -17,6 +17,9 @@
 #include <linux/types.h>
 #include <asm/div64.h>
 #include "../../dccp.h"
+/* internal includes that this module exports: */
+#include "loss_interval.h"
+#include "packet_history.h"
 
 #ifdef CONFIG_IP_DCCP_TFRC_DEBUG
 extern int tfrc_debug;
-- 
cgit v1.2.3


From db64196038e79b0460245d558e54ff4a21a52d1f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 13:57:14 -0200
Subject: [CCID3]: Redundant debugging output / documentation

Each time feedback is sent two lines are printed:

	ccid3_hc_rx_send_feedback: client ... - entry
	ccid3_hc_rx_send_feedback: Interval ...usec, X_recv=..., 1/p=...

The first line is redundant and thus removed.

Further, documentation of ccid3_hc_rx_sock (capitalisation) is made consistent.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 2 --
 net/dccp/ccids/ccid3.h | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f8644bfd88f..8266dfde89c 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -685,8 +685,6 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 	ktime_t now;
 	s64 delta = 0;
 
-	ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
-
 	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
 		return;
 
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 3c33dc670cf..6ceeb8013f5 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -135,9 +135,9 @@ enum ccid3_hc_rx_states {
  *
  *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
  *  @ccid3hcrx_rtt  -  Receiver estimate of rtt (non-standard)
- *  @ccid3hcrx_p  -  current loss event rate (RFC 3448 5.4)
+ *  @ccid3hcrx_p  -  Current loss event rate (RFC 3448 5.4)
  *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
- *  @ccid3hcrx_state  -  receiver state, one of %ccid3_hc_rx_states
+ *  @ccid3hcrx_state  -  Receiver state, one of %ccid3_hc_rx_states
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
  *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
-- 
cgit v1.2.3


From de0d411cb8ea51175f52d935faead5c542b6e007 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 14:03:01 -0200
Subject: [TFRC]: CCID3 (and CCID4) needs to access these inlines

This moves two inlines back to packet_history.h: these are not private
to packet_history.c, but are needed by CCID3/4 to detect whether a new
loss is indicated, or whether a loss is already pending.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 26 --------------------------
 net/dccp/ccids/lib/packet_history.h | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 31 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 5b10a1ecf13..20af1a69342 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -191,32 +191,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
-/* initialise loss detection and disable RTT sampling */
-static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
-{
-	h->loss_count = 1;
-}
-
-/* indicate whether previously a packet was detected missing */
-static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
-{
-	return h->loss_count;
-}
-
-/* any data packets missing between last reception and skb ? */
-int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
-				    const struct sk_buff *skb, u32 ndp)
-{
-	int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
-				     DCCP_SKB_CB(skb)->dccpd_seq);
-
-	if (delta > 1 && ndp < delta)
-		tfrc_rx_hist_loss_indicated(h);
-
-	return tfrc_rx_hist_loss_pending(h);
-}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
-
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
 	const u8 idx_a = tfrc_rx_hist_index(h, a),
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 24edd8df54b..c7eeda49cb2 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -118,16 +118,43 @@ static inline struct tfrc_rx_hist_entry *
 	return h->ring[h->loss_start];
 }
 
+/* initialise loss detection and disable RTT sampling */
+static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
+{
+	h->loss_count = 1;
+}
+
+/* indicate whether previously a packet was detected missing */
+static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
+{
+	return h->loss_count;
+}
+
+/* any data packets missing between last reception and skb ? */
+static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+						  const struct sk_buff *skb,
+						  u32 ndp)
+{
+	int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
+				     DCCP_SKB_CB(skb)->dccpd_seq);
+
+	if (delta > 1 && ndp < delta)
+		tfrc_rx_hist_loss_indicated(h);
+
+	return tfrc_rx_hist_loss_pending(h);
+}
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u32 ndp);
 
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
-extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
-					   const struct sk_buff *skb, u32 ndp);
+
 struct tfrc_loss_hist;
-extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *, struct tfrc_loss_hist *,
+extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+				struct tfrc_loss_hist *lh,
 				struct sk_buff *skb, u32 ndp,
-				u32 (*first_li)(struct sock *), struct sock *);
+				u32 (*first_li)(struct sock *sk),
+				struct sock *sk);
 extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
 				   const struct sk_buff *skb);
 extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
-- 
cgit v1.2.3


From 954c2db868ce896325dced91d5fba5e2226897a4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 14:06:14 -0200
Subject: [CCID3]: Interface CCID3 code with newer Loss Intervals Database

This hooks up the TFRC Loss Interval database with CCID 3 packet reception.
In addition, it makes the CCID-specific computation of the first loss
interval (which requires access to all the guts of CCID3) local to ccid3.c.

The patch also fixes an omission in the DCCP code, that of a default /
fallback RTT value (defined in section 3.4 of RFC 4340 as 0.2 sec); while
at it, the  upper bound of 4 seconds for an RTT sample has  been reduced to
match the initial TCP RTO value of 3 seconds from[RFC 1122, 4.2.3.1].

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c             | 72 +++++++++++++++++++++++++++++++-------
 net/dccp/ccids/ccid3.h             | 10 +++---
 net/dccp/ccids/lib/loss_interval.c | 18 +++++-----
 net/dccp/ccids/lib/tfrc.c          | 10 +++---
 net/dccp/dccp.h                    |  7 ++--
 5 files changed, 84 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8266dfde89c..8f112d18d45 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1,6 +1,7 @@
 /*
  *  net/dccp/ccids/ccid3.c
  *
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
@@ -33,11 +34,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include "../ccid.h"
 #include "../dccp.h"
-#include "lib/packet_history.h"
-#include "lib/loss_interval.h"
-#include "lib/tfrc.h"
 #include "ccid3.h"
 
 #include <asm/unaligned.h>
@@ -757,6 +754,46 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
+/** ccid3_first_li  -  Implements [RFC 3448, 6.3.1]
+ *
+ * Determine the length of the first loss interval via inverse lookup.
+ * Assume that X_recv can be computed by the throughput equation
+ *		    s
+ *	X_recv = --------
+ *		 R * fval
+ * Find some p such that f(p) = fval; return 1/p (scaled).
+ */
+static u32 ccid3_first_li(struct sock *sk)
+{
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	u32 x_recv, p, delta;
+	u64 fval;
+
+	if (hcrx->ccid3hcrx_rtt == 0) {
+		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+		hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
+	}
+
+	delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback));
+	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	if (x_recv == 0) {		/* would also trigger divide-by-zero */
+		DCCP_WARN("X_recv==0\n");
+		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+			DCCP_BUG("stored value of X_recv is zero");
+			return ~0U;
+		}
+	}
+
+	fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt);
+	fval = scaled_div32(fval, x_recv);
+	p = tfrc_calc_x_reverse_lookup(fval);
+
+	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
+		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+
+	return p == 0 ? ~0U : scaled_div(1, p);
+}
+
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
@@ -794,6 +831,14 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * Handle pending losses and otherwise check for new loss
 	 */
+	if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) &&
+	    tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist,
+				&hcrx->ccid3hcrx_li_hist,
+				skb, ndp, ccid3_first_li, sk) ) {
+		do_feedback = CCID3_FBACK_PARAM_CHANGE;
+		goto done_receiving;
+	}
+
 	if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
 		goto update_records;
 
@@ -803,7 +848,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(!is_data_packet))
 		goto update_records;
 
-	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {  /* no loss so far: p = 0 */
+	if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) {
 		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
 		/*
 		 * Empty loss history: no loss so far, hence p stays 0.
@@ -812,6 +857,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (sample != 0)
 			hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
+
+	} else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) {
+		/*
+		 * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
+		 * has decreased (resp. p has increased), send feedback now.
+		 */
+		do_feedback = CCID3_FBACK_PARAM_CHANGE;
 	}
 
 	/*
@@ -823,6 +875,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 update_records:
 	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
 
+done_receiving:
 	if (do_feedback)
 		ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
 }
@@ -831,10 +884,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
 
-	ccid3_pr_debug("entry\n");
-
 	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
-	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
+	tfrc_lh_init(&hcrx->ccid3hcrx_li_hist);
 	return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
 }
 
@@ -844,11 +895,8 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 
 	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
 
-	/* Empty packet history */
 	tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
-
-	/* Empty loss interval history */
-	dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
+	tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist);
 }
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 6ceeb8013f5..e9f6ff4f055 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -41,7 +41,7 @@
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/tfrc.h>
-#include "lib/packet_history.h"
+#include "lib/tfrc.h"
 #include "../ccid.h"
 
 /* Two seconds as per RFC 3448 4.2 */
@@ -141,8 +141,8 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
  *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
- *  @ccid3hcrx_hist  -  Packet history
- *  @ccid3hcrx_li_hist  -  Loss Interval History
+ *  @ccid3hcrx_hist  -  Packet history (loss detection + RTT sampling)
+ *  @ccid3hcrx_li_hist  -  Loss Interval database
  *  @ccid3hcrx_s  -  Received packet size in bytes
  *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  */
@@ -156,9 +156,9 @@ struct ccid3_hc_rx_sock {
 	u32				ccid3hcrx_bytes_recv;
 	ktime_t				ccid3hcrx_tstamp_last_feedback;
 	struct tfrc_rx_hist		ccid3hcrx_hist;
-	struct list_head		ccid3hcrx_li_hist;
+	struct tfrc_loss_hist		ccid3hcrx_li_hist;
 	u16				ccid3hcrx_s;
-	u32				ccid3hcrx_pinv;
+#define ccid3hcrx_pinv			ccid3hcrx_li_hist.i_mean
 };
 
 static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 39980d1f535..8b962c1f14b 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -435,18 +435,18 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
 
-int __init dccp_li_init(void)
+int __init tfrc_li_init(void)
 {
-	dccp_li_cachep = kmem_cache_create("dccp_li_hist",
-					   sizeof(struct dccp_li_hist_entry),
-					   0, SLAB_HWCACHE_ALIGN, NULL);
-	return dccp_li_cachep == NULL ? -ENOBUFS : 0;
+	tfrc_lh_slab = kmem_cache_create("tfrc_li_hist",
+					 sizeof(struct tfrc_loss_interval), 0,
+					 SLAB_HWCACHE_ALIGN, NULL);
+	return tfrc_lh_slab == NULL ? -ENOBUFS : 0;
 }
 
-void dccp_li_exit(void)
+void tfrc_li_exit(void)
 {
-	if (dccp_li_cachep != NULL) {
-		kmem_cache_destroy(dccp_li_cachep);
-		dccp_li_cachep = NULL;
+	if (tfrc_lh_slab != NULL) {
+		kmem_cache_destroy(tfrc_lh_slab);
+		tfrc_lh_slab = NULL;
 	}
 }
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 20763fa75d4..d1dfbb8de64 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -19,12 +19,12 @@ extern void tfrc_tx_packet_history_exit(void);
 extern int  tfrc_rx_packet_history_init(void);
 extern void tfrc_rx_packet_history_exit(void);
 
-extern int  dccp_li_init(void);
-extern void dccp_li_exit(void);
+extern int  tfrc_li_init(void);
+extern void tfrc_li_exit(void);
 
 static int __init tfrc_module_init(void)
 {
-	int rc = dccp_li_init();
+	int rc = tfrc_li_init();
 
 	if (rc)
 		goto out;
@@ -41,7 +41,7 @@ static int __init tfrc_module_init(void)
 out_free_tx_history:
 	tfrc_tx_packet_history_exit();
 out_free_loss_intervals:
-	dccp_li_exit();
+	tfrc_li_exit();
 out:
 	return rc;
 }
@@ -50,7 +50,7 @@ static void __exit tfrc_module_exit(void)
 {
 	tfrc_rx_packet_history_exit();
 	tfrc_tx_packet_history_exit();
-	dccp_li_exit();
+	tfrc_li_exit();
 }
 
 module_init(tfrc_module_init);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f4a5ea116e3..07dcbe73b50 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -74,9 +74,12 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 
 #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
 
-/* bounds for sampled RTT values from packet exchanges (in usec) */
+/*
+ * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
+ */
 #define DCCP_SANE_RTT_MIN	100
-#define DCCP_SANE_RTT_MAX	(4 * USEC_PER_SEC)
+#define DCCP_FALLBACK_RTT	(USEC_PER_SEC / 5)
+#define DCCP_SANE_RTT_MAX	(3 * USEC_PER_SEC)
 
 /* Maximal interval between probes for local resources.  */
 #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
-- 
cgit v1.2.3


From 3f71c81ac37b27b824e9ce18fe17438dc2af4a16 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Dec 2007 14:23:08 -0200
Subject: [TFRC]: Remove previous loss intervals implementation

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/loss_interval.c | 266 -------------------------------------
 net/dccp/ccids/lib/loss_interval.h |  10 +-
 2 files changed, 1 insertion(+), 275 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8b962c1f14b..849e181e698 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -14,15 +14,6 @@
 #include <net/sock.h>
 #include "tfrc.h"
 
-#define DCCP_LI_HIST_IVAL_F_LENGTH  8
-
-struct dccp_li_hist_entry {
-	struct list_head dccplih_node;
-	u64		 dccplih_seqno:48,
-			 dccplih_win_count:4;
-	u32		 dccplih_interval;
-};
-
 static struct kmem_cache  *tfrc_lh_slab  __read_mostly;
 /* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
 static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
@@ -71,77 +62,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
 
-static struct kmem_cache *dccp_li_cachep __read_mostly;
-
-static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
-{
-	return kmem_cache_alloc(dccp_li_cachep, prio);
-}
-
-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(dccp_li_cachep, entry);
-}
-
-void dccp_li_hist_purge(struct list_head *list)
-{
-	struct dccp_li_hist_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, list, dccplih_node) {
-		list_del_init(&entry->dccplih_node);
-		kmem_cache_free(dccp_li_cachep, entry);
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
-
-/* Weights used to calculate loss event rate */
-/*
- * These are integers as per section 8 of RFC3448. We can then divide by 4 *
- * when we use it.
- */
-static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
-	4, 4, 4, 4, 3, 2, 1, 1,
-};
-
-u32 dccp_li_hist_calc_i_mean(struct list_head *list)
-{
-	struct dccp_li_hist_entry *li_entry, *li_next;
-	int i = 0;
-	u32 i_tot;
-	u32 i_tot0 = 0;
-	u32 i_tot1 = 0;
-	u32 w_tot  = 0;
-
-	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
-		if (li_entry->dccplih_interval != ~0U) {
-			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
-			w_tot  += dccp_li_hist_w[i];
-			if (i != 0)
-				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
-		}
-
-
-		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
-			break;
-	}
-
-	if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
-		return 0;
-
-	i_tot = max(i_tot0, i_tot1);
-
-	if (!w_tot) {
-		DCCP_WARN("w_tot = 0\n");
-		return 1;
-	}
-
-	return i_tot / w_tot;
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
-
 static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 {
 	u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
@@ -201,192 +121,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
 
-static int dccp_li_hist_interval_new(struct list_head *list,
-				     const u64 seq_loss, const u8 win_loss)
-{
-	struct dccp_li_hist_entry *entry;
-	int i;
-
-	for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
-		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
-		if (entry == NULL) {
-			dccp_li_hist_purge(list);
-			DCCP_BUG("loss interval list entry is NULL");
-			return 0;
-		}
-		entry->dccplih_interval = ~0;
-		list_add(&entry->dccplih_node, list);
-	}
-
-	entry->dccplih_seqno     = seq_loss;
-	entry->dccplih_win_count = win_loss;
-	return 1;
-}
-
-/* calculate first loss interval
- *
- * returns estimated loss interval in usecs */
-static u32 dccp_li_calc_first_li(struct sock *sk,
-				 struct list_head *hist_list,
-				 ktime_t last_feedback,
-				 u16 s, u32 bytes_recv,
-				 u32 previous_x_recv)
-{
-/*
- * FIXME:
- * Will be rewritten in the upcoming new loss intervals code.
- * Has to be commented ou because it relies on the old rx history
- * data structures
- */
-#if 0
-	struct tfrc_rx_hist_entry *entry, *next, *tail = NULL;
-	u32 x_recv, p;
-	suseconds_t rtt, delta;
-	ktime_t tstamp = ktime_set(0, 0);
-	int interval = 0;
-	int win_count = 0;
-	int step = 0;
-	u64 fval;
-
-	list_for_each_entry_safe(entry, next, hist_list, tfrchrx_node) {
-		if (tfrc_rx_hist_entry_data_packet(entry)) {
-			tail = entry;
-
-			switch (step) {
-			case 0:
-				tstamp	  = entry->tfrchrx_tstamp;
-				win_count = entry->tfrchrx_ccval;
-				step = 1;
-				break;
-			case 1:
-				interval = win_count - entry->tfrchrx_ccval;
-				if (interval < 0)
-					interval += TFRC_WIN_COUNT_LIMIT;
-				if (interval > 4)
-					goto found;
-				break;
-			}
-		}
-	}
-
-	if (unlikely(step == 0)) {
-		DCCP_WARN("%s(%p), packet history has no data packets!\n",
-			  dccp_role(sk), sk);
-		return ~0;
-	}
-
-	if (unlikely(interval == 0)) {
-		DCCP_WARN("%s(%p), Could not find a win_count interval > 0. "
-			  "Defaulting to 1\n", dccp_role(sk), sk);
-		interval = 1;
-	}
-found:
-	if (!tail) {
-		DCCP_CRIT("tail is null\n");
-		return ~0;
-	}
-
-	delta = ktime_us_delta(tstamp, tail->tfrchrx_tstamp);
-	DCCP_BUG_ON(delta < 0);
-
-	rtt = delta * 4 / interval;
-	dccp_pr_debug("%s(%p), approximated RTT to %dus\n",
-		      dccp_role(sk), sk, (int)rtt);
-
-	/*
-	 * Determine the length of the first loss interval via inverse lookup.
-	 * Assume that X_recv can be computed by the throughput equation
-	 *		    s
-	 *	X_recv = --------
-	 *		 R * fval
-	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
-	 */
-	if (rtt == 0) {			/* would result in divide-by-zero */
-		DCCP_WARN("RTT==0\n");
-		return ~0;
-	}
-
-	delta = ktime_us_delta(ktime_get_real(), last_feedback);
-	DCCP_BUG_ON(delta <= 0);
-
-	x_recv = scaled_div32(bytes_recv, delta);
-	if (x_recv == 0) {		/* would also trigger divide-by-zero */
-		DCCP_WARN("X_recv==0\n");
-		if (previous_x_recv == 0) {
-			DCCP_BUG("stored value of X_recv is zero");
-			return ~0;
-		}
-		x_recv = previous_x_recv;
-	}
-
-	fval = scaled_div(s, rtt);
-	fval = scaled_div32(fval, x_recv);
-	p = tfrc_calc_x_reverse_lookup(fval);
-
-	dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
-		      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
-
-	if (p != 0)
-		return 1000000 / p;
-#endif
-	return ~0;
-}
-
-void dccp_li_update_li(struct sock *sk,
-		       struct list_head *li_hist_list,
-		       struct list_head *hist_list,
-		       ktime_t last_feedback, u16 s, u32 bytes_recv,
-		       u32 previous_x_recv, u64 seq_loss, u8 win_loss)
-{
-	struct dccp_li_hist_entry *head;
-	u64 seq_temp;
-
-	if (list_empty(li_hist_list)) {
-		if (!dccp_li_hist_interval_new(li_hist_list, seq_loss,
-					       win_loss))
-			return;
-
-		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
-				  dccplih_node);
-		head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list,
-							       last_feedback,
-							       s, bytes_recv,
-							       previous_x_recv);
-	} else {
-		struct dccp_li_hist_entry *entry;
-		struct list_head *tail;
-
-		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
-				  dccplih_node);
-		/* FIXME win count check removed as was wrong */
-		/* should make this check with receive history */
-		/* and compare there as per section 10.2 of RFC4342 */
-
-		/* new loss event detected */
-		/* calculate last interval length */
-		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
-		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
-
-		if (entry == NULL) {
-			DCCP_BUG("out of memory - can not allocate entry");
-			return;
-		}
-
-		list_add(&entry->dccplih_node, li_hist_list);
-
-		tail = li_hist_list->prev;
-		list_del(tail);
-		kmem_cache_free(dccp_li_cachep, tail);
-
-		/* Create the newest interval */
-		entry->dccplih_seqno = seq_loss;
-		entry->dccplih_interval = seq_temp;
-		entry->dccplih_win_count = win_loss;
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_update_li);
-
 /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
 static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
 				     struct tfrc_rx_hist_entry *new_loss)
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 5e3c5c54a49..246018a3b26 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -65,19 +65,11 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
 	return min(lh->counter, (u8)LIH_SIZE);
 }
 
-extern void dccp_li_hist_purge(struct list_head *list);
 struct tfrc_rx_hist;
+
 extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
 				 u32 (*first_li)(struct sock *), struct sock *);
 extern u8   tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
 extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
-extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
-
-extern void dccp_li_update_li(struct sock *sk,
-			      struct list_head *li_hist_list,
-			      struct list_head *hist_list,
-			      ktime_t last_feedback, u16 s,
-			      u32 bytes_recv, u32 previous_x_recv,
-			      u64 seq_loss, u8 win_loss);
 #endif /* _DCCP_LI_HIST_ */
-- 
cgit v1.2.3


From 815f4e57e9fc67456624ecde0515a901368c78d2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 10:36:59 -0800
Subject: [IPSEC]: Make xfrm_lookup flags argument a bit-field

This patch introduces an enum for bits in the flags argument of xfrm_lookup.
This is so that we can cram more information into it later.

Since all current users use just the values 0 and 1, XFRM_LOOKUP_WAIT has
been added with the value 1 << 0 to represent the current meaning of flags.

The test in __xfrm_lookup has been changed accordingly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 95dc581861e..3d516d57b5b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1565,7 +1565,7 @@ restart:
 				xfrm_pol_put(policy);
 				return -EREMOTE;
 			}
-			if (err == -EAGAIN && flags) {
+			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
 				DECLARE_WAITQUEUE(wait, current);
 
 				add_wait_queue(&km_waitq, &wait);
-- 
cgit v1.2.3


From d5422efe680fc55010c6ddca2370ca9548a96355 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 10:44:16 -0800
Subject: [IPSEC]: Added xfrm_decode_session_reverse and
 xfrmX_policy_check_reverse

RFC 4301 requires us to relookup ICMP traffic that does not match any
policies using the reverse of its payload.  This patch adds the functions
xfrm_decode_session_reverse and xfrmX_policy_check_reverse so we can get
the reverse flow to perform such a lookup.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 10 +++++-----
 net/ipv6/xfrm6_policy.c | 10 +++++-----
 net/xfrm/xfrm_policy.c  | 17 +++++++++++------
 3 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 10b72d185bb..5ccae3a463c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -115,7 +115,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 }
 
 static void
-_decode_session4(struct sk_buff *skb, struct flowi *fl)
+_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
@@ -131,8 +131,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				__be16 *ports = (__be16 *)xprth;
 
-				fl->fl_ip_sport = ports[0];
-				fl->fl_ip_dport = ports[1];
+				fl->fl_ip_sport = ports[!!reverse];
+				fl->fl_ip_dport = ports[!reverse];
 			}
 			break;
 
@@ -174,8 +174,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		}
 	}
 	fl->proto = iph->protocol;
-	fl->fl4_dst = iph->daddr;
-	fl->fl4_src = iph->saddr;
+	fl->fl4_dst = reverse ? iph->saddr : iph->daddr;
+	fl->fl4_src = reverse ? iph->daddr : iph->saddr;
 	fl->fl4_tos = iph->tos;
 }
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 181cf91538f..d26b7dc3f33 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -123,7 +123,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 }
 
 static inline void
-_decode_session6(struct sk_buff *skb, struct flowi *fl)
+_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 {
 	u16 offset = skb_network_header_len(skb);
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -132,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 	u8 nexthdr = nh[IP6CB(skb)->nhoff];
 
 	memset(fl, 0, sizeof(struct flowi));
-	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
-	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
+	ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
+	ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
 
 	while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
 		nh = skb_network_header(skb);
@@ -156,8 +156,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
 				__be16 *ports = (__be16 *)exthdr;
 
-				fl->fl_ip_sport = ports[0];
-				fl->fl_ip_dport = ports[1];
+				fl->fl_ip_sport = ports[!!reverse];
+				fl->fl_ip_dport = ports[!reverse];
 			}
 			fl->proto = nexthdr;
 			return;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3d516d57b5b..2e10d46c0e8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1732,8 +1732,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	return start;
 }
 
-int
-xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+			  unsigned int family, int reverse)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 	int err;
@@ -1741,12 +1741,12 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
-	afinfo->decode_session(skb, fl);
+	afinfo->decode_session(skb, fl, reverse);
 	err = security_xfrm_decode_session(skb, &fl->secid);
 	xfrm_policy_put_afinfo(afinfo);
 	return err;
 }
-EXPORT_SYMBOL(xfrm_decode_session);
+EXPORT_SYMBOL(__xfrm_decode_session);
 
 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
 {
@@ -1768,11 +1768,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	int npols = 0;
 	int xfrm_nr;
 	int pi;
+	int reverse;
 	struct flowi fl;
-	u8 fl_dir = policy_to_flow_dir(dir);
+	u8 fl_dir;
 	int xerr_idx = -1;
 
-	if (xfrm_decode_session(skb, &fl, family) < 0)
+	reverse = dir & ~XFRM_POLICY_MASK;
+	dir &= XFRM_POLICY_MASK;
+	fl_dir = policy_to_flow_dir(dir);
+
+	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0)
 		return 0;
 	nf_nat_decode_session(skb, &fl, family);
 
-- 
cgit v1.2.3


From 8b7817f3a959ed99d7443afc12f78a7e1fcc2063 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 10:44:43 -0800
Subject: [IPSEC]: Add ICMP host relookup support

RFC 4301 requires us to relookup ICMP traffic that does not match any
policies using the reverse of its payload.  This patch implements this
for ICMP traffic that originates from or terminates on localhost.

This is activated on outbound with the new policy flag XFRM_POLICY_ICMP,
and on inbound by the new state flag XFRM_STATE_ICMP.

On inbound the policy check is now performed by the ICMP protocol so
that it can repeat the policy check where necessary.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c     |  1 +
 net/ipv4/icmp.c        | 82 ++++++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/icmp.c        | 60 +++++++++++++++++++++++++++++++++---
 net/xfrm/xfrm_policy.c | 17 +++++++++--
 4 files changed, 150 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7f8b27ff94f..5089a369e99 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1291,6 +1291,7 @@ static struct net_protocol udp_protocol = {
 
 static struct net_protocol icmp_protocol = {
 	.handler =	icmp_rcv,
+	.no_policy =	1,
 };
 
 static int __init init_ipv4_mibs(void)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 13d74598d3e..3c41a6f7e6e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -92,6 +92,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 
 /*
  *	Build xmit assembly blocks
@@ -563,11 +564,71 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 				}
 			}
 		};
+		int err;
+		struct rtable *rt2;
+
 		security_skb_classify_flow(skb_in, &fl);
-		if (ip_route_output_key(&rt, &fl))
+		if (__ip_route_output_key(&rt, &fl))
+			goto out_unlock;
+
+		/* No need to clone since we're just using its address. */
+		rt2 = rt;
+
+		err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
+		switch (err) {
+		case 0:
+			if (rt != rt2)
+				goto route_done;
+			break;
+		case -EPERM:
+			rt = NULL;
+			break;
+		default:
+			goto out_unlock;
+		}
+
+		if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
+			goto out_unlock;
+
+		if (inet_addr_type(fl.fl4_src) == RTN_LOCAL)
+			err = __ip_route_output_key(&rt2, &fl);
+		else {
+			struct flowi fl2 = {};
+			struct dst_entry *odst;
+
+			fl2.fl4_dst = fl.fl4_src;
+			if (ip_route_output_key(&rt2, &fl2))
+				goto out_unlock;
+
+			/* Ugh! */
+			odst = skb_in->dst;
+			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
+					     RT_TOS(tos), rt2->u.dst.dev);
+
+			dst_release(&rt2->u.dst);
+			rt2 = (struct rtable *)skb_in->dst;
+			skb_in->dst = odst;
+		}
+
+		if (err)
+			goto out_unlock;
+
+		err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
+				  XFRM_LOOKUP_ICMP);
+		if (err == -ENOENT) {
+			if (!rt)
+				goto out_unlock;
+			goto route_done;
+		}
+
+		dst_release(&rt->u.dst);
+		rt = rt2;
+
+		if (err)
 			goto out_unlock;
 	}
 
+route_done:
 	if (!icmpv4_xrlim_allow(rt, type, code))
 		goto ende;
 
@@ -916,6 +977,22 @@ int icmp_rcv(struct sk_buff *skb)
 	struct icmphdr *icmph;
 	struct rtable *rt = (struct rtable *)skb->dst;
 
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb) &&
+	    skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) {
+		int nh;
+
+		if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
+			goto drop;
+
+		nh = skb_network_offset(skb);
+		skb_set_network_header(skb, sizeof(*icmph));
+
+		if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
+
+		skb_set_network_header(skb, nh);
+	}
+
 	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
@@ -929,8 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
 			goto error;
 	}
 
-	if (!pskb_pull(skb, sizeof(struct icmphdr)))
-		goto error;
+	__skb_pull(skb, sizeof(*icmph));
 
 	icmph = icmp_hdr(skb);
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 93c96cfd5ee..c0bea7bfaa8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -63,6 +63,7 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/icmp.h>
+#include <net/xfrm.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -86,7 +87,7 @@ static int icmpv6_rcv(struct sk_buff *skb);
 
 static struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
-	.flags		=	INET6_PROTO_FINAL,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
 static __inline__ int icmpv6_xmit_lock(void)
@@ -310,8 +311,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
+	struct dst_entry *dst2;
 	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
+	struct flowi fl2;
 	struct icmpv6_msg msg;
 	int iif = 0;
 	int addr_type = 0;
@@ -418,9 +421,42 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		goto out_dst_release;
 	}
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	/* No need to clone since we're just using its address. */
+	dst2 = dst;
+
+	err = xfrm_lookup(&dst, &fl, sk, 0);
+	switch (err) {
+	case 0:
+		if (dst != dst2)
+			goto route_done;
+		break;
+	case -EPERM:
+		dst = NULL;
+		break;
+	default:
+		goto out;
+	}
+
+	if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
+		goto out;
+
+	if (ip6_dst_lookup(sk, &dst2, &fl))
 		goto out;
 
+	err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
+	if (err == -ENOENT) {
+		if (!dst)
+			goto out;
+		goto route_done;
+	}
+
+	dst_release(dst);
+	dst = dst2;
+
+	if (err)
+		goto out;
+
+route_done:
 	if (ipv6_addr_is_multicast(&fl.fl6_dst))
 		hlimit = np->mcast_hops;
 	else
@@ -608,6 +644,22 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct icmp6hdr *hdr;
 	int type;
 
+	if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) &&
+	    skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) {
+		int nh;
+
+		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+			goto drop_no_count;
+
+		nh = skb_network_offset(skb);
+		skb_set_network_header(skb, sizeof(*hdr));
+
+		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+			goto drop_no_count;
+
+		skb_set_network_header(skb, nh);
+	}
+
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
 
 	saddr = &ipv6_hdr(skb)->saddr;
@@ -630,8 +682,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		}
 	}
 
-	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
-		goto discard_it;
+	__skb_pull(skb, sizeof(*hdr));
 
 	hdr = icmp6_hdr(skb);
 
@@ -717,6 +768,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
 discard_it:
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+drop_no_count:
 	kfree_skb(skb);
 	return 0;
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2e10d46c0e8..a83b5e1349e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1469,11 +1469,13 @@ restart:
 			goto dropdst;
 	}
 
+	err = -ENOENT;
+
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !xfrm_policy_count[XFRM_POLICY_OUT])
-			return 0;
+			goto nopol;
 
 		policy = flow_cache_lookup(fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
@@ -1483,14 +1485,18 @@ restart:
 	}
 
 	if (!policy)
-		return 0;
+		goto nopol;
 
 	family = dst_orig->ops->family;
-	policy->curlft.use_time = get_seconds();
 	pols[0] = policy;
 	npols ++;
 	xfrm_nr += pols[0]->xfrm_nr;
 
+	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
+		goto error;
+
+	policy->curlft.use_time = get_seconds();
+
 	switch (policy->action) {
 	default:
 	case XFRM_POLICY_BLOCK:
@@ -1649,6 +1655,11 @@ dropdst:
 	dst_release(dst_orig);
 	*dst_p = NULL;
 	return err;
+
+nopol:
+	if (flags & XFRM_LOOKUP_ICMP)
+		goto dropdst;
+	return 0;
 }
 EXPORT_SYMBOL(__xfrm_lookup);
 
-- 
cgit v1.2.3


From 81103a52f26d8630aa0c1dcddccaebb04d7922a8 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 12 Dec 2007 10:47:38 -0800
Subject: [NETNS]: network namespace was passed into dev_getbyhwaddr but not
 used

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 12f66e528a4..4ced3836690 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -675,7 +675,7 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h
 
 	ASSERT_RTNL();
 
-	for_each_netdev(&init_net, dev)
+	for_each_netdev(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;
-- 
cgit v1.2.3


From 5533995b62d02dbbf930f2e59221c2d5ea05aab7 Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Wed, 12 Dec 2007 11:01:43 -0800
Subject: [IPIP]: Allow rebinding the tunnel to another interface

Once created, an IP tunnel can't be bound to another device.
(reported as https://bugzilla.redhat.com/show_bug.cgi?id=419671)

To reproduce:

# create a tunnel:
ip tunnel add tunneltest0 mode ipip remote 10.0.0.1 dev eth0
# try to change the bounding device from eth0 to eth1:
ip tunnel change tunneltest0 dev eth1
# show the result:
ip tunnel show tunneltest0

tunneltest0: ip/ip  remote 10.0.0.1  local any  dev eth0  ttl inherit

Notice the bound device has not changed from eth0 to eth1.

This patch fixes it. When changing the binding, it also recalculates the
MTU according to the new bound device's MTU.

If the change is acceptable, I'll do the same for GRE and SIT tunnels.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipip.c | 66 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8c2b2b0741d..160535b5170 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -651,6 +651,40 @@ tx_error:
 	return 0;
 }
 
+static void ipip_tunnel_bind_dev(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+
+	tunnel = netdev_priv(dev);
+	iph = &tunnel->parms.iph;
+
+	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
+		struct rtable *rt;
+		if (!ip_route_output_key(&rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+	if (tdev) {
+		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+		dev->mtu = tdev->mtu - sizeof(struct iphdr);
+	}
+	dev->iflink = tunnel->parms.link;
+}
+
 static int
 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -723,6 +757,11 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				t->parms.iph.ttl = p.iph.ttl;
 				t->parms.iph.tos = p.iph.tos;
 				t->parms.iph.frag_off = p.iph.frag_off;
+				if (t->parms.link != p.link) {
+					t->parms.link = p.link;
+					ipip_tunnel_bind_dev(dev);
+					netdev_state_change(dev);
+				}
 			}
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 				err = -EFAULT;
@@ -791,12 +830,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
 
 static int ipip_tunnel_init(struct net_device *dev)
 {
-	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel;
-	struct iphdr *iph;
 
 	tunnel = netdev_priv(dev);
-	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
 	strcpy(tunnel->parms.name, dev->name);
@@ -804,29 +840,7 @@ static int ipip_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	if (iph->daddr) {
-		struct flowi fl = { .oif = tunnel->parms.link,
-				    .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
-				    .proto = IPPROTO_IPIP };
-		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
-			tdev = rt->u.dst.dev;
-			ip_rt_put(rt);
-		}
-		dev->flags |= IFF_POINTOPOINT;
-	}
-
-	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
-
-	if (tdev) {
-		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
-		dev->mtu = tdev->mtu - sizeof(struct iphdr);
-	}
-	dev->iflink = tunnel->parms.link;
+	ipip_tunnel_bind_dev(dev);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 7233b9f33e72ca477034ff5cf901c89efba3a5bc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 18:47:48 -0800
Subject: [IPSEC]: Fix reversed ICMP6 policy check

The policy check I added for ICMP on IPv6 is reversed.  This
patch fixes that.

It also adds an skb->sp check so that unprotected packets that
fail the policy check do not crash the machine.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 2 +-
 net/ipv6/icmp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3c41a6f7e6e..c41f3cc4fba 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -977,7 +977,7 @@ int icmp_rcv(struct sk_buff *skb)
 	struct icmphdr *icmph;
 	struct rtable *rt = (struct rtable *)skb->dst;
 
-	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb) &&
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb) && skb->sp &&
 	    skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) {
 		int nh;
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c0bea7bfaa8..1659d2fb01f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -644,7 +644,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct icmp6hdr *hdr;
 	int type;
 
-	if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) &&
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) && skb->sp &&
 	    skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) {
 		int nh;
 
-- 
cgit v1.2.3


From bb72845e699d3c84e5f861b51db686107a51dea5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 18:48:58 -0800
Subject: [IPSEC]: Make callers of xfrm_lookup to use XFRM_LOOKUP_WAIT

This patch converts all callers of xfrm_lookup that used an
explicit value of 1 to indiciate blocking to use the new flag
XFRM_LOOKUP_WAIT.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv6.c       | 2 +-
 net/decnet/dn_route.c | 3 ++-
 net/ipv4/route.c      | 3 ++-
 net/ipv6/datagram.c   | 2 +-
 net/ipv6/raw.c        | 2 +-
 net/ipv6/tcp_ipv6.c   | 2 +-
 net/ipv6/udp.c        | 2 +-
 7 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 87c98fb86fa..a08e2cb1191 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -994,7 +994,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	err = __xfrm_lookup(&dst, &fl, sk, 1);
+	err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
 	if (err < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5d742f1420d..73a13075b7e 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1196,7 +1196,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock
 
 	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
 	if (err == 0 && fl->proto) {
-		err = xfrm_lookup(pprt, fl, sk, !(flags & MSG_DONTWAIT));
+		err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ?
+						XFRM_LOOKUP_WAIT : 0);
 	}
 	return err;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b576f8cd401..1b70ffd1261 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2536,7 +2536,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
 			flp->fl4_src = (*rp)->rt_src;
 		if (!flp->fl4_dst)
 			flp->fl4_dst = (*rp)->rt_dst;
-		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
+				    flags ? XFRM_LOOKUP_WAIT : 0);
 		if (err == -EREMOTE)
 			err = ipv4_dst_blackhole(rp, flp, sk);
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 5d4245ab418..f49a06aa97d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -177,7 +177,7 @@ ipv4_connected:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
+	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 850b83e430b..2663fd1323e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -871,7 +871,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
+	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9544beb6d1c..0ef99864ef8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
+	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e3bd39f54e..8cbdcc9602d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -757,7 +757,7 @@ do_udp_sendmsg:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
+	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
-- 
cgit v1.2.3


From aebcf82c1fe9231be5cb4f9c1362d5db39e7d7b2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 18:54:16 -0800
Subject: [IPSEC]: Do not let packets pass when ICMP flag is off

This fixes a logical error in ICMP policy checks which lets
packets through if the state ICMP flag is off.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 7 +++++--
 net/ipv6/icmp.c | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c41f3cc4fba..ce5b4be559a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -977,10 +977,13 @@ int icmp_rcv(struct sk_buff *skb)
 	struct icmphdr *icmph;
 	struct rtable *rt = (struct rtable *)skb->dst;
 
-	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb) && skb->sp &&
-	    skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) {
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		int nh;
 
+		if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
+				 XFRM_STATE_ICMP))
+			goto drop;
+
 		if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
 			goto drop;
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1659d2fb01f..c3bbd868730 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -644,10 +644,13 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct icmp6hdr *hdr;
 	int type;
 
-	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb) && skb->sp &&
-	    skb->sp->xvec[skb->sp->len - 1]->props.flags & XFRM_STATE_ICMP) {
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		int nh;
 
+		if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
+				 XFRM_STATE_ICMP))
+			goto drop_no_count;
+
 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
 			goto drop_no_count;
 
-- 
cgit v1.2.3


From a66207121f85c54a3df4d347a3c5bdf6cb154b09 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Dec 2007 19:21:56 -0800
Subject: [NET]: Check RTNL status in unregister_netdevice

The caller must hold the RTNL so let's check it in unregister_netdevice.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4ced3836690..7153e94f50a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3971,6 +3971,8 @@ void synchronize_net(void)
 
 void unregister_netdevice(struct net_device *dev)
 {
+	ASSERT_RTNL();
+
 	rollback_registered(dev);
 	/* Finish processing unregister after unlock */
 	net_set_todo(dev);
-- 
cgit v1.2.3


From 96eba69dbac767f4e287df39e6fa489d37f1aa7b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 13 Dec 2007 05:24:40 -0800
Subject: [DECNET]: Fix inverted wait flag in xfrm_lookup call

My previous patch made the wait flag take the opposite value to what
it should be.  This patch fixes that.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 73a13075b7e..b712d0b3652 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1197,7 +1197,7 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock
 	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
 	if (err == 0 && fl->proto) {
 		err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ?
-						XFRM_LOOKUP_WAIT : 0);
+						0 : XFRM_LOOKUP_WAIT);
 	}
 	return err;
 }
-- 
cgit v1.2.3


From 69567d0b63b7f4ffeb53fe746c87bd6efe1c284b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 11:28:43 -0200
Subject: [DCCP]: Perform SHUT_RD and SHUT_WR on receiving close

This patch performs two changes:

1) Close the write-end in addition to the read-end when a fin-like segment
  (Close or CloseReq) is received by DCCP. This accounts for the fact that DCCP,
  in contrast to TCP, does not have a half-close. RFC 4340 says in this respect
  that when a fin-like segment has been sent there is no guarantee at all that
  any   further data will be processed.
  Thus this patch performs SHUT_WR in addition to the SHUT_RD when a fin-like
  segment is encountered.

2) Minor change: I noted that code appears twice in different places and think it
   makes sense to put this into a self-contained function (dccp_enqueue()).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index decf2f21149..dacd4fd3c63 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -22,16 +22,27 @@
 /* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
 int sysctl_dccp_sync_ratelimit	__read_mostly = HZ / 8;
 
-static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
 {
-	sk->sk_shutdown |= RCV_SHUTDOWN;
-	sock_set_flag(sk, SOCK_DONE);
 	__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	skb_set_owner_r(skb, sk);
 	sk->sk_data_ready(sk, 0);
 }
 
+static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+{
+	/*
+	 * On receiving Close/CloseReq, both RD/WR shutdown are performed.
+	 * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after
+	 * receiving the closing segment, but there is no guarantee that such
+	 * data will be processed at all.
+	 */
+	sk->sk_shutdown = SHUTDOWN_MASK;
+	sock_set_flag(sk, SOCK_DONE);
+	dccp_enqueue_skb(sk, skb);
+}
+
 static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
 {
 	int queued = 0;
@@ -282,10 +293,7 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		 * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening"
 		 * - sk_receive_queue is full, use Code 2, "Receive Buffer"
 		 */
-		__skb_pull(skb, dh->dccph_doff * 4);
-		__skb_queue_tail(&sk->sk_receive_queue, skb);
-		skb_set_owner_r(skb, sk);
-		sk->sk_data_ready(sk, 0);
+		dccp_enqueue_skb(sk, skb);
 		return 0;
 	case DCCP_PKT_ACK:
 		goto discard;
-- 
cgit v1.2.3


From 09f7709f4929666006931f1d4efc498a6d419bbc Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 13 Dec 2007 05:34:58 -0800
Subject: [IPV6]: fix section mismatch warnings

Removed useless and buggy __exit section in the different
ipv6 subsystems. Otherwise they will be called inside an
init section during rollbacking in case of an error in the
protocol initialization.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 net/ipv6/raw.c      | 2 +-
 net/ipv6/tcp_ipv6.c | 2 +-
 net/ipv6/udp.c      | 2 +-
 net/ipv6/udplite.c  | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ba6f7925c17..6a48bb88f46 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4223,7 +4223,7 @@ errout:
 	return err;
 }
 
-void __exit addrconf_cleanup(void)
+void addrconf_cleanup(void)
 {
 	struct net_device *dev;
 	struct inet6_ifaddr *ifa;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 2663fd1323e..45a580e843d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1321,7 +1321,7 @@ out:
 	return ret;
 }
 
-void __exit rawv6_exit(void)
+void rawv6_exit(void)
 {
 	inet6_unregister_protosw(&rawv6_protosw);
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0ef99864ef8..0268e118f0b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2193,7 +2193,7 @@ out_tcpv6_protosw:
 	goto out;
 }
 
-void __exit tcpv6_exit(void)
+void tcpv6_exit(void)
 {
 	sock_release(tcp6_socket);
 	inet6_unregister_protosw(&tcpv6_protosw);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8cbdcc9602d..8b3e6d61bf5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1035,7 +1035,7 @@ out_udpv6_protocol:
 	goto out;
 }
 
-void __exit udpv6_exit(void)
+void udpv6_exit(void)
 {
 	inet6_unregister_protosw(&udpv6_protosw);
 	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f20b376689f..39f070518e6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -96,7 +96,7 @@ out_udplitev6_protocol:
 	goto out;
 }
 
-void __exit udplitev6_exit(void)
+void udplitev6_exit(void)
 {
 	inet6_unregister_protosw(&udplite6_protosw);
 	inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
-- 
cgit v1.2.3


From 92d31920b84f258badf206eea8aaf5ac677ac535 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:02:43 -0200
Subject: [DCCP]: Shift the retransmit timer for active-close into output.c

When performing active close, RFC 4340, 8.3. requires to retransmit the
Close/CloseReq with a backoff-retransmit timer starting at intially 2 RTTs.

This patch shifts the existing code for active-close retransmit timer
into output.c, so that the retransmit timer is started when the first
Close/CloseReq is sent. Previously, the timer was started when, after
releasing the socket in dccp_close(), the actively-closing side had not yet
reached the CLOSED/TIMEWAIT state.

The patch further reduces the initial timeout from 3 seconds to the required
2 RTTs, where - in absence of a known RTT - the fallback value specified in
RFC 4340, 3.4 is used.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/output.c | 13 ++++++++++++-
 net/dccp/proto.c  | 18 ------------------
 2 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 7caa7f57bb7..e97584aa489 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -574,7 +574,18 @@ void dccp_send_close(struct sock *sk, const int active)
 		dccp_write_xmit(sk, 1);
 		dccp_skb_entail(sk, skb);
 		dccp_transmit_skb(sk, skb_clone(skb, prio));
-		/* FIXME do we need a retransmit timer here? */
+		/*
+		 * Retransmission timer for active-close: RFC 4340, 8.3 requires
+		 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
+		 * state can be left. The initial timeout is 2 RTTs.
+		 * Since RTT measurement is done by the CCIDs, there is no easy
+		 * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4
+		 * is too low (200ms); we use a high value to avoid unnecessary
+		 * retransmissions when the link RTT is > 0.2 seconds.
+		 * FIXME: Let main module sample RTTs and use that instead.
+		 */
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
 	} else
 		dccp_transmit_skb(sk, skb);
 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 60f40ec72ff..8a73c8f98d7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -996,24 +996,6 @@ adjudge_to_death:
 	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
 		goto out;
 
-	/*
-	 * The last release_sock may have processed the CLOSE or RESET
-	 * packet moving sock to CLOSED state, if not we have to fire
-	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
-	 * in draft-ietf-dccp-spec-11. -acme
-	 */
-	if (sk->sk_state == DCCP_CLOSING) {
-		/* FIXME: should start at 2 * RTT */
-		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-					  inet_csk(sk)->icsk_rto,
-					  DCCP_RTO_MAX);
-#if 0
-		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
-		dccp_set_state(sk, DCCP_CLOSED);
-#endif
-	}
-
 	if (sk->sk_state == DCCP_CLOSED)
 		inet_csk_destroy_sock(sk);
 
-- 
cgit v1.2.3


From 28be5440044d5b19b0331f79fb3e81845ad6d77e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:16:23 -0200
Subject: [DCCP]: Use maximum-RTO backoff from DCCP spec

This removes another Fixme, using the TCP maximum RTO rather than the value
specified by the DCCP specification. Across the sections in RFC 4340, 64
seconds is consistently suggested as maximum RTO backoff value; and this is
the value which is now used.

I have checked both termination cases for retransmissions of Close/CloseReq:
with the default value 15 of `retries2', and an initial icsk_retransmit = 0,
it takes about 614 seconds to declare a non-responding peer as dead, after
which the final terminating Reset is sent. With the TCP maximum RTO value of
120 seconds it takes (as might be expected) almost twice as long, about 23
minutes.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 07dcbe73b50..3af3320194f 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -72,7 +72,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 /* RFC 1122, 4.2.3.1 initial RTO value */
 #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
 
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/*
+ * The maximum back-off value for retransmissions. This is needed for
+ *  - retransmitting client-Requests (sec. 8.1.1),
+ *  - retransmitting Close/CloseReq when closing (sec. 8.3),
+ *  - feature-negotiation retransmission (sec. 6.6.3),
+ *  - Acks in client-PARTOPEN state (sec. 8.1.5).
+ */
+#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
 
 /*
  * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
-- 
cgit v1.2.3


From b8599d20708fa3bde1e414689f3474560c2d990b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:25:01 -0200
Subject: [DCCP]: Support for server holding timewait state

This adds a socket option and signalling support for the case where the server
holds timewait state on closing the connection, as described in RFC 4340, 8.3.

Since holding timewait state at the server is the non-usual case, it is enabled
via a socket option. Documentation for this socket option has been added.

The setsockopt statement has been made resilient against different possible cases
of expressing boolean `true' values using a suggestion by Ian McDonald.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/output.c |  6 ++++--
 net/dccp/proto.c  | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index e97584aa489..b2e17910930 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -567,8 +567,10 @@ void dccp_send_close(struct sock *sk, const int active)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, sk->sk_prot->max_header);
-	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
-					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
+	if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait)
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ;
+	else
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
 		dccp_write_xmit(sk, 1);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8a73c8f98d7..cc87c500bfb 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -551,6 +551,12 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 						     (struct dccp_so_feat __user *)
 						     optval);
 		break;
+	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
+		if (dp->dccps_role != DCCP_ROLE_SERVER)
+			err = -EOPNOTSUPP;
+		else
+			dp->dccps_server_timewait = (val != 0);
+		break;
 	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
 		if (val < 0 || val > 15)
 			err = -EINVAL;
@@ -653,6 +659,10 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 		val = dp->dccps_mss_cache;
 		len = sizeof(val);
 		break;
+	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
+		val = dp->dccps_server_timewait;
+		len = sizeof(val);
+		break;
 	case DCCP_SOCKOPT_SEND_CSCOV:
 		val = dp->dccps_pcslen;
 		len = sizeof(val);
@@ -918,7 +928,8 @@ static void dccp_terminate_connection(struct sock *sk)
 	case DCCP_OPEN:
 		dccp_send_close(sk, 1);
 
-		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
+		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
+		    !dccp_sk(sk)->dccps_server_timewait)
 			next_state = DCCP_ACTIVE_CLOSEREQ;
 		else
 			next_state = DCCP_CLOSING;
-- 
cgit v1.2.3


From 7913350663e2756ecb91dd3a7c773806b943426e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:27:14 -0200
Subject: [DCCP]: Collapse repeated `len' statements into one

This replaces 4 individual assignments for `len' with a single
one, placed where the control flow of those 4 leads to.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index cc87c500bfb..0bed4a6095b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -657,19 +657,15 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 					       (__be32 __user *)optval, optlen);
 	case DCCP_SOCKOPT_GET_CUR_MPS:
 		val = dp->dccps_mss_cache;
-		len = sizeof(val);
 		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		val = dp->dccps_server_timewait;
-		len = sizeof(val);
 		break;
 	case DCCP_SOCKOPT_SEND_CSCOV:
 		val = dp->dccps_pcslen;
-		len = sizeof(val);
 		break;
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		val = dp->dccps_pcrlen;
-		len = sizeof(val);
 		break;
 	case 128 ... 191:
 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
@@ -681,6 +677,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 		return -ENOPROTOOPT;
 	}
 
+	len = sizeof(val);
 	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
 		return -EFAULT;
 
-- 
cgit v1.2.3


From 8b819412481494fb6861c08d360b75fabcbbfbbf Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:29:24 -0200
Subject: [DCCP]: Allow to parse options on Request Sockets

The option parsing code currently only parses on full sk's. This causes a problem for
options sent during the initial handshake (in particular timestamps and feature-negotiation
options). Therefore, this patch extends the option parsing code with an additional argument
for request_socks: if it is non-NULL, options are parsed on the request socket, otherwise
the normal path (parsing on the sk) is used.

Subsequent patches, which implement feature negotiation during connection setup, make use
of this facility.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c   |  6 +++---
 net/dccp/ipv4.c    |  8 ++++----
 net/dccp/ipv6.c    |  8 ++++----
 net/dccp/options.c | 34 +++++++++++++++++++++++-----------
 4 files changed, 34 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index dacd4fd3c63..08392ed86c2 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -369,7 +369,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if (dccp_check_seqno(sk, skb))
 		goto discard;
 
-	if (dccp_parse_options(sk, skb))
+	if (dccp_parse_options(sk, NULL, skb))
 		goto discard;
 
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
@@ -427,7 +427,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			goto out_invalid_packet;
 		}
 
-		if (dccp_parse_options(sk, skb))
+		if (dccp_parse_options(sk, NULL, skb))
 			goto out_invalid_packet;
 
 		/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
@@ -609,7 +609,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/*
 		 * Step 8: Process options and mark acknowledgeable
 		 */
-		if (dccp_parse_options(sk, skb))
+		if (dccp_parse_options(sk, NULL, skb))
 			goto discard;
 
 		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index db17b83e8d3..02fc91ce250 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -600,11 +600,12 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	if (dccp_parse_options(sk, skb))
-		goto drop_and_free;
-
 	dccp_reqsk_init(req, skb);
 
+	dreq = dccp_rsk(req);
+	if (dccp_parse_options(sk, dreq, skb))
+		goto drop_and_free;
+
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
@@ -621,7 +622,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * In fact we defer setting S.GSR, S.SWL, S.SWH to
 	 * dccp_create_openreq_child.
 	 */
-	dreq = dccp_rsk(req);
 	dreq->dreq_isr	   = dcb->dccpd_seq;
 	dreq->dreq_iss	   = dccp_v4_init_sequence(skb);
 	dreq->dreq_service = service;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index a08e2cb1191..f42b75ce7f5 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -415,11 +415,12 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	if (dccp_parse_options(sk, skb))
-		goto drop_and_free;
-
 	dccp_reqsk_init(req, skb);
 
+	dreq = dccp_rsk(req);
+	if (dccp_parse_options(sk, dreq, skb))
+		goto drop_and_free;
+
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
@@ -449,7 +450,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	 *   In fact we defer setting S.GSR, S.SWL, S.SWH to
 	 *   dccp_create_openreq_child.
 	 */
-	dreq = dccp_rsk(req);
 	dreq->dreq_isr	   = dcb->dccpd_seq;
 	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
 	dreq->dreq_service = service;
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 523250b45ea..f496d4dc7ef 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -46,7 +46,13 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
 	return value;
 }
 
-int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_parse_options  -  Parse DCCP options present in @skb
+ * @sk: client|server|listening dccp socket (when @dreq != NULL)
+ * @dreq: request socket to use during connection setup, or NULL
+ */
+int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
+		       struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -92,6 +98,20 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 				goto out_invalid_option;
 		}
 
+		/*
+		 * CCID-Specific Options (from RFC 4340, sec. 10.3):
+		 *
+		 * Option numbers 128 through 191 are for options sent from the
+		 * HC-Sender to the HC-Receiver; option numbers 192 through 255
+		 * are for options sent from the HC-Receiver to	the HC-Sender.
+		 *
+		 * CCID-specific options are ignored during connection setup, as
+		 * negotiation may still be in progress (see RFC 4340, 10.3).
+		 *
+		 */
+		if (dreq != NULL && opt >= 128)
+			goto ignore_option;
+
 		switch (opt) {
 		case DCCPO_PADDING:
 			break;
@@ -150,6 +170,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 			opt_val = get_unaligned((__be32 *)value);
 			opt_recv->dccpor_timestamp = ntohl(opt_val);
 
+			/* FIXME: if dreq != NULL, don't store this on listening socket */
 			dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
 			dp->dccps_timestamp_time = ktime_get_real();
 
@@ -213,15 +234,6 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 			dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
 				      dccp_role(sk), elapsed_time);
 			break;
-			/*
-			 * From RFC 4340, sec. 10.3:
-			 *
-			 *	Option numbers 128 through 191 are for
-			 *	options sent from the HC-Sender to the
-			 *	HC-Receiver; option numbers 192 through 255
-			 *	are for options sent from the HC-Receiver to
-			 *	the HC-Sender.
-			 */
 		case 128 ... 191: {
 			const u16 idx = value - options;
 
@@ -245,7 +257,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 				  "implemented, ignoring", sk, opt, len);
 			break;
 		}
-
+ignore_option:
 		if (opt != DCCPO_MANDATORY)
 			mandatory = 0;
 	}
-- 
cgit v1.2.3


From 8109616e2ef978d142ea45850efd4f102b9bdce4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:31:26 -0200
Subject: [DCCP]: Add (missing) option parsing to request_sock processing

This adds option-parsing code to processing of Acks in the listening state
on request_socks on the server, serving two purposes
 (i)  resolves a FIXME (removed);
 (ii) paves the way for feature-negotiation during connection-setup.

There is an intended subtlety here with regard to dccp_check_req:

 Parsing options happens only after testing whether the received packet is
 a retransmitted Request.  Otherwise, if the Request contained (a possibly
 large number of) feature-negotiation options, recomputing state would have to
 happen each time a retransmitted Request arrives, which opens the door to an
 easy DoS attack.  Since in a genuine retransmission the options should not be
 different from the original, reusing the already computed state seems better.

 The other point is - if there are timestamp options on the Request, they will
 not be answered; which means that in the presence of retransmission (likely
 due to loss and/or other problems), the use of Request/Response RTT sampling
 is suspended, so that startup problems here do not propagate.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/minisocks.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 831b76e08d0..b1d5da61f6a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -200,10 +200,10 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 			    struct request_sock **prev)
 {
 	struct sock *child = NULL;
+	struct dccp_request_sock *dreq = dccp_rsk(req);
 
 	/* Check for retransmitted REQUEST */
 	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
-		struct dccp_request_sock *dreq = dccp_rsk(req);
 
 		if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
 			dccp_pr_debug("Retransmitted REQUEST\n");
@@ -227,22 +227,22 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 		goto drop;
 
 	/* Invalid ACK */
-	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
+	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) {
 		dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
 			      "dreq_iss=%llu\n",
 			      (unsigned long long)
 			      DCCP_SKB_CB(skb)->dccpd_ack_seq,
-			      (unsigned long long)
-			      dccp_rsk(req)->dreq_iss);
+			      (unsigned long long) dreq->dreq_iss);
 		goto drop;
 	}
 
+	if (dccp_parse_options(sk, dreq, skb))
+		 goto drop;
+
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
 	if (child == NULL)
 		goto listen_overflow;
 
-	/* FIXME: deal with options */
-
 	inet_csk_reqsk_queue_unlink(sk, req, prev);
 	inet_csk_reqsk_queue_removed(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
-- 
cgit v1.2.3


From b4d4f7c70fd3361c6c889752e08ea9be304cf5f4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:37:19 -0200
Subject: [DCCP]: Handle timestamps on Request/Response exchange separately

In DCCP, timestamps can occur on packets anytime, CCID3 uses a timestamp(/echo) on the Request/Response
exchange. This patch addresses the following situation:
	* timestamps are recorded on the listening socket;
	* Responses are sent from dccp_request_sockets;
	* suppose two connections reach the listening socket with very small time in between:
	* the first timestamp value gets overwritten by the second connection request.

This is not really good, so this patch separates timestamps into
 * those which are received by the server during the initial handshake (on dccp_request_sock);
 * those which are received by the client or the client after connection establishment.

As before, a timestamp of 0 is regarded as indicating that no (meaningful) timestamp has been
received (in addition, a warning message is printed if hosts send 0-valued timestamps).

The timestamp-echoing now works as follows:
 * when a timestamp is present on the initial Request, it is placed into dreq, due to the
   call to dccp_parse_options in dccp_v{4,6}_conn_request;
 * when a timestamp is present on the Ack leading from RESPOND => OPEN, it is copied over
   from the request_sock into the child cocket in dccp_create_openreq_child;
 * timestamps received on an (established) dccp_sock are treated as before.

Since Elapsed Time is measured in hundredths of milliseconds (13.2), the new dccp_timestamp()
function is used, as it is expected that the time between receiving the timestamp and
sending the timestamp echo will be very small against the wrap-around time. As a byproduct,
this allows smaller timestamping-time fields.

Furthermore, inserting the Timestamp Echo option has been taken out of the block starting with
'!dccp_packet_without_ack()', since Timestamp Echo can be carried on any packet (5.8 and 13.3).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/minisocks.c | 21 ++++++++++++--------
 net/dccp/options.c   | 56 +++++++++++++++++++++++++++++++++-------------------
 2 files changed, 49 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b1d5da61f6a..027d1814e1a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -117,11 +117,13 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		struct dccp_sock *newdp = dccp_sk(newsk);
 		struct dccp_minisock *newdmsk = dccp_msk(newsk);
 
-		newdp->dccps_role	   = DCCP_ROLE_SERVER;
-		newdp->dccps_hc_rx_ackvec  = NULL;
-		newdp->dccps_service_list  = NULL;
-		newdp->dccps_service	   = dreq->dreq_service;
-		newicsk->icsk_rto	   = DCCP_TIMEOUT_INIT;
+		newdp->dccps_role	    = DCCP_ROLE_SERVER;
+		newdp->dccps_hc_rx_ackvec   = NULL;
+		newdp->dccps_service_list   = NULL;
+		newdp->dccps_service	    = dreq->dreq_service;
+		newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
+		newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
+		newicsk->icsk_rto	    = DCCP_TIMEOUT_INIT;
 
 		if (dccp_feat_clone(sk, newsk))
 			goto out_free;
@@ -303,9 +305,12 @@ EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
 
 void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 {
-	inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
-	inet_rsk(req)->acked	= 0;
-	req->rcv_wnd		= sysctl_dccp_feat_sequence_window;
+	struct dccp_request_sock *dreq = dccp_rsk(req);
+
+	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->acked	  = 0;
+	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
+	dreq->dreq_timestamp_echo = 0;
 }
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index f496d4dc7ef..0c996d8c79a 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -166,16 +166,27 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
 				goto out_invalid_option;
-
+			/*
+			 * RFC 4340 13.1: "The precise time corresponding to
+			 * Timestamp Value zero is not specified". We use
+			 * zero to indicate absence of a meaningful timestamp.
+			 */
 			opt_val = get_unaligned((__be32 *)value);
-			opt_recv->dccpor_timestamp = ntohl(opt_val);
-
-			/* FIXME: if dreq != NULL, don't store this on listening socket */
-			dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
-			dp->dccps_timestamp_time = ktime_get_real();
+			if (unlikely(opt_val == 0)) {
+				DCCP_WARN("Timestamp with zero value\n");
+				break;
+			}
 
+			if (dreq != NULL) {
+				dreq->dreq_timestamp_echo = ntohl(opt_val);
+				dreq->dreq_timestamp_time = dccp_timestamp();
+			} else {
+				opt_recv->dccpor_timestamp =
+					dp->dccps_timestamp_echo = ntohl(opt_val);
+				dp->dccps_timestamp_time = dccp_timestamp();
+			}
 			dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
-				      dccp_role(sk), opt_recv->dccpor_timestamp,
+				      dccp_role(sk), ntohl(opt_val),
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
 			break;
@@ -393,16 +404,24 @@ int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
 
 EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
 
-static int dccp_insert_option_timestamp_echo(struct sock *sk,
+static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
+					     struct dccp_request_sock *dreq,
 					     struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
 	__be32 tstamp_echo;
-	int len, elapsed_time_len;
 	unsigned char *to;
-	const suseconds_t delta = ktime_us_delta(ktime_get_real(),
-						 dp->dccps_timestamp_time);
-	u32 elapsed_time = delta / 10;
+	u32 elapsed_time, elapsed_time_len, len;
+
+	if (dreq != NULL) {
+		elapsed_time = dccp_timestamp() - dreq->dreq_timestamp_time;
+		tstamp_echo  = htonl(dreq->dreq_timestamp_echo);
+		dreq->dreq_timestamp_echo = 0;
+	} else {
+		elapsed_time = dccp_timestamp() - dp->dccps_timestamp_time;
+		tstamp_echo  = htonl(dp->dccps_timestamp_echo);
+		dp->dccps_timestamp_echo = 0;
+	}
+
 	elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
 	len = 6 + elapsed_time_len;
 
@@ -415,7 +434,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
 	*to++ = DCCPO_TIMESTAMP_ECHO;
 	*to++ = len;
 
-	tstamp_echo = htonl(dp->dccps_timestamp_echo);
 	memcpy(to, &tstamp_echo, 4);
 	to += 4;
 
@@ -427,8 +445,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
 		memcpy(to, &var32, 4);
 	}
 
-	dp->dccps_timestamp_echo = 0;
-	dp->dccps_timestamp_time = ktime_set(0, 0);
 	return 0;
 }
 
@@ -537,10 +553,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		    dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
 		    dccp_insert_option_ackvec(sk, skb))
 			return -1;
-
-		if (dp->dccps_timestamp_echo != 0 &&
-		    dccp_insert_option_timestamp_echo(sk, skb))
-			return -1;
 	}
 
 	if (dp->dccps_hc_rx_insert_options) {
@@ -564,6 +576,10 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 	    dccp_insert_option_timestamp(sk, skb))
 		return -1;
 
+	if (dp->dccps_timestamp_echo != 0 &&
+	    dccp_insert_option_timestamp_echo(dp, NULL, skb))
+		return -1;
+
 	/* XXX: insert other options when appropriate */
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
-- 
cgit v1.2.3


From af3b867e2f6b72422bc7aacb1f1e26f47a9649bc Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:38:11 -0200
Subject: [DCCP]: Support inserting options during the 3-way handshake

This provides a separate routine to insert options during the initial handshake.
The main purpose is to conduct feature negotiation, for the moment the only user
is the timestamp echo needed for the (CCID3) handshake RTT sample.

Padding of options has been put into a small separate routine, to be shared among
the two functions. This could also be used as a generic routine to finish inserting
options.

Also removed an `XXX' comment since its content was obvious.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h    |  1 +
 net/dccp/options.c | 32 ++++++++++++++++++++++----------
 net/dccp/output.c  |  2 +-
 3 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3af3320194f..b138e2019d7 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -428,6 +428,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 }
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
 extern int dccp_insert_option_elapsed_time(struct sock *sk,
 					    struct sk_buff *skb,
 					    u32 elapsed_time);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0c996d8c79a..bedb5daaa3c 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -537,6 +537,18 @@ static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
+/* The length of all options needs to be a multiple of 4 (5.8) */
+static void dccp_insert_option_padding(struct sk_buff *skb)
+{
+	int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
+
+	if (padding != 0) {
+		padding = 4 - padding;
+		memset(skb_push(skb, padding), 0, padding);
+		DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
+	}
+}
+
 int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -580,18 +592,18 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 	    dccp_insert_option_timestamp_echo(dp, NULL, skb))
 		return -1;
 
-	/* XXX: insert other options when appropriate */
+	dccp_insert_option_padding(skb);
+	return 0;
+}
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
-		/* The length of all options has to be a multiple of 4 */
-		int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
+int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
+{
+	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
-		if (padding != 0) {
-			padding = 4 - padding;
-			memset(skb_push(skb, padding), 0, padding);
-			DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
-		}
-	}
+	if (dreq->dreq_timestamp_echo != 0 &&
+	    dccp_insert_option_timestamp_echo(NULL, dreq, skb))
+		return -1;
 
+	dccp_insert_option_padding(skb);
 	return 0;
 }
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b2e17910930..5589a5e581f 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -303,7 +303,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
 	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_iss;
 
-	if (dccp_insert_options(sk, skb)) {
+	if (dccp_insert_options_rsk(dreq, skb)) {
 		kfree_skb(skb);
 		return NULL;
 	}
-- 
cgit v1.2.3


From dd6303df095d18b0c524a76a42f57bcc679b2039 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:40:40 -0200
Subject: [DCCP]: Remove unused and redundant validation functions

This removes two inlines which were both called in a single function only:

 1) dccp_feat_change() is always called with either DCCPO_CHANGE_L or DCCPO_CHANGE_R as argument
    * from dccp_set_socktopt_change() via do_dccp_setsockopt() with DCCP_SOCKOPT_CHANGE_R/L
    * from __dccp_feat_init() via dccp_feat_init() also with DCCP_SOCKOPT_CHANGE_R/L.

    Hence the dccp_feat_is_valid_type() is completely unnecessary and always returns true.

 2) Due to (1), the length test reduces to 'len >= 4', which in turn makes
    dccp_feat_is_valid_length() unnecessary.

Furthermore, the inline function dccp_feat_is_reserved() was unfolded,
since only called in a single place.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/feat.c | 12 ++++--------
 net/dccp/feat.h | 26 --------------------------
 2 files changed, 4 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 5ebdd86c1b9..084744e624d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -24,11 +24,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 
 	dccp_feat_debug(type, feature, *val);
 
-	if (!dccp_feat_is_valid_type(type)) {
-		DCCP_WARN("option type %d invalid in negotiation\n", type);
-		return 1;
-	}
-	if (!dccp_feat_is_valid_length(type, feature, len)) {
+	if (len > 3) {
 		DCCP_WARN("invalid length %d\n", len);
 		return 1;
 	}
@@ -637,12 +633,12 @@ const char *dccp_feat_name(const u8 feat)
 		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
 		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
 	};
+	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
+		return feature_names[DCCPF_RESERVED];
+
 	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
 		return "CCID-specific";
 
-	if (dccp_feat_is_reserved(feat))
-		return feature_names[DCCPF_RESERVED];
-
 	return feature_names[feat];
 }
 
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 177f7dee4d1..e272222c7ac 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -14,32 +14,6 @@
 #include <linux/types.h>
 #include "dccp.h"
 
-static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len)
-{
-	/* sec. 6.1: Confirm has at least length 3,
-	 * sec. 6.2: Change  has at least length 4 */
-	if (len < 3)
-		return 1;
-	if (len < 4  && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R))
-		return 1;
-	/* XXX: add per-feature length validation (sec. 6.6.8) */
-	return 0;
-}
-
-static inline int dccp_feat_is_reserved(const u8 feat)
-{
-	return (feat > DCCPF_DATA_CHECKSUM &&
-		feat < DCCPF_MIN_CCID_SPECIFIC) ||
-		feat == DCCPF_RESERVED;
-}
-
-/* feature negotiation knows only these four option types (RFC 4340, sec. 6) */
-static inline int dccp_feat_is_valid_type(const u8 optnum)
-{
-	return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R;
-
-}
-
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern const char *dccp_feat_typename(const u8 type);
 extern const char *dccp_feat_name(const u8 feat);
-- 
cgit v1.2.3


From 5cdae198de33184c01226395b840f44f9834233a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:41:46 -0200
Subject: [DCCP]: Make code assumptions explicit

This removes several `XXX' references which indicate a missing support
for non-1-byte feature values: this is unnecessary, as all currently known
(standardised) SP feature values are 1-byte quantities.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/feat.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 084744e624d..4a4f6ce4498 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -4,10 +4,16 @@
  *  An implementation of the DCCP protocol
  *  Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
+ *  ASSUMPTIONS
+ *  -----------
+ *  o All currently known SP features have 1-byte quantities. If in the future
+ *    extensions of RFCs 4340..42 define features with item lengths larger than
+ *    one byte, a feature-specific extension of the code will be required.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -95,7 +101,6 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
 	return 0;
 }
 
-/* XXX taking only u8 vals */
 static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
 {
 	dccp_feat_debug(type, feat, val);
@@ -140,7 +145,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 	/* FIXME sanity check vals */
 
 	/* Are values in any order?  XXX Lame "algorithm" here */
-	/* XXX assume values are 1 byte */
 	for (i = 0; i < slen; i++) {
 		for (j = 0; j < rlen; j++) {
 			if (spref[i] == rpref[j]) {
@@ -175,7 +179,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 	}
 
 	/* need to put result and our preference list */
-	/* XXX assume 1 byte vals */
 	rlen = 1 + opt->dccpop_len;
 	rpref = kmalloc(rlen, GFP_ATOMIC);
 	if (rpref == NULL)
-- 
cgit v1.2.3


From cf86314cb7460423b0b7e611edad80a52dc90d10 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 12:48:19 -0200
Subject: [DCCP]: Ignore feature negotiation on Data packets

This implements [RFC 4340, p. 32]: "any feature negotiation options received
on DCCP-Data packets MUST be ignored".

Also added a FIXME for further processing, since the code currently (wrongly)
classifies empty Confirm options as invalid - this needs to be resolved in
a separate patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/options.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index bedb5daaa3c..d2a84a2fece 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -132,6 +132,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_CHANGE_L:
 			/* fall through */
 		case DCCPO_CHANGE_R:
+			if (pkt_type == DCCP_PKT_DATA)
+				break;
 			if (len < 2)
 				goto out_invalid_option;
 			rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
@@ -148,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_CONFIRM_L:
 			/* fall through */
 		case DCCPO_CONFIRM_R:
-			if (len < 2)
+			if (pkt_type == DCCP_PKT_DATA)
+				break;
+			if (len < 2)	/* FIXME this disallows empty confirm */
 				goto out_invalid_option;
 			if (dccp_feat_confirm_recv(sk, opt, *value,
 						   value + 1, len - 1))
-- 
cgit v1.2.3


From aef21785995778f710a60b563e03bf53ba455a47 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 13 Dec 2007 09:30:59 -0800
Subject: [IPSEC]: Fix zero return value in xfrm_lookup on error

Further testing shows that my ICMP relookup patch can cause xfrm_lookup
to return zero on error which isn't very nice since it leads to the caller
dying on null pointer dereference.  The bug is due to not setting err
to ENOENT just before we leave xfrm_lookup in case of no policy.

This patch moves the err setting to where it should be.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a83b5e1349e..8023a3c0dad 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1469,8 +1469,6 @@ restart:
 			goto dropdst;
 	}
 
-	err = -ENOENT;
-
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
@@ -1492,6 +1490,7 @@ restart:
 	npols ++;
 	xfrm_nr += pols[0]->xfrm_nr;
 
+	err = -ENOENT;
 	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
 		goto error;
 
@@ -1657,6 +1656,7 @@ dropdst:
 	return err;
 
 nopol:
+	err = -ENOENT;
 	if (flags & XFRM_LOOKUP_ICMP)
 		goto dropdst;
 	return 0;
-- 
cgit v1.2.3


From 528c4ceb427dad4a3893ba3d1913782efae0cd0e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 13 Dec 2007 09:45:12 -0800
Subject: [IPV6]: Always pass a valid nl_info to inet6_rt_notify.

This makes the code in the inet6_rt_notify more straightforward and provides
groud for namespace passing.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c |  3 ++-
 net/ipv6/route.c   | 27 +++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5fae04506ad..df05c6f2189 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1315,6 +1315,7 @@ static int fib6_walk(struct fib6_walker_t *w)
 
 static int fib6_clean_node(struct fib6_walker_t *w)
 {
+	struct nl_info info = {};
 	int res;
 	struct rt6_info *rt;
 	struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
@@ -1323,7 +1324,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL);
+			res = fib6_del(rt, &info);
 			if (res) {
 #if RT6_DEBUG >= 2
 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 11ef456d67c..b80ef578420 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -609,7 +609,8 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
 int ip6_ins_rt(struct rt6_info *rt)
 {
-	return __ip6_ins_rt(rt, NULL);
+	struct nl_info info = {};
+	return __ip6_ins_rt(rt, &info);
 }
 
 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
@@ -1266,7 +1267,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 
 int ip6_del_rt(struct rt6_info *rt)
 {
-	return __ip6_del_rt(rt, NULL);
+	struct nl_info info = {};
+	return __ip6_del_rt(rt, &info);
 }
 
 static int ip6_route_del(struct fib6_config *cfg)
@@ -2243,29 +2245,26 @@ errout:
 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	u32 pid = 0, seq = 0;
-	struct nlmsghdr *nlh = NULL;
-	int err = -ENOBUFS;
-
-	if (info) {
-		pid = info->pid;
-		nlh = info->nlh;
-		if (nlh)
-			seq = nlh->nlmsg_seq;
-	}
+	u32 seq;
+	int err;
+
+	err = -ENOBUFS;
+	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
 
 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
 	if (skb == NULL)
 		goto errout;
 
-	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
+	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
+				event, info->pid, seq, 0, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+	err = rtnl_notify(skb, &init_net, info->pid,
+				RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
-- 
cgit v1.2.3


From ee34c1eb35923cc98b1c47488a615bf51a2a2afb Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Thu, 13 Dec 2007 09:46:32 -0800
Subject: [IP_GRE]: Rebinding of GRE tunnels to other interfaces

This is similar to the change already done for IPIP tunnels.

Once created, a GRE tunnel can't be bound to another device.
To reproduce:

# create a tunnel:
ip tunnel add tunneltest0 mode gre remote 10.0.0.1 dev eth0
# try to change the bounding device from eth0 to eth1:
ip tunnel change tunneltest0 dev eth1
# show the result:
ip tunnel show tunneltest0

tunneltest0: gre/ip  remote 10.0.0.1  local any  dev eth0  ttl inherit

Notice the bound device has not changed from eth0 to eth1.

This patch fixes it. When changing the binding, it also recalculates the
MTU according to the new bound device's MTU.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 102 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 60 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4b93f32de10..0832f6e028b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -896,6 +896,59 @@ tx_error:
 	return 0;
 }
 
+static void ipgre_tunnel_bind_dev(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+	int hlen = LL_MAX_HEADER;
+	int mtu = ETH_DATA_LEN;
+	int addend = sizeof(struct iphdr) + 4;
+
+	tunnel = netdev_priv(dev);
+	iph = &tunnel->parms.iph;
+
+	/* Guess output device to choose reasonable mtu and hard_header_len */
+
+	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_GRE };
+		struct rtable *rt;
+		if (!ip_route_output_key(&rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+	if (tdev) {
+		hlen = tdev->hard_header_len;
+		mtu = tdev->mtu;
+	}
+	dev->iflink = tunnel->parms.link;
+
+	/* Precalculate GRE options length */
+	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+		if (tunnel->parms.o_flags&GRE_CSUM)
+			addend += 4;
+		if (tunnel->parms.o_flags&GRE_KEY)
+			addend += 4;
+		if (tunnel->parms.o_flags&GRE_SEQ)
+			addend += 4;
+	}
+	dev->hard_header_len = hlen + addend;
+	dev->mtu = mtu - addend;
+	tunnel->hlen = addend;
+
+}
+
 static int
 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -983,6 +1036,11 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				t->parms.iph.ttl = p.iph.ttl;
 				t->parms.iph.tos = p.iph.tos;
 				t->parms.iph.frag_off = p.iph.frag_off;
+				if (t->parms.link != p.link) {
+					t->parms.link = p.link;
+					ipgre_tunnel_bind_dev(dev);
+					netdev_state_change(dev);
+				}
 			}
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 				err = -EFAULT;
@@ -1162,12 +1220,8 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 
 static int ipgre_tunnel_init(struct net_device *dev)
 {
-	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
-	int hlen = LL_MAX_HEADER;
-	int mtu = ETH_DATA_LEN;
-	int addend = sizeof(struct iphdr) + 4;
 
 	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
@@ -1178,23 +1232,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	/* Guess output device to choose reasonable mtu and hard_header_len */
+	ipgre_tunnel_bind_dev(dev);
 
 	if (iph->daddr) {
-		struct flowi fl = { .oif = tunnel->parms.link,
-				    .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
-				    .proto = IPPROTO_GRE };
-		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
-			tdev = rt->u.dst.dev;
-			ip_rt_put(rt);
-		}
-
-		dev->flags |= IFF_POINTOPOINT;
-
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (MULTICAST(iph->daddr)) {
 			if (!iph->saddr)
@@ -1205,31 +1245,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
 			dev->stop = ipgre_close;
 		}
 #endif
-	} else {
+	} else
 		dev->header_ops = &ipgre_header_ops;
-	}
-
-	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
-
-	if (tdev) {
-		hlen = tdev->hard_header_len;
-		mtu = tdev->mtu;
-	}
-	dev->iflink = tunnel->parms.link;
 
-	/* Precalculate GRE options length */
-	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
-		if (tunnel->parms.o_flags&GRE_CSUM)
-			addend += 4;
-		if (tunnel->parms.o_flags&GRE_KEY)
-			addend += 4;
-		if (tunnel->parms.o_flags&GRE_SEQ)
-			addend += 4;
-	}
-	dev->hard_header_len = hlen + addend;
-	dev->mtu = mtu - addend;
-	tunnel->hlen = addend;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8a4a50f98bc13670bee94c40b94bc169e1263cd9 Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Thu, 13 Dec 2007 09:47:00 -0800
Subject: [IPV6] sit: Rebinding of SIT tunnels to other interfaces

This is similar to the change already done for IPIP tunnels.

Once created, a SIT tunnel can't be bound to another device.
To reproduce:

# create a tunnel:
ip tunnel add tunneltest0 mode sit remote 10.0.0.1 dev eth0
# try to change the bounding device from eth0 to eth1:
ip tunnel change tunneltest0 dev eth1
# show the result:
ip tunnel show tunneltest0

tunneltest0: ipv6/ip  remote 10.0.0.1  local any  dev eth0  ttl inherit

Notice the bound device has not changed from eth0 to eth1.

This patch fixes it. When changing the binding, it also recalculates the
MTU according to the new bound device's MTU.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 70 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b3b8513e9cb..1c6fddb80b3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -669,6 +669,42 @@ tx_error:
 	return 0;
 }
 
+static void ipip6_tunnel_bind_dev(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+
+	tunnel = netdev_priv(dev);
+	iph = &tunnel->parms.iph;
+
+	if (iph->daddr) {
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .oif = tunnel->parms.link,
+				    .proto = IPPROTO_IPV6 };
+		struct rtable *rt;
+		if (!ip_route_output_key(&rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+	if (tdev) {
+		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+		dev->mtu = tdev->mtu - sizeof(struct iphdr);
+		if (dev->mtu < IPV6_MIN_MTU)
+			dev->mtu = IPV6_MIN_MTU;
+	}
+	dev->iflink = tunnel->parms.link;
+}
+
 static int
 ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -740,6 +776,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			if (cmd == SIOCCHGTUNNEL) {
 				t->parms.iph.ttl = p.iph.ttl;
 				t->parms.iph.tos = p.iph.tos;
+				if (t->parms.link != p.link) {
+					t->parms.link = p.link;
+					ipip6_tunnel_bind_dev(dev);
+					netdev_state_change(dev);
+				}
 			}
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 				err = -EFAULT;
@@ -808,12 +849,9 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 
 static int ipip6_tunnel_init(struct net_device *dev)
 {
-	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel;
-	struct iphdr *iph;
 
 	tunnel = netdev_priv(dev);
-	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
 	strcpy(tunnel->parms.name, dev->name);
@@ -821,31 +859,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	if (iph->daddr) {
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
-				    .oif = tunnel->parms.link,
-				    .proto = IPPROTO_IPV6 };
-		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
-			tdev = rt->u.dst.dev;
-			ip_rt_put(rt);
-		}
-		dev->flags |= IFF_POINTOPOINT;
-	}
-
-	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
-
-	if (tdev) {
-		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
-		dev->mtu = tdev->mtu - sizeof(struct iphdr);
-		if (dev->mtu < IPV6_MIN_MTU)
-			dev->mtu = IPV6_MIN_MTU;
-	}
-	dev->iflink = tunnel->parms.link;
+	ipip6_tunnel_bind_dev(dev);
 
 	return 0;
 }
-- 
cgit v1.2.3


From f5026fabda54e5ab5d469d8cfac5f46b4d321ce9 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 13 Dec 2007 09:47:57 -0800
Subject: [IPV4]: Thresholds in fib_trie.c are used as consts, so make them
 const.

There are several thresholds for trie fib hash management. They are used
in the code as a constants. Make them constants from the compiler point of
view.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 9126eea7167..a842204df7b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -299,10 +299,10 @@ static inline void check_tnode(const struct tnode *tn)
 	WARN_ON(tn && tn->pos+tn->bits > 32);
 }
 
-static int halve_threshold = 25;
-static int inflate_threshold = 50;
-static int halve_threshold_root = 8;
-static int inflate_threshold_root = 15;
+static const int halve_threshold = 25;
+static const int inflate_threshold = 50;
+static const int halve_threshold_root = 8;
+static const int inflate_threshold_root = 15;
 
 
 static void __alias_free_mem(struct rcu_head *head)
-- 
cgit v1.2.3


From 9cb2345a8c49ea380437d02bb9fd9f291c0a005d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 23:31:14 -0200
Subject: [DCCP]: Documentation for CCID operations

This adds documentation for the ccid_operations structure.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccid.h | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c65cb2453e4..e3cdd8afc80 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -23,14 +23,35 @@
 
 struct tcp_info;
 
+/**
+ *  struct ccid_operations  -  Interface to Congestion-Control Infrastructure
+ *
+ *  @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
+ *  @ccid_name: alphabetical identifier string for @ccid_id
+ *  @ccid_owner: module which implements/owns this CCID
+ *  @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
+ *  @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket
+ *
+ *  @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup)
+ *  @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction)
+ *  @ccid_hc_rx_packet_recv: implements the HC-receiver side
+ *  @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options
+ *  @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options
+ *  @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender
+ *  @ccid_hc_tx_send_packet: implements the sending part of the HC-sender
+ *  @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender
+ *  @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender
+ *  @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender
+ */
 struct ccid_operations {
-	unsigned char	ccid_id;
-	const char	*ccid_name;
-	struct module	*ccid_owner;
-	struct kmem_cache	*ccid_hc_rx_slab;
-	__u32		ccid_hc_rx_obj_size;
-	struct kmem_cache	*ccid_hc_tx_slab;
-	__u32		ccid_hc_tx_obj_size;
+	unsigned char		ccid_id;
+	const char		*ccid_name;
+	struct module		*ccid_owner;
+	struct kmem_cache	*ccid_hc_rx_slab,
+				*ccid_hc_tx_slab;
+	__u32			ccid_hc_rx_obj_size,
+				ccid_hc_tx_obj_size;
+	/* Interface Routines */
 	int		(*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
 	int		(*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
 	void		(*ccid_hc_rx_exit)(struct sock *sk);
-- 
cgit v1.2.3


From 84a97b0af8c29aa5a47cc5271968a9c6004fb91e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 23:33:25 -0200
Subject: [CCID]: More informative registration

The patch makes the registration messages of CCID 2/3 a bit more
informative: instead of repeating the CCID number as currently done,

        "CCID: Registered CCID 2 (ccid2)"  or
        "CCID: Registered CCID 3 (ccid3)",

the descriptive names of the CCID's (from RFCs) are now used:

	"CCID: Registered CCID 2 (TCP-like)" and
	"CCID: Registered CCID 3 (TCP-Friendly Rate Control)".

To allow spaces in the name, the slab name string has been changed to
refer to the numeric CCID identifier, using the same format as before.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccid.c        | 8 ++++----
 net/dccp/ccids/ccid2.c | 2 +-
 net/dccp/ccids/ccid3.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index c45088b5e6f..4809753d12a 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -92,15 +92,15 @@ int ccid_register(struct ccid_operations *ccid_ops)
 
 	ccid_ops->ccid_hc_rx_slab =
 			ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
-					       "%s_hc_rx_sock",
-					       ccid_ops->ccid_name);
+					       "ccid%u_hc_rx_sock",
+					       ccid_ops->ccid_id);
 	if (ccid_ops->ccid_hc_rx_slab == NULL)
 		goto out;
 
 	ccid_ops->ccid_hc_tx_slab =
 			ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
-					       "%s_hc_tx_sock",
-					       ccid_ops->ccid_name);
+					       "ccid%u_hc_tx_sock",
+					       ccid_ops->ccid_id);
 	if (ccid_ops->ccid_hc_tx_slab == NULL)
 		goto out_free_rx_slab;
 
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 48eeb4494e8..b5b52ebb269 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -770,7 +770,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 static struct ccid_operations ccid2 = {
 	.ccid_id		= DCCPC_CCID2,
-	.ccid_name		= "ccid2",
+	.ccid_name		= "TCP-like",
 	.ccid_owner		= THIS_MODULE,
 	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
 	.ccid_hc_tx_init	= ccid2_hc_tx_init,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8f112d18d45..cd9b9ffe2ec 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -943,7 +943,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 
 static struct ccid_operations ccid3 = {
 	.ccid_id		   = DCCPC_CCID3,
-	.ccid_name		   = "ccid3",
+	.ccid_name		   = "TCP-Friendly Rate Control",
 	.ccid_owner		   = THIS_MODULE,
 	.ccid_hc_tx_obj_size	   = sizeof(struct ccid3_hc_tx_sock),
 	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
-- 
cgit v1.2.3


From 6179983ad30c43313e153b35af52bd9ebd7745c3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Dec 2007 23:37:55 -0200
Subject: [DCCP]: Introducing CCMPS

This introduces a CCMPS field for setting a CCID-specific upper bound on the application payload
size, as is defined in RFC 4340, section 14.

Only the TX CCID is considered in setting this limit, since the RX CCID generates comparatively
small (DCCP-Ack) feedback packets. The CCMPS field includes network and transport layer header
lengths. The only current CCMPS customer is CCID4 (via RFC 4828).

A wrapper is used to allow querying the CCMPS even at times where the CCID modules may not have
been fully negotiated yet.

In dccp_sync_mss() the variable `mss_now' has been renamed into `cur_mps', to reflect that we are
dealing with an MPS, but not an MSS.
Since the DCCP code closely follows the TCP code, the identifiers `dccp_sync_mss' and
`dccps_mss_cache' have been kept, as they have direct TCP counterparts.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccid.h   |  2 ++
 net/dccp/output.c | 30 +++++++++++++++++++++++-------
 2 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index e3cdd8afc80..fdeae7b5731 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -27,6 +27,7 @@ struct tcp_info;
  *  struct ccid_operations  -  Interface to Congestion-Control Infrastructure
  *
  *  @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
+ *  @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled)
  *  @ccid_name: alphabetical identifier string for @ccid_id
  *  @ccid_owner: module which implements/owns this CCID
  *  @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
@@ -45,6 +46,7 @@ struct tcp_info;
  */
 struct ccid_operations {
 	unsigned char		ccid_id;
+	__u32			ccid_ccmps;
 	const char		*ccid_name;
 	struct module		*ccid_owner;
 	struct kmem_cache	*ccid_hc_rx_slab,
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 5589a5e581f..3b763db3d86 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -133,15 +133,31 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 	return -ENOBUFS;
 }
 
+/**
+ * dccp_determine_ccmps  -  Find out about CCID-specfic packet-size limits
+ * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
+ * since the RX CCID is restricted to feedback packets (Acks), which are small
+ * in comparison with the data traffic. A value of 0 means "no current CCMPS".
+ */
+static u32 dccp_determine_ccmps(const struct dccp_sock *dp)
+{
+	const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid;
+
+	if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL)
+		return 0;
+	return tx_ccid->ccid_ops->ccid_ccmps;
+}
+
 unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
-		       sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));
+	u32 ccmps = dccp_determine_ccmps(dp);
+	int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
 
-	/* Now subtract optional transport overhead */
-	mss_now -= icsk->icsk_ext_hdr_len;
+	/* Account for header lengths and IPv4/v6 option overhead */
+	cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
+		    sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
 
 	/*
 	 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -151,13 +167,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	 * make it a multiple of 4
 	 */
 
-	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+	cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
-	dp->dccps_mss_cache = mss_now;
+	dp->dccps_mss_cache = cur_mps;
 
-	return mss_now;
+	return cur_mps;
 }
 
 EXPORT_SYMBOL_GPL(dccp_sync_mss);
-- 
cgit v1.2.3


From 91c5ec3ed184b7ca08314457b6e202e3d2cfb65f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:51:03 +0900
Subject: [BRIDGE]: Use cpu_to_be16() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 0ee79a726d9..255c00f60ce 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -109,7 +109,7 @@ static inline int is_link_local(const unsigned char *dest)
 {
 	__be16 *a = (__be16 *)dest;
 	static const __be16 *b = (const __be16 *)br_group_address;
-	static const __be16 m = __constant_cpu_to_be16(0xfff0);
+	static const __be16 m = cpu_to_be16(0xfff0);
 
 	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
 }
-- 
cgit v1.2.3


From b98999dc382a4e59a250f2ac9e32beca668cba0b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:51:49 +0900
Subject: [DECNET]: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/af_decnet.c  | 2 +-
 net/decnet/dn_nsp_out.c | 2 +-
 net/decnet/dn_route.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 57d57495183..acd48ee522d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1904,7 +1904,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
 	struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
 						   noblock, errcode);
 	if (skb) {
-		skb->protocol = __constant_htons(ETH_P_DNA_RT);
+		skb->protocol = htons(ETH_P_DNA_RT);
 		skb->pkt_type = PACKET_OUTGOING;
 	}
 	return skb;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 7404653880b..1964faf203e 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -124,7 +124,7 @@ struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
 	if ((skb = alloc_skb(size + hdr, pri)) == NULL)
 		return NULL;
 
-	skb->protocol = __constant_htons(ETH_P_DNA_RT);
+	skb->protocol = htons(ETH_P_DNA_RT);
 	skb->pkt_type = PACKET_OUTGOING;
 
 	if (sk)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b712d0b3652..1ae5efcdbd5 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1551,7 +1551,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 			kfree_skb(skb);
 			return -ENODEV;
 		}
-		skb->protocol = __constant_htons(ETH_P_DNA_RT);
+		skb->protocol = htons(ETH_P_DNA_RT);
 		skb->dev = dev;
 		cb->src = fl.fld_src;
 		cb->dst = fl.fld_dst;
-- 
cgit v1.2.3


From 4e39430ae33dfd9b0f47e5a89028352382c3dd34 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:52:26 +0900
Subject: [IEEE80211]: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee80211/ieee80211_rx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 21c0fadde03..13b12a67019 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -45,7 +45,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ieee80211_get_hdrlen(fc));
 	skb->pkt_type = PACKET_OTHERHOST;
-	skb->protocol = __constant_htons(ETH_P_80211_RAW);
+	skb->protocol = htons(ETH_P_80211_RAW);
 	memset(skb->cb, 0, sizeof(skb->cb));
 	netif_rx(skb);
 }
@@ -800,7 +800,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 	if (skb2 != NULL) {
 		/* send to wireless media */
 		skb2->dev = dev;
-		skb2->protocol = __constant_htons(ETH_P_802_3);
+		skb2->protocol = htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
 		skb_reset_network_header(skb2);
 		/* skb2->network_header += ETH_HLEN; */
-- 
cgit v1.2.3


From 5661df7b6cc399cabd1ed6b6a929a0445b862c10 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:53:11 +0900
Subject: [IPVS]: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_proto.c     |  2 +-
 net/ipv4/ipvs/ip_vs_proto_esp.c | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index c0e11ec8f0f..dde28a250d9 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -165,7 +165,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
 		sprintf(buf, "%s TRUNCATED", pp->name);
-	else if (ih->frag_off & __constant_htons(IP_OFFSET))
+	else if (ih->frag_off & htons(IP_OFFSET))
 		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
 			pp->name, NIPQUAD(ih->saddr),
 			NIPQUAD(ih->daddr));
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index c36ccf057a1..aef0d3ee8e4 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -52,15 +52,15 @@ esp_conn_in_get(const struct sk_buff *skb,
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
 				       iph->saddr,
-				       __constant_htons(PORT_ISAKMP),
+				       htons(PORT_ISAKMP),
 				       iph->daddr,
-				       __constant_htons(PORT_ISAKMP));
+				       htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
 				       iph->daddr,
-				       __constant_htons(PORT_ISAKMP),
+				       htons(PORT_ISAKMP),
 				       iph->saddr,
-				       __constant_htons(PORT_ISAKMP));
+				       htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
@@ -89,15 +89,15 @@ esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
 					iph->saddr,
-					__constant_htons(PORT_ISAKMP),
+					htons(PORT_ISAKMP),
 					iph->daddr,
-					__constant_htons(PORT_ISAKMP));
+					htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
 					iph->daddr,
-					__constant_htons(PORT_ISAKMP),
+					htons(PORT_ISAKMP),
 					iph->saddr,
-					__constant_htons(PORT_ISAKMP));
+					htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
-- 
cgit v1.2.3


From 6d82de9e576ea84b549b927e6a7775d918a427b5 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:53:26 +0900
Subject: [IRDA]: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/iriap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index a86a5d83786..390a790886e 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -579,7 +579,7 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self,
 	fp[n++] = ret_code;
 
 	/* Insert list length (MSB first) */
-	tmp_be16 = __constant_htons(0x0001);
+	tmp_be16 = htons(0x0001);
 	memcpy(fp+n, &tmp_be16, 2);  n += 2;
 
 	/* Insert object identifier ( MSB first) */
-- 
cgit v1.2.3


From f831e90971dc942a9f2fcc918a5739eb6d4ef4c5 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:54:23 +0900
Subject: [MAC80211]: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c  | 2 +-
 net/mac80211/wme.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0020e409e57..c542fc1c983 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1193,7 +1193,7 @@ ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
 
 	if (xmit_skb) {
 		/* send to wireless media */
-		xmit_skb->protocol = __constant_htons(ETH_P_802_3);
+		xmit_skb->protocol = htons(ETH_P_802_3);
 		skb_set_network_header(xmit_skb, 0);
 		skb_set_mac_header(xmit_skb, 0);
 		dev_queue_xmit(xmit_skb);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 8dbdededdec..455fadc3d84 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -55,7 +55,7 @@ static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
 
 	/* check there is a valid IP header present */
 	offset = ieee80211_get_hdrlen_from_skb(skb) + 8 /* LLC + proto */;
-	if (skb->protocol != __constant_htons(ETH_P_IP) ||
+	if (skb->protocol != htons(ETH_P_IP) ||
 	    skb->len < offset + sizeof(*ip))
 		return 0;
 
-- 
cgit v1.2.3


From ae445d172ab4d342a0a9d64df499cca8d5ad61b3 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:55:22 +0900
Subject: [RXRPC]: Use cpu_to_be32() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-connection.c | 2 +-
 net/rxrpc/ar-input.c      | 4 ++--
 net/rxrpc/rxkad.c         | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index d6667f7bc85..3869a586675 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -651,7 +651,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
 
 	candidate->trans = trans;
 	candidate->epoch = hdr->epoch;
-	candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK);
+	candidate->cid = hdr->cid & cpu_to_be32(RXRPC_CIDMASK);
 	candidate->service_id = hdr->serviceId;
 	candidate->security_ix = hdr->securityIndex;
 	candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index f446d9b9925..f8a699e9296 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -595,7 +595,7 @@ dead_call:
 	read_unlock_bh(&conn->lock);
 
 	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
-	    sp->hdr.seq == __constant_cpu_to_be32(1)) {
+	    sp->hdr.seq == cpu_to_be32(1)) {
 		_debug("incoming call");
 		skb_queue_tail(&conn->trans->local->accept_queue, skb);
 		rxrpc_queue_work(&conn->trans->local->acceptor);
@@ -774,7 +774,7 @@ cant_route_call:
 	_debug("can't route call");
 	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
 	    sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
-		if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
+		if (sp->hdr.seq == cpu_to_be32(1)) {
 			_debug("first packet");
 			skb_queue_tail(&local->accept_queue, skb);
 			rxrpc_queue_work(&local->acceptor);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 8e69d699383..f48434adb7c 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -284,7 +284,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
 
 	/* calculate the security checksum */
 	x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
-	x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+	x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
 	tmpbuf.x[0] = sp->hdr.callNumber;
 	tmpbuf.x[1] = x;
 
@@ -518,7 +518,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
 
 	/* validate the security checksum */
 	x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
-	x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+	x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
 	tmpbuf.x[0] = call->call_id;
 	tmpbuf.x[1] = x;
 
-- 
cgit v1.2.3


From 8d614434ab77b440b69e66a9bd44e46e7194c34a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 12 Dec 2007 03:55:42 +0900
Subject: [SUNRPC]: Use htonl() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ee8de7af2a5..1aa1580cda6 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -380,7 +380,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	headerp->rm_xid = rqst->rq_xid;
 	headerp->rm_vers = xdr_one;
 	headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
-	headerp->rm_type = __constant_htonl(RDMA_MSG);
+	headerp->rm_type = htonl(RDMA_MSG);
 
 	/*
 	 * Chunks needed for results?
@@ -458,11 +458,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 						RPCRDMA_INLINE_PAD_VALUE(rqst));
 
 		if (padlen) {
-			headerp->rm_type = __constant_htonl(RDMA_MSGP);
+			headerp->rm_type = htonl(RDMA_MSGP);
 			headerp->rm_body.rm_padded.rm_align =
 				htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
 			headerp->rm_body.rm_padded.rm_thresh =
-				__constant_htonl(RPCRDMA_INLINE_PAD_THRESH);
+				htonl(RPCRDMA_INLINE_PAD_THRESH);
 			headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
 			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
 			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
-- 
cgit v1.2.3


From 9055e051b8d4b266054fe511a65a9888d30fa64f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 14 Dec 2007 11:25:26 -0800
Subject: [UDP]: Move udp_stats_in6 into net/ipv4/udp.c

Now that external users may increment the counters directly, we need
to ensure that udp_stats_in6 is always available.  Otherwise we'd
either have to requrie the external users to be built as modules or
ipv6 to be built-in.

This isn't too bad because udp_stats_in6 is just a pair of pointers
plus an EXPORT, e.g., just 40 (16 + 24) bytes on x86-64.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 3 +++
 net/ipv6/udp.c | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ed6393c65d..3d602158565 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -112,6 +112,9 @@
 DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
 EXPORT_SYMBOL(udp_statistics);
 
+DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
+EXPORT_SYMBOL(udp_stats_in6);
+
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8b3e6d61bf5..c9a97b40551 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -51,9 +51,6 @@
 #include <linux/seq_file.h>
 #include "udp_impl.h"
 
-DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
-EXPORT_SYMBOL(udp_stats_in6);
-
 static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
 	return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
-- 
cgit v1.2.3


From 4bda4f250d21c3e4f2a2da5f4cef829a434a4046 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 14 Dec 2007 11:38:04 -0800
Subject: [XFRM]: Fix potential race vs xfrm_state(only)_find and
 xfrm_hash_resize.

The _find calls calculate the hash value using the
xfrm_state_hmask, without the xfrm_state_lock. But the
value of this mask can change in the _resize call under
the state_lock, so we risk to fail in finding the desired
entry in hash.

I think, that the hash value is better to calculate
under the state lock.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b2343d48fe9..f7c0951c9fd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -756,7 +756,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
-	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
+	unsigned int h;
 	struct hlist_node *entry;
 	struct xfrm_state *x, *x0;
 	int acquire_in_progress = 0;
@@ -764,6 +764,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	struct xfrm_state *best = NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
+	h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
@@ -865,11 +866,12 @@ struct xfrm_state *
 xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    unsigned short family, u8 mode, u8 proto, u32 reqid)
 {
-	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+	unsigned int h;
 	struct xfrm_state *rx = NULL, *x = NULL;
 	struct hlist_node *entry;
 
 	spin_lock(&xfrm_state_lock);
+	h = xfrm_dst_hash(daddr, saddr, reqid, family);
 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
-- 
cgit v1.2.3


From ea40b324d791d86a42b858d073007de4d133227e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 16 Dec 2007 13:30:07 -0800
Subject: [IPV4]: Make __devinet_sysctl_register return an error

Currently, this function is void, so failures in creating
sysctls for new/renamed devices are not reported to anywhere.

Fixing this is another complex (needed?) task, but this
return value is needed during the namespaces creation to
handle the case, when we failed to create "all" and "default"
entries.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2dc2f7ed6bb..82def2c1c65 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1454,8 +1454,8 @@ static struct devinet_sysctl_table {
 	},
 };
 
-static void __devinet_sysctl_register(char *dev_name, int ctl_name,
-		struct ipv4_devconf *p)
+static int __devinet_sysctl_register(struct net *net, char *dev_name,
+		int ctl_name, struct ipv4_devconf *p)
 {
 	int i;
 	struct devinet_sysctl_table *t;
@@ -1497,14 +1497,14 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name,
 		goto free_procname;
 
 	p->sysctl = t;
-	return;
+	return 0;
 
 free_procname:
 	kfree(t->dev_name);
 free:
 	kfree(t);
 out:
-	return;
+	return -ENOBUFS;
 }
 
 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
-- 
cgit v1.2.3


From 32e569b7277f13c4b27bb29c761189963e49ce7a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 16 Dec 2007 13:30:39 -0800
Subject: [IPV4]: Pass the net pointer to the arp_req_set_proxy()

This one will need to set the IPV4_DEVCONF_ALL(PROXY_ARP), but
there's no ways to get the net right in place, so we have to
pull one from the inet_ioctl's struct sock.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c |  2 +-
 net/ipv4/arp.c     | 28 ++++++++++++++++------------
 2 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5089a369e99..03633b7b9b4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -798,7 +798,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCDARP:
 		case SIOCGARP:
 		case SIOCSARP:
-			err = arp_ioctl(cmd, (void __user *)arg);
+			err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg);
 			break;
 		case SIOCGIFADDR:
 		case SIOCSIFADDR:
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 477f3e5fe8d..5daf504ba3b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -952,7 +952,7 @@ out_of_mem:
  *	Set (create) an ARP cache entry.
  */
 
-static int arp_req_set_proxy(struct net_device *dev, int on)
+static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 {
 	if (dev == NULL) {
 		IPV4_DEVCONF_ALL(PROXY_ARP) = on;
@@ -965,7 +965,8 @@ static int arp_req_set_proxy(struct net_device *dev, int on)
 	return -ENXIO;
 }
 
-static int arp_req_set_public(struct arpreq *r, struct net_device *dev)
+static int arp_req_set_public(struct net *net, struct arpreq *r,
+		struct net_device *dev)
 {
 	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
@@ -984,17 +985,18 @@ static int arp_req_set_public(struct arpreq *r, struct net_device *dev)
 		return 0;
 	}
 
-	return arp_req_set_proxy(dev, 1);
+	return arp_req_set_proxy(net, dev, 1);
 }
 
-static int arp_req_set(struct arpreq *r, struct net_device * dev)
+static int arp_req_set(struct net *net, struct arpreq *r,
+		struct net_device * dev)
 {
 	__be32 ip;
 	struct neighbour *neigh;
 	int err;
 
 	if (r->arp_flags & ATF_PUBL)
-		return arp_req_set_public(r, dev);
+		return arp_req_set_public(net, r, dev);
 
 	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (r->arp_flags & ATF_PERM)
@@ -1080,7 +1082,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
-static int arp_req_delete_public(struct arpreq *r, struct net_device *dev)
+static int arp_req_delete_public(struct net *net, struct arpreq *r,
+		struct net_device *dev)
 {
 	__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
 	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
@@ -1091,17 +1094,18 @@ static int arp_req_delete_public(struct arpreq *r, struct net_device *dev)
 	if (mask)
 		return -EINVAL;
 
-	return arp_req_set_proxy(dev, 0);
+	return arp_req_set_proxy(net, dev, 0);
 }
 
-static int arp_req_delete(struct arpreq *r, struct net_device * dev)
+static int arp_req_delete(struct net *net, struct arpreq *r,
+		struct net_device * dev)
 {
 	int err;
 	__be32 ip;
 	struct neighbour *neigh;
 
 	if (r->arp_flags & ATF_PUBL)
-		return arp_req_delete_public(r, dev);
+		return arp_req_delete_public(net, r, dev);
 
 	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (dev == NULL) {
@@ -1131,7 +1135,7 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
  *	Handle an ARP layer I/O control request.
  */
 
-int arp_ioctl(unsigned int cmd, void __user *arg)
+int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 {
 	int err;
 	struct arpreq r;
@@ -1179,10 +1183,10 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
 
 	switch (cmd) {
 	case SIOCDARP:
-		err = arp_req_delete(&r, dev);
+		err = arp_req_delete(net, &r, dev);
 		break;
 	case SIOCSARP:
-		err = arp_req_set(&r, dev);
+		err = arp_req_set(net, &r, dev);
 		break;
 	case SIOCGARP:
 		err = arp_req_get(&r, dev);
-- 
cgit v1.2.3


From c0ce9fb304871daa0dd2bf0efb1b241b9fa4a279 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 16 Dec 2007 13:31:14 -0800
Subject: [IPV4]: Store the net pointer on devinet's ctl tables

Some handers and strategies of devinet sysctl tables need
to know the net to propagate the ctl change to all the
net devices.

I use the (currently unused) extra2 pointer on the tables
to get it.

Holding the reference on the struct net is not possible,
because otherwise we'll get a net->ctl_table->net circular
dependency. But since the ctl tables are unregistered during
the net destruction, this is safe to get it w/o additional
protection.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 82def2c1c65..8b2a44c4f82 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1236,12 +1236,12 @@ errout:
 
 #ifdef CONFIG_SYSCTL
 
-static void devinet_copy_dflt_conf(int i)
+static void devinet_copy_dflt_conf(struct net *net, int i)
 {
 	struct net_device *dev;
 
 	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
@@ -1252,7 +1252,7 @@ static void devinet_copy_dflt_conf(int i)
 	read_unlock(&dev_base_lock);
 }
 
-static void inet_forward_change(void)
+static void inet_forward_change(struct net *net)
 {
 	struct net_device *dev;
 	int on = IPV4_DEVCONF_ALL(FORWARDING);
@@ -1261,7 +1261,7 @@ static void inet_forward_change(void)
 	IPV4_DEVCONF_DFLT(FORWARDING) = on;
 
 	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
@@ -1282,12 +1282,13 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 
 	if (write) {
 		struct ipv4_devconf *cnf = ctl->extra1;
+		struct net *net = ctl->extra2;
 		int i = (int *)ctl->data - cnf->data;
 
 		set_bit(i, cnf->state);
 
 		if (cnf == &ipv4_devconf_dflt)
-			devinet_copy_dflt_conf(i);
+			devinet_copy_dflt_conf(net, i);
 	}
 
 	return ret;
@@ -1298,6 +1299,7 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
 			       void __user *newval, size_t newlen)
 {
 	struct ipv4_devconf *cnf;
+	struct net *net;
 	int *valp = table->data;
 	int new;
 	int i;
@@ -1333,12 +1335,13 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
 	*valp = new;
 
 	cnf = table->extra1;
+	net = table->extra2;
 	i = (int *)table->data - cnf->data;
 
 	set_bit(i, cnf->state);
 
 	if (cnf == &ipv4_devconf_dflt)
-		devinet_copy_dflt_conf(i);
+		devinet_copy_dflt_conf(net, i);
 
 	return 1;
 }
@@ -1352,8 +1355,10 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
+		struct net *net = ctl->extra2;
+
 		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
-			inet_forward_change();
+			inet_forward_change(net);
 		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
 			rt_cache_flush(0);
 	}
@@ -1477,6 +1482,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].extra1 = p;
+		t->devinet_vars[i].extra2 = net;
 	}
 
 	/*
@@ -1524,8 +1530,8 @@ static void devinet_sysctl_register(struct in_device *idev)
 {
 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
-	__devinet_sysctl_register(idev->dev->name, idev->dev->ifindex,
-			&idev->cnf);
+	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
+			idev->dev->ifindex, &idev->cnf);
 }
 
 static void devinet_sysctl_unregister(struct in_device *idev)
@@ -1546,6 +1552,7 @@ static struct ctl_table ctl_forward_entry[] = {
 		.proc_handler	= devinet_sysctl_forward,
 		.strategy	= devinet_conf_sysctl,
 		.extra1		= &ipv4_devconf,
+		.extra2		= &init_net,
 	},
 	{ },
 };
@@ -1565,9 +1572,9 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
 #ifdef CONFIG_SYSCTL
-	__devinet_sysctl_register("all", NET_PROTO_CONF_ALL,
+	__devinet_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL,
 			&ipv4_devconf);
-	__devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+	__devinet_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT,
 			&ipv4_devconf_dflt);
 	register_sysctl_paths(net_ipv4_path, ctl_forward_entry);
 #endif
-- 
cgit v1.2.3


From 752d14dc6aa9d0fc8f3b25e5052596fb549e5157 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 16 Dec 2007 13:31:47 -0800
Subject: [IPV4]: Move the devinet pointers on the struct net

This is the core.

Add all and default pointers on the netns_ipv4 and register
a new pernet subsys to initialize them.

Also add the ctl_table_header to register the
net.ipv4.ip_forward ctl.

I don't allocate additional memory for init_net, but use
global devinets.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 96 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 8b2a44c4f82..a2d48173828 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -62,6 +62,7 @@
 #include <net/route.h>
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
+#include <net/net_namespace.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.data = {
@@ -1497,7 +1498,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
 
-	t->sysctl_header = register_sysctl_paths(devinet_ctl_path,
+	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
 			t->devinet_vars);
 	if (!t->sysctl_header)
 		goto free_procname;
@@ -1557,27 +1558,113 @@ static struct ctl_table ctl_forward_entry[] = {
 	{ },
 };
 
-static __initdata struct ctl_path net_ipv4_path[] = {
+static __net_initdata struct ctl_path net_ipv4_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
 	{ },
 };
 
+static __net_init int devinet_init_net(struct net *net)
+{
+	int err;
+	struct ctl_table *tbl;
+	struct ipv4_devconf *all, *dflt;
+	struct ctl_table_header *forw_hdr;
+
+	err = -ENOMEM;
+	all = &ipv4_devconf;
+	dflt = &ipv4_devconf_dflt;
+	tbl = ctl_forward_entry;
+
+	if (net != &init_net) {
+		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
+		if (all == NULL)
+			goto err_alloc_all;
+
+		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
+		if (dflt == NULL)
+			goto err_alloc_dflt;
+
+		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_alloc_ctl;
+
+		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
+		tbl[0].extra1 = all;
+		tbl[0].extra2 = net;
+	}
+
+#ifdef CONFIG_SYSCTL
+	err = __devinet_sysctl_register(net, "all",
+			NET_PROTO_CONF_ALL, all);
+	if (err < 0)
+		goto err_reg_all;
+
+	err = __devinet_sysctl_register(net, "default",
+			NET_PROTO_CONF_DEFAULT, dflt);
+	if (err < 0)
+		goto err_reg_dflt;
+
+	err = -ENOMEM;
+	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
+	if (forw_hdr == NULL)
+		goto err_reg_ctl;
+#endif
+
+	net->ipv4.forw_hdr = forw_hdr;
+	net->ipv4.devconf_all = all;
+	net->ipv4.devconf_dflt = dflt;
+	return 0;
+
+#ifdef CONFIG_SYSCTL
+err_reg_ctl:
+	__devinet_sysctl_unregister(dflt);
+err_reg_dflt:
+	__devinet_sysctl_unregister(all);
+err_reg_all:
+	if (tbl != ctl_forward_entry)
+		kfree(tbl);
+#endif
+err_alloc_ctl:
+	if (dflt != &ipv4_devconf_dflt)
+		kfree(dflt);
+err_alloc_dflt:
+	if (all != &ipv4_devconf)
+		kfree(all);
+err_alloc_all:
+	return err;
+}
+
+static __net_exit void devinet_exit_net(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = net->ipv4.forw_hdr->ctl_table_arg;
+#ifdef CONFIG_SYSCTL
+	unregister_net_sysctl_table(net->ipv4.forw_hdr);
+	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
+	__devinet_sysctl_unregister(net->ipv4.devconf_all);
+#endif
+	kfree(tbl);
+	kfree(net->ipv4.devconf_dflt);
+	kfree(net->ipv4.devconf_all);
+}
+
+static __net_initdata struct pernet_operations devinet_ops = {
+	.init = devinet_init_net,
+	.exit = devinet_exit_net,
+};
+
 void __init devinet_init(void)
 {
+	register_pernet_subsys(&devinet_ops);
+
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
-#ifdef CONFIG_SYSCTL
-	__devinet_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL,
-			&ipv4_devconf);
-	__devinet_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT,
-			&ipv4_devconf_dflt);
-	register_sysctl_paths(net_ipv4_path, ctl_forward_entry);
-#endif
 }
 
 EXPORT_SYMBOL(in_dev_finish_destroy);
-- 
cgit v1.2.3


From 9355bbd685bf705a7f7bd6470b92ca0562c7a661 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 16 Dec 2007 13:32:16 -0800
Subject: [IPV4]: Switch users of ipv4_devconf_dflt to use the pernet one

They are all collected in the net/ipv4/devinet.c file and
mostly use the IPV4_DEVCONF_DFLT macro.

So I add the net parameter to it and patch users accordingly.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a2d48173828..4c01c55c381 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -83,7 +83,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
 	},
 };
 
-#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
+#define IPV4_DEVCONF_DFLT(net, attr) \
+	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
 
 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
@@ -164,7 +165,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	if (!in_dev)
 		goto out;
 	INIT_RCU_HEAD(&in_dev->rcu_head);
-	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
+	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
+			sizeof(in_dev->cnf));
 	in_dev->cnf.sysctl = NULL;
 	in_dev->dev = dev;
 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
@@ -1247,7 +1249,7 @@ static void devinet_copy_dflt_conf(struct net *net, int i)
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev && !test_bit(i, in_dev->cnf.state))
-			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
+			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
 		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
@@ -1259,7 +1261,7 @@ static void inet_forward_change(struct net *net)
 	int on = IPV4_DEVCONF_ALL(FORWARDING);
 
 	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
-	IPV4_DEVCONF_DFLT(FORWARDING) = on;
+	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
 
 	read_lock(&dev_base_lock);
 	for_each_netdev(net, dev) {
@@ -1288,7 +1290,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 
 		set_bit(i, cnf->state);
 
-		if (cnf == &ipv4_devconf_dflt)
+		if (cnf == net->ipv4.devconf_dflt)
 			devinet_copy_dflt_conf(net, i);
 	}
 
@@ -1341,7 +1343,7 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
 
 	set_bit(i, cnf->state);
 
-	if (cnf == &ipv4_devconf_dflt)
+	if (cnf == net->ipv4.devconf_dflt)
 		devinet_copy_dflt_conf(net, i);
 
 	return 1;
@@ -1360,7 +1362,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 
 		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
 			inet_forward_change(net);
-		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
+		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
 			rt_cache_flush(0);
 	}
 
-- 
cgit v1.2.3


From 586f12115264b767ea6a48ce081ca25a39c1e3dd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sun, 16 Dec 2007 13:32:48 -0800
Subject: [IPV4]: Switch users of ipv4_devconf(_all) to use the pernet one

These are scattered over the code, but almost all the
"critical" places already have the proper struct net
at hand except for snmp proc showing function and routing
rtnl handler.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c     | 4 ++--
 net/ipv4/devinet.c | 6 +++---
 net/ipv4/igmp.c    | 4 ++--
 net/ipv4/ipmr.c    | 4 ++--
 net/ipv4/proc.c    | 3 ++-
 net/ipv4/route.c   | 2 +-
 6 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 5daf504ba3b..1102fb3d801 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -860,7 +860,7 @@ static int arp_process(struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
+	if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
@@ -955,7 +955,7 @@ out_of_mem:
 static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 {
 	if (dev == NULL) {
-		IPV4_DEVCONF_ALL(PROXY_ARP) = on;
+		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
 		return 0;
 	}
 	if (__in_dev_get_rtnl(dev)) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 4c01c55c381..1f21f4a2df8 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1258,9 +1258,9 @@ static void devinet_copy_dflt_conf(struct net *net, int i)
 static void inet_forward_change(struct net *net)
 {
 	struct net_device *dev;
-	int on = IPV4_DEVCONF_ALL(FORWARDING);
+	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
 
-	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
+	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
 
 	read_lock(&dev_base_lock);
@@ -1360,7 +1360,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	if (write && *valp != val) {
 		struct net *net = ctl->extra2;
 
-		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
+		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
 			inet_forward_change(net);
 		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
 			rt_cache_flush(0);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c560a9392b1..d3d5906e1b3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -130,12 +130,12 @@
  */
 
 #define IGMP_V1_SEEN(in_dev) \
-	(IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \
+	(IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
 	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
 	 ((in_dev)->mr_v1_seen && \
 	  time_before(jiffies, (in_dev)->mr_v1_seen)))
 #define IGMP_V2_SEEN(in_dev) \
-	(IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \
+	(IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
 	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
 	 ((in_dev)->mr_v2_seen && \
 	  time_before(jiffies, (in_dev)->mr_v2_seen)))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 11879283ad5..9947f523862 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk)
 {
 	rtnl_lock();
 	if (sk == mroute_socket) {
-		IPV4_DEVCONF_ALL(MC_FORWARDING)--;
+		IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
 
 		write_lock_bh(&mrt_lock);
 		mroute_socket=NULL;
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
 			mroute_socket=sk;
 			write_unlock_bh(&mrt_lock);
 
-			IPV4_DEVCONF_ALL(MC_FORWARDING)++;
+			IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
 		}
 		rtnl_unlock();
 		return ret;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ce34b281803..41734db677b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -309,7 +309,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-		   IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
+		   IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
+		   sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1b70ffd1261..36c7add8de8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2619,7 +2619,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		__be32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    IPV4_DEVCONF_ALL(MC_FORWARDING)) {
+		    IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
-- 
cgit v1.2.3


From 21cf2253ebcf070bc307e0b56d696a2519c75cb4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 16 Dec 2007 13:44:00 -0800
Subject: [IPV4] net/core: Use ipv4_is_<type>

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 3 ++-
 net/core/pktgen.c  | 8 +++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b1d5acd2fc7..6faa128a4c8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -410,7 +410,8 @@ static void arp_reply(struct sk_buff *skb)
 	memcpy(&tip, arp_ptr, 4);
 
 	/* Should we ignore arp? */
-	if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
+	if (tip != htonl(np->local_ip) ||
+	    ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
 		return;
 
 	size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 285ec3ed9b3..ede1feacddf 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2358,9 +2358,11 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 					t = random32() % (imx - imn) + imn;
 					s = htonl(t);
 
-					while (LOOPBACK(s) || MULTICAST(s)
-					       || BADCLASS(s) || ZERONET(s)
-					       || LOCAL_MCAST(s)) {
+					while (ipv4_is_loopback(s) ||
+					       ipv4_is_multicast(s) ||
+					       ipv4_is_badclass(s) ||
+					       ipv4_is_zeronet(s) ||
+					       ipv4_is_local_multicast(s)) {
 						t = random32() % (imx - imn) + imn;
 						s = htonl(t);
 					}
-- 
cgit v1.2.3


From f97c1e0c6ebdb606c97b6cb5e837c6110ac5a961 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 16 Dec 2007 13:45:43 -0800
Subject: [IPV4] net/ipv4: Use ipv4_is_<type>

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c          |  2 +-
 net/ipv4/datagram.c     |  2 +-
 net/ipv4/devinet.c      |  4 ++--
 net/ipv4/fib_frontend.c |  6 +++---
 net/ipv4/igmp.c         | 12 ++++++------
 net/ipv4/ip_gre.c       | 23 ++++++++++++----------
 net/ipv4/ipmr.c         |  6 +++---
 net/ipv4/raw.c          |  2 +-
 net/ipv4/route.c        | 52 +++++++++++++++++++++++++++----------------------
 net/ipv4/udp.c          |  2 +-
 10 files changed, 60 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1102fb3d801..fdf12d1c350 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -777,7 +777,7 @@ static int arp_process(struct sk_buff *skb)
  *	Check for bad requests for 127.x.x.x and requests for multicast
  *	addresses.  If this is one such, delete it.
  */
-	if (LOOPBACK(tip) || MULTICAST(tip))
+	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
 		goto out;
 
 /*
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0301dd468cf..0c0c73f368c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -40,7 +40,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	oif = sk->sk_bound_dev_if;
 	saddr = inet->saddr;
-	if (MULTICAST(usin->sin_addr.s_addr)) {
+	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
 		if (!oif)
 			oif = inet->mc_index;
 		if (!saddr)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 1f21f4a2df8..44cb252d2f6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -404,7 +404,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 		in_dev_hold(in_dev);
 		ifa->ifa_dev = in_dev;
 	}
-	if (LOOPBACK(ifa->ifa_local))
+	if (ipv4_is_loopback(ifa->ifa_local))
 		ifa->ifa_scope = RT_SCOPE_HOST;
 	return inet_insert_ifa(ifa);
 }
@@ -583,7 +583,7 @@ static __inline__ int inet_abc_len(__be32 addr)
 {
 	int rc = -1;	/* Something else, probably a multicast. */
 
-	if (ZERONET(addr))
+	if (ipv4_is_zeronet(addr))
 		rc = 0;
 	else {
 		__u32 haddr = ntohl(addr);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d90b42f3630..ac6238a3b0f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -167,9 +167,9 @@ static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
 	unsigned ret = RTN_BROADCAST;
 	struct fib_table *local_table;
 
-	if (ZERONET(addr) || BADCLASS(addr))
+	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
 		return RTN_BROADCAST;
-	if (MULTICAST(addr))
+	if (ipv4_is_multicast(addr))
 		return RTN_MULTICAST;
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -710,7 +710,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 
-	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
+	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d3d5906e1b3..285d26218a5 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1742,7 +1742,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	int ifindex;
 	int count = 0;
 
-	if (!MULTICAST(addr))
+	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
 	rtnl_lock();
@@ -1855,7 +1855,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	int leavegroup = 0;
 	int i, j, rv;
 
-	if (!MULTICAST(addr))
+	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
 	rtnl_lock();
@@ -1985,7 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	struct ip_sf_socklist *newpsl, *psl;
 	int leavegroup = 0;
 
-	if (!MULTICAST(addr))
+	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 	if (msf->imsf_fmode != MCAST_INCLUDE &&
 	    msf->imsf_fmode != MCAST_EXCLUDE)
@@ -2068,7 +2068,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_sf_socklist *psl;
 
-	if (!MULTICAST(addr))
+	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
 	rtnl_lock();
@@ -2130,7 +2130,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (psin->sin_family != AF_INET)
 		return -EINVAL;
 	addr = psin->sin_addr.s_addr;
-	if (!MULTICAST(addr))
+	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
 	rtnl_lock();
@@ -2180,7 +2180,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 	struct ip_sf_socklist *psl;
 	int i;
 
-	if (!MULTICAST(loc_addr))
+	if (!ipv4_is_multicast(loc_addr))
 		return 1;
 
 	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0832f6e028b..8b81deb8ff1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -176,7 +176,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
 	}
 	for (t = tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr ||
-		     (local == t->parms.iph.daddr && MULTICAST(local))) {
+		     (local == t->parms.iph.daddr &&
+		      ipv4_is_multicast(local))) {
 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 				return t;
 		}
@@ -201,7 +202,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
 
 	if (local)
 		prio |= 1;
-	if (remote && !MULTICAST(remote)) {
+	if (remote && !ipv4_is_multicast(remote)) {
 		prio |= 2;
 		h ^= HASH(remote);
 	}
@@ -367,7 +368,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 
 	read_lock(&ipgre_lock);
 	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
-	if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
+	if (t == NULL || t->parms.iph.daddr == 0 ||
+	    ipv4_is_multicast(t->parms.iph.daddr))
 		goto out;
 
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
@@ -619,7 +621,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
-		if (MULTICAST(iph->daddr)) {
+		if (ipv4_is_multicast(iph->daddr)) {
 			/* Looped back packet, drop it! */
 			if (((struct rtable*)skb->dst)->fl.iif == 0)
 				goto drop;
@@ -783,7 +785,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
 
 		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
-			if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
+			if ((tunnel->parms.iph.daddr &&
+			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
 				skb->dst->metrics[RTAX_MTU-1] = mtu;
@@ -1009,7 +1012,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 
 				t = netdev_priv(dev);
 
-				if (MULTICAST(p.iph.daddr))
+				if (ipv4_is_multicast(p.iph.daddr))
 					nflags = IFF_BROADCAST;
 				else if (p.iph.daddr)
 					nflags = IFF_POINTOPOINT;
@@ -1143,7 +1146,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 		memcpy(&iph->daddr, daddr, 4);
 		return t->hlen;
 	}
-	if (iph->daddr && !MULTICAST(iph->daddr))
+	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
 		return t->hlen;
 
 	return -t->hlen;
@@ -1166,7 +1169,7 @@ static int ipgre_open(struct net_device *dev)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
 
-	if (MULTICAST(t->parms.iph.daddr)) {
+	if (ipv4_is_multicast(t->parms.iph.daddr)) {
 		struct flowi fl = { .oif = t->parms.link,
 				    .nl_u = { .ip4_u =
 					      { .daddr = t->parms.iph.daddr,
@@ -1189,7 +1192,7 @@ static int ipgre_open(struct net_device *dev)
 static int ipgre_close(struct net_device *dev)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
+	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 		struct in_device *in_dev = inetdev_by_index(t->mlink);
 		if (in_dev) {
 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
@@ -1236,7 +1239,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 
 	if (iph->daddr) {
 #ifdef CONFIG_NET_IPGRE_BROADCAST
-		if (MULTICAST(iph->daddr)) {
+		if (ipv4_is_multicast(iph->daddr)) {
 			if (!iph->saddr)
 				return -EINVAL;
 			dev->flags = IFF_BROADCAST;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9947f523862..772daf77878 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -749,7 +749,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 		return 0;
 	}
 
-	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
 	c=ipmr_cache_alloc();
@@ -1461,7 +1461,7 @@ int pim_rcv_v1(struct sk_buff * skb)
 	   b. packet is not a NULL-REGISTER
 	   c. packet is not truncated
 	 */
-	if (!MULTICAST(encap->daddr) ||
+	if (!ipv4_is_multicast(encap->daddr) ||
 	    encap->tot_len == 0 ||
 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
 		goto drop;
@@ -1517,7 +1517,7 @@ static int pim_rcv(struct sk_buff * skb)
 	/* check if the inner packet is destined to mcast group */
 	encap = (struct iphdr *)(skb_transport_header(skb) +
 				 sizeof(struct pimreghdr));
-	if (!MULTICAST(encap->daddr) ||
+	if (!ipv4_is_multicast(encap->daddr) ||
 	    encap->tot_len == 0 ||
 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
 		goto drop;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2ff8214a530..5aec5a5e5f1 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -530,7 +530,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (msg->msg_flags & MSG_DONTROUTE)
 		tos |= RTO_ONLINK;
 
-	if (MULTICAST(daddr)) {
+	if (ipv4_is_multicast(daddr)) {
 		if (!ipc.oif)
 			ipc.oif = inet->mc_index;
 		if (!saddr)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 36c7add8de8..1cc6c23cf75 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1154,7 +1154,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		return;
 
 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
-	    || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
+	    || ipv4_is_multicast(new_gw) || ipv4_is_badclass(new_gw)
+	    || ipv4_is_zeronet(new_gw))
 		goto reject_redirect;
 
 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
@@ -1633,12 +1634,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (in_dev == NULL)
 		return -EINVAL;
 
-	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
-	    skb->protocol != htons(ETH_P_IP))
+	if (ipv4_is_multicast(saddr) || ipv4_is_badclass(saddr) ||
+	    ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
 		goto e_inval;
 
-	if (ZERONET(saddr)) {
-		if (!LOCAL_MCAST(daddr))
+	if (ipv4_is_zeronet(saddr)) {
+		if (!ipv4_is_local_multicast(daddr))
 			goto e_inval;
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
 	} else if (fib_validate_source(saddr, 0, tos, 0,
@@ -1680,7 +1681,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	}
 
 #ifdef CONFIG_IP_MROUTE
-	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
 		rth->u.dst.input = ip_mr_input;
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
@@ -1890,7 +1891,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	   by fib_lookup.
 	 */
 
-	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
+	if (ipv4_is_multicast(saddr) || ipv4_is_badclass(saddr) ||
+	    ipv4_is_loopback(saddr))
 		goto martian_source;
 
 	if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
@@ -1899,10 +1901,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/* Accept zero addresses only to limited broadcast;
 	 * I even do not know to fix it or not. Waiting for complains :-)
 	 */
-	if (ZERONET(saddr))
+	if (ipv4_is_zeronet(saddr))
 		goto martian_source;
 
-	if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
+	if (ipv4_is_badclass(daddr) || ipv4_is_zeronet(daddr) ||
+	    ipv4_is_loopback(daddr))
 		goto martian_destination;
 
 	/*
@@ -1949,7 +1952,7 @@ brd_input:
 	if (skb->protocol != htons(ETH_P_IP))
 		goto e_inval;
 
-	if (ZERONET(saddr))
+	if (ipv4_is_zeronet(saddr))
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
 	else {
 		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
@@ -2079,7 +2082,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	   Note, that multicast routers are not affected, because
 	   route cache entry is created eventually.
 	 */
-	if (MULTICAST(daddr)) {
+	if (ipv4_is_multicast(daddr)) {
 		struct in_device *in_dev;
 
 		rcu_read_lock();
@@ -2088,7 +2091,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 				ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
-			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+			    || (!ipv4_is_local_multicast(daddr) &&
+				IN_DEV_MFORWARD(in_dev))
 #endif
 			    ) {
 				rcu_read_unlock();
@@ -2114,14 +2118,14 @@ static inline int __mkroute_output(struct rtable **result,
 	u32 tos = RT_FL_TOS(oldflp);
 	int err = 0;
 
-	if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
 		return -EINVAL;
 
 	if (fl->fl4_dst == htonl(0xFFFFFFFF))
 		res->type = RTN_BROADCAST;
-	else if (MULTICAST(fl->fl4_dst))
+	else if (ipv4_is_multicast(fl->fl4_dst))
 		res->type = RTN_MULTICAST;
-	else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst))
+	else if (ipv4_is_badclass(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
 		return -EINVAL;
 
 	if (dev_out->flags & IFF_LOOPBACK)
@@ -2201,7 +2205,7 @@ static inline int __mkroute_output(struct rtable **result,
 #ifdef CONFIG_IP_MROUTE
 		if (res->type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
-			    !LOCAL_MCAST(oldflp->fl4_dst)) {
+			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
 				rth->u.dst.input = ip_mr_input;
 				rth->u.dst.output = ip_mc_output;
 			}
@@ -2271,9 +2275,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 	if (oldflp->fl4_src) {
 		err = -EINVAL;
-		if (MULTICAST(oldflp->fl4_src) ||
-		    BADCLASS(oldflp->fl4_src) ||
-		    ZERONET(oldflp->fl4_src))
+		if (ipv4_is_multicast(oldflp->fl4_src) ||
+		    ipv4_is_badclass(oldflp->fl4_src) ||
+		    ipv4_is_zeronet(oldflp->fl4_src))
 			goto out;
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
@@ -2290,7 +2294,8 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		 */
 
 		if (oldflp->oif == 0
-		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+		    && (ipv4_is_multicast(oldflp->fl4_dst) ||
+			oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -2327,14 +2332,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 			goto out;	/* Wrong error code */
 		}
 
-		if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
+		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
+		    oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
 			if (!fl.fl4_src)
 				fl.fl4_src = inet_select_addr(dev_out, 0,
 							      RT_SCOPE_LINK);
 			goto make_route;
 		}
 		if (!fl.fl4_src) {
-			if (MULTICAST(oldflp->fl4_dst))
+			if (ipv4_is_multicast(oldflp->fl4_dst))
 				fl.fl4_src = inet_select_addr(dev_out, 0,
 							      fl.fl4_scope);
 			else if (!oldflp->fl4_dst)
@@ -2618,7 +2624,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 #ifdef CONFIG_IP_MROUTE
 		__be32 dst = rt->rt_dst;
 
-		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
+		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
 		    IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3d602158565..1ce6b60b7f9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -626,7 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		connected = 0;
 	}
 
-	if (MULTICAST(daddr)) {
+	if (ipv4_is_multicast(daddr)) {
 		if (!ipc.oif)
 			ipc.oif = inet->mc_index;
 		if (!saddr)
-- 
cgit v1.2.3


From 37ef8dd7f3f2f228336e3779e7cec762d90e1f00 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 16 Dec 2007 13:46:15 -0800
Subject: [IPV4] net/netfilter: Use ipv4_is_<type>

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_pkttype.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index c598bbed8e0..276244902ab 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -31,7 +31,7 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
 	const struct xt_pkttype_info *info = matchinfo;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = MULTICAST(ip_hdr(skb)->daddr)
+		type = ipv4_is_multicast(ip_hdr(skb)->daddr)
 			? PACKET_MULTICAST
 			: PACKET_BROADCAST;
 	else
-- 
cgit v1.2.3


From b5cb2bbc4c6cb489941be881e8adfe136ee45b8e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 16 Dec 2007 13:46:59 -0800
Subject: [IPV4] sctp: Use ipv4_is_<type>

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d50f610d1b0..dc22d710849 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -359,7 +359,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
 			      const struct sk_buff *skb)
 {
 	/* Is this a non-unicast address or a unusable SCTP address? */
-	if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr))
+	if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr))
 		return 0;
 
 	/* Is this a broadcast address? */
@@ -408,13 +408,15 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
 	 */
 
 	/* Check for unusable SCTP addresses. */
-	if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) {
+	if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
 		retval =  SCTP_SCOPE_UNUSABLE;
-	} else if (LOOPBACK(addr->v4.sin_addr.s_addr)) {
+	} else if (ipv4_is_loopback(addr->v4.sin_addr.s_addr)) {
 		retval = SCTP_SCOPE_LOOPBACK;
-	} else if (IS_IPV4_LINK_ADDRESS(&addr->v4.sin_addr.s_addr)) {
+	} else if (ipv4_is_linklocal_169(addr->v4.sin_addr.s_addr)) {
 		retval = SCTP_SCOPE_LINK;
-	} else if (IS_IPV4_PRIVATE_ADDRESS(&addr->v4.sin_addr.s_addr)) {
+	} else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
+		   ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
+		   ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
 		retval = SCTP_SCOPE_PRIVATE;
 	} else {
 		retval = SCTP_SCOPE_GLOBAL;
-- 
cgit v1.2.3


From 1bf40954cf232a043a49623cf251f787c1871e64 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 16 Dec 2007 14:04:02 -0800
Subject: [PACKET]: Fix /proc/net/packet crash due to bogus private pointer

The seq_open_net patch changed the meaning of seq->private.
Unfortunately it missed two spots in AF_PACKET, which still
used the old way of dereferencing seq->private, thus causing
weird and wonderful crashes when reading /proc/net/packet.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 485af5691d6..43e49f46ad4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1878,7 +1878,7 @@ static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	++*pos;
 	return  (v == SEQ_START_TOKEN)
 		? sk_head(&net->packet.sklist)
@@ -1887,7 +1887,7 @@ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	read_unlock(&net->packet.sklist_lock);
 }
 
-- 
cgit v1.2.3


From 9ad0977fe10bd5d052a6db7738afe017367c2e32 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Sun, 16 Dec 2007 14:06:41 -0800
Subject: [SCTP]: Use crc32c library for checksum calculations.

The crc32c library used an identical table and algorithm
as SCTP.  Switch to using the library instead of carrying
our own table.  Using crypto layer proved to have too
much overhead compared to using the library directly.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/Kconfig  |   1 +
 net/sctp/Makefile |   2 +-
 net/sctp/crc32c.c | 222 ------------------------------------------------------
 net/sctp/input.c  |   1 +
 net/sctp/output.c |   1 +
 5 files changed, 4 insertions(+), 223 deletions(-)
 delete mode 100644 net/sctp/crc32c.c

(limited to 'net')

diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 5390bc79215..0b79f869c4e 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -10,6 +10,7 @@ menuconfig IP_SCTP
 	select CRYPTO_HMAC
 	select CRYPTO_SHA1
 	select CRYPTO_MD5 if SCTP_HMAC_MD5
+	select LIBCRC32C
 	---help---
 	  Stream Control Transmission Protocol
 
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 1da7204d9b4..f5356b9d5ee 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -9,7 +9,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  transport.o chunk.o sm_make_chunk.o ulpevent.o \
 	  inqueue.o outqueue.o ulpqueue.o command.o \
 	  tsnmap.o bind_addr.o socket.o primitive.o \
-	  output.o input.o debug.o ssnmap.o proc.o crc32c.o \
+	  output.o input.o debug.o ssnmap.o proc.o \
 	  auth.o
 
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
diff --git a/net/sctp/crc32c.c b/net/sctp/crc32c.c
deleted file mode 100644
index 181edabdb8c..00000000000
--- a/net/sctp/crc32c.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/* SCTP kernel reference Implementation
- * Copyright (c) 1999-2001 Motorola, Inc.
- * Copyright (c) 2001-2003 International Business Machines, Corp.
- *
- * This file is part of the SCTP kernel reference Implementation
- *
- * SCTP Checksum functions
- *
- * The SCTP reference implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * The SCTP reference implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, write to
- * the Free Software Foundation, 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- * Please send any bug reports or fixes you make to the
- * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
- *
- * Written or modified by:
- *    Dinakaran Joseph
- *    Jon Grimm <jgrimm@us.ibm.com>
- *    Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
- */
-
-/* The following code has been taken directly from
- * draft-ietf-tsvwg-sctpcsum-03.txt
- *
- * The code has now been modified specifically for SCTP knowledge.
- */
-
-#include <linux/types.h>
-#include <net/sctp/sctp.h>
-
-#define CRC32C_POLY 0x1EDC6F41
-#define CRC32C(c,d) (c=(c>>8)^crc_c[(c^(d))&0xFF])
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-/* Copyright 2001, D. Otis.  Use this program, code or tables    */
-/* extracted from it, as desired without restriction.            */
-/*                                                               */
-/* 32 Bit Reflected CRC table generation for SCTP.               */
-/* To accommodate serial byte data being shifted out least       */
-/* significant bit first, the table's 32 bit words are reflected */
-/* which flips both byte and bit MS and LS positions.  The CRC   */
-/* is calculated MS bits first from the perspective of the serial*/
-/* stream.  The x^32 term is implied and the x^0 term may also   */
-/* be shown as +1.  The polynomial code used is 0x1EDC6F41.      */
-/* Castagnoli93                                                  */
-/* x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+  */
-/* x^11+x^10+x^9+x^8+x^6+x^0                                     */
-/* Guy Castagnoli Stefan Braeuer and Martin Herrman              */
-/* "Optimization of Cyclic Redundancy-Check Codes                */
-/* with 24 and 32 Parity Bits",                                  */
-/* IEEE Transactions on Communications, Vol.41, No.6, June 1993  */
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-static const __u32 crc_c[256] = {
-	0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
-	0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
-	0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
-	0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
-	0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
-	0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
-	0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
-	0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
-	0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
-	0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
-	0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
-	0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
-	0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
-	0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
-	0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
-	0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
-	0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
-	0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
-	0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
-	0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
-	0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
-	0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
-	0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
-	0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
-	0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
-	0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
-	0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
-	0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
-	0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
-	0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
-	0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
-	0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
-	0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
-	0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
-	0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
-	0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
-	0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
-	0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
-	0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
-	0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
-	0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
-	0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
-	0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
-	0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
-	0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
-	0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
-	0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
-	0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
-	0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
-	0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
-	0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
-	0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
-	0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
-	0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
-	0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
-	0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
-	0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
-	0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
-	0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
-	0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
-	0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
-	0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
-	0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
-	0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
-};
-
-__u32 sctp_start_cksum(__u8 *buffer, __u16 length)
-{
-	__u32 crc32 = ~(__u32) 0;
-	__u32 i;
-
-	/* Optimize this routine to be SCTP specific, knowing how
-	 * to skip the checksum field of the SCTP header.
-	 */
-
-	/* Calculate CRC up to the checksum. */
-	for (i = 0; i < (sizeof(struct sctphdr) - sizeof(__u32)); i++)
-		CRC32C(crc32, buffer[i]);
-
-	/* Skip checksum field of the header. */
-	for (i = 0; i < sizeof(__u32); i++)
-		CRC32C(crc32, 0);
-
-	/* Calculate the rest of the CRC. */
-	for (i = sizeof(struct sctphdr); i < length ; i++)
-		CRC32C(crc32, buffer[i]);
-
-	return crc32;
-}
-
-__u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32)
-{
-	__u32 i;
-
-	for (i = 0; i < length ; i++)
-		CRC32C(crc32, buffer[i]);
-
-	return crc32;
-}
-
-#if 0
-__u32 sctp_update_copy_cksum(__u8 *to, __u8 *from, __u16 length, __u32 crc32)
-{
-	__u32 i;
-	__u32 *_to = (__u32 *)to;
-	__u32 *_from = (__u32 *)from;
-
-	for (i = 0; i < (length/4); i++) {
-		_to[i] = _from[i];
-		CRC32C(crc32, from[i*4]);
-		CRC32C(crc32, from[i*4+1]);
-		CRC32C(crc32, from[i*4+2]);
-		CRC32C(crc32, from[i*4+3]);
-	}
-
-	return crc32;
-}
-#endif  /*  0  */
-
-__u32 sctp_end_cksum(__u32 crc32)
-{
-	__u32 result;
-	__u8 byte0, byte1, byte2, byte3;
-
-	result = ~crc32;
-
-	/*  result  now holds the negated polynomial remainder;
-	 *  since the table and algorithm is "reflected" [williams95].
-	 *  That is,  result has the same value as if we mapped the message
-	 *  to a polyomial, computed the host-bit-order polynomial
-	 *  remainder, performed final negation, then did an end-for-end
-	 *  bit-reversal.
-	 *  Note that a 32-bit bit-reversal is identical to four inplace
-	 *  8-bit reversals followed by an end-for-end byteswap.
-	 *  In other words, the bytes of each bit are in the right order,
-	 *  but the bytes have been byteswapped.  So we now do an explicit
-	 *  byteswap.  On a little-endian machine, this byteswap and
-	 *  the final ntohl cancel out and could be elided.
-	 */
-	byte0 = result & 0xff;
-	byte1 = (result>>8) & 0xff;
-	byte2 = (result>>16) & 0xff;
-	byte3 = (result>>24) & 0xff;
-
-	crc32 = ((byte0 << 24) |
-		 (byte1 << 16) |
-		 (byte2 << 8)  |
-		 byte3);
-	return crc32;
-}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 91ae463b079..b08c7cb5c9d 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -60,6 +60,7 @@
 #include <net/xfrm.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
+#include <net/sctp/checksum.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 847639d542c..5e811b91f21 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -60,6 +60,7 @@
 
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
+#include <net/sctp/checksum.h>
 
 /* Forward declarations for private helpers. */
 static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
-- 
cgit v1.2.3


From 8e138e7949490eebdccbd65b1f660a0488149a6b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 17 Dec 2007 10:07:44 -0200
Subject: [CCID3]: Use a function to update p_inv, and p is never used

This patch
 1) concentrates previously scattered computation of p_inv into one function;
 2) removes the `p' element of the CCID3 RX sock (it is redundant);
 3) makes the tfrc_rx_info structure standalone, only used on demand.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 11 ++++++++---
 net/dccp/ccids/ccid3.h |  8 ++++----
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index cd9b9ffe2ec..e31560daa0b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -917,6 +917,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
 	const struct ccid3_hc_rx_sock *hcrx;
+	struct tfrc_rx_info rx_info;
 	const void *val;
 
 	/* Listen socks doesn't have a private CCID block */
@@ -926,10 +927,14 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 	hcrx = ccid3_hc_rx_sk(sk);
 	switch (optname) {
 	case DCCP_SOCKOPT_CCID_RX_INFO:
-		if (len < sizeof(hcrx->ccid3hcrx_tfrc))
+		if (len < sizeof(rx_info))
 			return -EINVAL;
-		len = sizeof(hcrx->ccid3hcrx_tfrc);
-		val = &hcrx->ccid3hcrx_tfrc;
+		rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv;
+		rx_info.tfrcrx_rtt    = hcrx->ccid3hcrx_rtt;
+		rx_info.tfrcrx_p      = hcrx->ccid3hcrx_pinv == 0 ? ~0U :
+					   scaled_div(1, hcrx->ccid3hcrx_pinv);
+		len = sizeof(rx_info);
+		val = &rx_info;
 		break;
 	default:
 		return -ENOPROTOOPT;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index e9f6ff4f055..49ca32bd7e7 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -139,6 +139,8 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @ccid3hcrx_state  -  Receiver state, one of %ccid3_hc_rx_states
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
+ *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
+ *  @ccid3hcrx_rtt  -  Receiver estimate of RTT
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
  *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
  *  @ccid3hcrx_hist  -  Packet history (loss detection + RTT sampling)
@@ -147,13 +149,11 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  */
 struct ccid3_hc_rx_sock {
-	struct tfrc_rx_info		ccid3hcrx_tfrc;
-#define ccid3hcrx_x_recv		ccid3hcrx_tfrc.tfrcrx_x_recv
-#define ccid3hcrx_rtt			ccid3hcrx_tfrc.tfrcrx_rtt
-#define ccid3hcrx_p			ccid3hcrx_tfrc.tfrcrx_p
 	u8				ccid3hcrx_last_counter:4;
 	enum ccid3_hc_rx_states		ccid3hcrx_state:8;
 	u32				ccid3hcrx_bytes_recv;
+	u32				ccid3hcrx_x_recv;
+	u32				ccid3hcrx_rtt;
 	ktime_t				ccid3hcrx_tstamp_last_feedback;
 	struct tfrc_rx_hist		ccid3hcrx_hist;
 	struct tfrc_loss_hist		ccid3hcrx_li_hist;
-- 
cgit v1.2.3


From 5bd370a63daf62bb5520c258f04e91a4d9d274dd Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 17 Dec 2007 10:25:06 -0200
Subject: [CCID3]: Remove two irrelevant states in TX feedback handling

 * the NO_SENT state is only triggered in bidirectional mode,
   costing unnecessary processing.
 * the TERM (terminating) state is irrelevant.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 173 ++++++++++++++++++++++++-------------------------
 1 file changed, 84 insertions(+), 89 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index e31560daa0b..a93a556ad03 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -402,112 +402,107 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
 	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
 		return;
+	/* ... and only in the established state */
+	if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK &&
+	    hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+		return;
 
 	opt_recv = &hctx->ccid3hctx_options_received;
+	now = ktime_get_real();
 
-	switch (hctx->ccid3hctx_state) {
-	case TFRC_SSTATE_NO_FBACK:
-	case TFRC_SSTATE_FBACK:
-		now = ktime_get_real();
-
-		/* estimate RTT from history if ACK number is valid */
-		r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
-					    DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
-		if (r_sample == 0) {
-			DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
-				  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
-				  (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
-			return;
-		}
+	/* Estimate RTT from history if ACK number is valid */
+	r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
+				    DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
+	if (r_sample == 0) {
+		DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
+			  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
+			  (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		return;
+	}
+
+	/* Update receive rate in units of 64 * bytes/second */
+	hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+	hctx->ccid3hctx_x_recv <<= 6;
 
-		/* Update receive rate in units of 64 * bytes/second */
-		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
-		hctx->ccid3hctx_x_recv <<= 6;
+	/* Update loss event rate (which is scaled by 1e6) */
+	pinv = opt_recv->ccid3or_loss_event_rate;
+	if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
+		hctx->ccid3hctx_p = 0;
+	else				       /* can not exceed 100% */
+		hctx->ccid3hctx_p = 1000000 / pinv;
+	/*
+	 * Validate new RTT sample and update moving average
+	 */
+	r_sample = dccp_sample_rtt(sk, r_sample);
+	hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
 
-		/* Update loss event rate */
-		pinv = opt_recv->ccid3or_loss_event_rate;
-		if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
-			hctx->ccid3hctx_p = 0;
-		else				       /* can not exceed 100% */
-			hctx->ccid3hctx_p = 1000000 / pinv;
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
 		/*
-		 * Validate new RTT sample and update moving average
+		 * Larger Initial Windows [RFC 4342, sec. 5]
 		 */
-		r_sample = dccp_sample_rtt(sk, r_sample);
-		hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
+		hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+		hctx->ccid3hctx_t_ld = now;
 
-		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
-			/*
-			 * Larger Initial Windows [RFC 4342, sec. 5]
-			 */
-			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
-			hctx->ccid3hctx_t_ld = now;
+		ccid3_update_send_interval(hctx);
 
-			ccid3_update_send_interval(hctx);
+		ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
+			       "R_sample=%uus, X=%u\n", dccp_role(sk),
+			       sk, hctx->ccid3hctx_s,
+			       dccp_sk(sk)->dccps_mss_cache, r_sample,
+			       (unsigned)(hctx->ccid3hctx_x >> 6));
 
-			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
-				       "R_sample=%uus, X=%u\n", dccp_role(sk),
-				       sk, hctx->ccid3hctx_s,
-				       dccp_sk(sk)->dccps_mss_cache, r_sample,
-				       (unsigned)(hctx->ccid3hctx_x >> 6));
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+	} else {
 
-			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
-		} else {
+		/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
+		if (hctx->ccid3hctx_p > 0)
+			hctx->ccid3hctx_x_calc =
+				tfrc_calc_x(hctx->ccid3hctx_s,
+					    hctx->ccid3hctx_rtt,
+					    hctx->ccid3hctx_p);
+		ccid3_hc_tx_update_x(sk, &now);
 
-			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
-			if (hctx->ccid3hctx_p > 0)
-				hctx->ccid3hctx_x_calc =
-					tfrc_calc_x(hctx->ccid3hctx_s,
-						    hctx->ccid3hctx_rtt,
-						    hctx->ccid3hctx_p);
-			ccid3_hc_tx_update_x(sk, &now);
-
-			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
-				       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
-				       dccp_role(sk),
-				       sk, hctx->ccid3hctx_rtt, r_sample,
-				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
-				       hctx->ccid3hctx_x_calc,
-				       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
-				       (unsigned)(hctx->ccid3hctx_x >> 6));
-		}
+		ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
+			       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
+			       dccp_role(sk),
+			       sk, hctx->ccid3hctx_rtt, r_sample,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
+			       hctx->ccid3hctx_x_calc,
+			       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
+			       (unsigned)(hctx->ccid3hctx_x >> 6));
+	}
 
-		/* unschedule no feedback timer */
-		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+	/* unschedule no feedback timer */
+	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
 
-		/*
-		 * As we have calculated new ipi, delta, t_nom it is possible
-		 * that we now can send a packet, so wake up dccp_wait_for_ccid
-		 */
-		sk->sk_write_space(sk);
+	/*
+	 * As we have calculated new ipi, delta, t_nom it is possible
+	 * that we now can send a packet, so wake up dccp_wait_for_ccid
+	 */
+	sk->sk_write_space(sk);
 
-		/*
-		 * Update timeout interval for the nofeedback timer.
-		 * We use a configuration option to increase the lower bound.
-		 * This can help avoid triggering the nofeedback timer too
-		 * often ('spinning') on LANs with small RTTs.
-		 */
-		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
-						   CONFIG_IP_DCCP_CCID3_RTO *
-						   (USEC_PER_SEC/1000));
-		/*
-		 * Schedule no feedback timer to expire in
-		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
-		 */
-		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
+	/*
+	 * Update timeout interval for the nofeedback timer.
+	 * We use a configuration option to increase the lower bound.
+	 * This can help avoid triggering the nofeedback timer too
+	 * often ('spinning') on LANs with small RTTs.
+	 */
+	hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+					   (CONFIG_IP_DCCP_CCID3_RTO *
+					    (USEC_PER_SEC / 1000)));
+	/*
+	 * Schedule no feedback timer to expire in
+	 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
+	 */
+	t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
 
-		ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
-			       "expire in %lu jiffies (%luus)\n",
-			       dccp_role(sk),
-			       sk, usecs_to_jiffies(t_nfb), t_nfb);
+	ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
+		       "expire in %lu jiffies (%luus)\n",
+		       dccp_role(sk),
+		       sk, usecs_to_jiffies(t_nfb), t_nfb);
 
-		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
-				   jiffies + usecs_to_jiffies(t_nfb));
-		break;
-	case TFRC_SSTATE_NO_SENT:	/* fall through */
-	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
-		break;
-	}
+	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			   jiffies + usecs_to_jiffies(t_nfb));
 }
 
 static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
-- 
cgit v1.2.3


From d8d1252f744cb7cebd6ba3a4b7feec31ff23ccde Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 17 Dec 2007 12:48:47 -0200
Subject: [CCID3]: Implement rfc3448bis changes to feedback reception

This implements the algorithm to update the allowed sending rate X upon
receiving feedback packets, as described in draft rfc3448bis, 4.2/4.3.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index a93a556ad03..1156fef1747 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -429,40 +429,46 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
 		hctx->ccid3hctx_p = 0;
 	else				       /* can not exceed 100% */
-		hctx->ccid3hctx_p = 1000000 / pinv;
+		hctx->ccid3hctx_p = scaled_div(1, pinv);
 	/*
 	 * Validate new RTT sample and update moving average
 	 */
 	r_sample = dccp_sample_rtt(sk, r_sample);
 	hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
-
+	/*
+	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
+	 */
 	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
-		/*
-		 * Larger Initial Windows [RFC 4342, sec. 5]
-		 */
-		hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
-		hctx->ccid3hctx_t_ld = now;
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 
-		ccid3_update_send_interval(hctx);
+		if (hctx->ccid3hctx_t_rto == 0) {
+			/*
+			 * Initial feedback packet: Larger Initial Windows (4.2)
+			 */
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+			hctx->ccid3hctx_t_ld = now;
 
-		ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
-			       "R_sample=%uus, X=%u\n", dccp_role(sk),
-			       sk, hctx->ccid3hctx_s,
-			       dccp_sk(sk)->dccps_mss_cache, r_sample,
-			       (unsigned)(hctx->ccid3hctx_x >> 6));
+			ccid3_update_send_interval(hctx);
 
-		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
-	} else {
+			goto done_computing_x;
+		} else if (hctx->ccid3hctx_p == 0) {
+			/*
+			 * First feedback after nofeedback timer expiry (4.3)
+			 */
+			goto done_computing_x;
+		}
+	}
 
-		/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
-		if (hctx->ccid3hctx_p > 0)
-			hctx->ccid3hctx_x_calc =
+	/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
+	if (hctx->ccid3hctx_p > 0)
+		hctx->ccid3hctx_x_calc =
 				tfrc_calc_x(hctx->ccid3hctx_s,
 					    hctx->ccid3hctx_rtt,
 					    hctx->ccid3hctx_p);
-		ccid3_hc_tx_update_x(sk, &now);
+	ccid3_hc_tx_update_x(sk, &now);
 
-		ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
+done_computing_x:
+	ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
 			       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
 			       dccp_role(sk),
 			       sk, hctx->ccid3hctx_rtt, r_sample,
@@ -470,7 +476,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			       hctx->ccid3hctx_x_calc,
 			       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
 			       (unsigned)(hctx->ccid3hctx_x >> 6));
-	}
 
 	/* unschedule no feedback timer */
 	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
-- 
cgit v1.2.3


From 52515e77a7a69867c479db4c9efb6be832b82179 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 17 Dec 2007 12:57:43 -0200
Subject: [CCID3]: Nofeedback timer according to rfc3448bis

This implements the changes to the nofeedback timer handling suggested
in draft rfc3448bis00, section 4.4. In particular, these changes mean:

 * better handling of the lossless case (p == 0)
 * the timestamp for computing t_ld becomes obsolete
 * much more recent document (RFC 3448 is almost 5 years old)
 * concepts in rfc3448bis arose from a real, working implementation
   (cf. sec. 12)

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 63 +++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 1156fef1747..d292f23c002 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -131,12 +131,11 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
  *
  */
 static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
-
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
 	const  __u64 old_x = hctx->ccid3hctx_x;
-	ktime_t now = stamp? *stamp : ktime_get_real();
+	ktime_t now = stamp ? *stamp : ktime_get_real();
 
 	/*
 	 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
@@ -230,27 +229,27 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
 		       ccid3_tx_state_name(hctx->ccid3hctx_state));
 
-	switch (hctx->ccid3hctx_state) {
-	case TFRC_SSTATE_NO_FBACK:
-		/* RFC 3448, 4.4: Halve send rate directly */
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK)
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+	else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+		goto out;
+
+	/*
+	 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
+	 */
+	if (hctx->ccid3hctx_t_rto == 0 ||	/* no feedback received yet */
+	    hctx->ccid3hctx_p == 0) {
+
+		/* halve send rate directly */
 		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
 					(((__u64)hctx->ccid3hctx_s) << 6) /
 								    TFRC_T_MBI);
-
-		ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
-			       "bytes/s\n", dccp_role(sk), sk,
-			       ccid3_tx_state_name(hctx->ccid3hctx_state),
-			       (unsigned)(hctx->ccid3hctx_x >> 6));
-		/* The value of R is still undefined and so we can not recompute
-		 * the timeout value. Keep initial value as per [RFC 4342, 5]. */
-		t_nfb = TFRC_INITIAL_TIMEOUT;
 		ccid3_update_send_interval(hctx);
-		break;
-	case TFRC_SSTATE_FBACK:
+	} else {
 		/*
-		 *  Modify the cached value of X_recv [RFC 3448, 4.4]
+		 *  Modify the cached value of X_recv
 		 *
-		 *  If (p == 0 || X_calc > 2 * X_recv)
+		 *  If (X_calc > 2 * X_recv)
 		 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
 		 *  Else
 		 *    X_recv = X_calc / 4;
@@ -259,32 +258,28 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		 */
 		BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
 
-		if (hctx->ccid3hctx_p == 0 ||
-		    (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
-
+		if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))
 			hctx->ccid3hctx_x_recv =
 				max(hctx->ccid3hctx_x_recv / 2,
 				    (((__u64)hctx->ccid3hctx_s) << 6) /
 							      (2 * TFRC_T_MBI));
-		} else {
+		else {
 			hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
 			hctx->ccid3hctx_x_recv <<= 4;
 		}
-		/* Now recalculate X [RFC 3448, 4.3, step (4)] */
 		ccid3_hc_tx_update_x(sk, NULL);
-		/*
-		 * Schedule no feedback timer to expire in
-		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
-		 * See comments in packet_recv() regarding the value of t_RTO.
-		 */
-		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
-		break;
-	case TFRC_SSTATE_NO_SENT:
-		DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
-		/* fall through */
-	case TFRC_SSTATE_TERM:
-		goto out;
 	}
+	ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
+			(unsigned long long)hctx->ccid3hctx_x);
+
+	/*
+	 * Set new timeout for the nofeedback timer.
+	 * See comments in packet_recv() regarding the value of t_RTO.
+	 */
+	if (unlikely(hctx->ccid3hctx_t_rto == 0))	/* no feedback yet */
+		t_nfb = TFRC_INITIAL_TIMEOUT;
+	else
+		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
 
 restart_timer:
 	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
-- 
cgit v1.2.3


From a07a5a86d091699fd5e791765b8a79e6b1ef96cb Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 17 Dec 2007 12:58:04 -0200
Subject: [DCCP]: Remove unused inline function

The function follows48(), which is a special-case of dccp_delta_seqno(),
is nowhere used in the DCCP code, thus removed by this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b138e2019d7..ebe59d98721 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,12 +153,6 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 	return after48(seq1, seq2) ? seq1 : seq2;
 }
 
-/* is seq1 next seqno after seq2 */
-static inline int follows48(const u64 seq1, const u64 seq2)
-{
-	return dccp_delta_seqno(seq2, seq1) == 1;
-}
-
 enum {
 	DCCP_MIB_NUM = 0,
 	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
-- 
cgit v1.2.3


From c49e5ea322c2fb43f430abb3c4a49eae1394287e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 11 Dec 2007 21:33:42 +0100
Subject: mac80211: conditionally include timestamp in radiotap information

This makes mac80211 include the low-level MAC timestamp
in the radiotap header if the driver indicated (by a new
RX flag) that the timestamp is valid.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c542fc1c983..9cd59ecbcd6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -79,8 +79,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_rate *rate;
 	int needed_headroom = 0;
-	struct ieee80211_rtap_hdr {
-		struct ieee80211_radiotap_header hdr;
+	struct ieee80211_radiotap_header *rthdr;
+	__le64 *rttsft = NULL;
+	struct ieee80211_rtap_fixed_data {
 		u8 flags;
 		u8 rate;
 		__le16 chan_freq;
@@ -88,7 +89,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		u8 antsignal;
 		u8 padding_for_rxflags;
 		__le16 rx_flags;
-	} __attribute__ ((packed)) *rthdr;
+	} __attribute__ ((packed)) *rtfixed;
 	struct sk_buff *skb, *skb2;
 	struct net_device *prev_dev = NULL;
 	int present_fcs_len = 0;
@@ -105,7 +106,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	if (status->flag & RX_FLAG_RADIOTAP)
 		rtap_len = ieee80211_get_radiotap_len(origskb->data);
 	else
-		needed_headroom = sizeof(*rthdr);
+		/* room for radiotap header, always present fields and TSFT */
+		needed_headroom = sizeof(*rthdr) + sizeof(*rtfixed) + 8;
 
 	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
 		present_fcs_len = FCS_LEN;
@@ -133,7 +135,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		 * them allocate enough headroom to start with.
 		 */
 		if (skb_headroom(skb) < needed_headroom &&
-		    pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) {
+		    pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) {
 			dev_kfree_skb(skb);
 			return NULL;
 		}
@@ -152,42 +154,56 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 
 	/* if necessary, prepend radiotap information */
 	if (!(status->flag & RX_FLAG_RADIOTAP)) {
+		rtfixed = (void *) skb_push(skb, sizeof(*rtfixed));
+		rtap_len = sizeof(*rthdr) + sizeof(*rtfixed);
+		if (status->flag & RX_FLAG_TSFT) {
+			rttsft = (void *) skb_push(skb, sizeof(*rttsft));
+			rtap_len += 8;
+		}
 		rthdr = (void *) skb_push(skb, sizeof(*rthdr));
 		memset(rthdr, 0, sizeof(*rthdr));
-		rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr));
-		rthdr->hdr.it_present =
+		memset(rtfixed, 0, sizeof(*rtfixed));
+		rthdr->it_present =
 			cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
 				    (1 << IEEE80211_RADIOTAP_RATE) |
 				    (1 << IEEE80211_RADIOTAP_CHANNEL) |
 				    (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) |
 				    (1 << IEEE80211_RADIOTAP_RX_FLAGS));
-		rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ?
-			       IEEE80211_RADIOTAP_F_FCS : 0;
+		rtfixed->flags = 0;
+		if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
+			rtfixed->flags |= IEEE80211_RADIOTAP_F_FCS;
+
+		if (rttsft) {
+			*rttsft = cpu_to_le64(status->mactime);
+			rthdr->it_present |=
+				cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
+		}
 
 		/* FIXME: when radiotap gets a 'bad PLCP' flag use it here */
-		rthdr->rx_flags = 0;
+		rtfixed->rx_flags = 0;
 		if (status->flag &
 		    (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
-			rthdr->rx_flags |=
+			rtfixed->rx_flags |=
 				cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS);
 
 		rate = ieee80211_get_rate(local, status->phymode,
 					  status->rate);
 		if (rate)
-			rthdr->rate = rate->rate / 5;
+			rtfixed->rate = rate->rate / 5;
 
-		rthdr->chan_freq = cpu_to_le16(status->freq);
+		rtfixed->chan_freq = cpu_to_le16(status->freq);
 
 		if (status->phymode == MODE_IEEE80211A)
-			rthdr->chan_flags =
+			rtfixed->chan_flags =
 				cpu_to_le16(IEEE80211_CHAN_OFDM |
 					    IEEE80211_CHAN_5GHZ);
 		else
-			rthdr->chan_flags =
+			rtfixed->chan_flags =
 				cpu_to_le16(IEEE80211_CHAN_DYN |
 					    IEEE80211_CHAN_2GHZ);
 
-		rthdr->antsignal = status->ssi;
+		rtfixed->antsignal = status->ssi;
+		rthdr->it_len = cpu_to_le16(rtap_len);
 	}
 
 	skb_set_mac_header(skb, 0);
-- 
cgit v1.2.3


From 374fdfbc67837c1f4369eedb0f371ce3e6cce832 Mon Sep 17 00:00:00 2001
From: Dan Williams <dcbw@redhat.com>
Date: Wed, 12 Dec 2007 10:25:07 -0500
Subject: introduce WEXT scan capabilities

Introduce scan capabilities to WEXT so that userspace can do intelligent
things with scan behavior such as handling hidden SSIDs more gracefully.
If the driver reports a specific scan capability, the driver must
respect the options specified in the iw_scan_req structure when handling
the SIOCSIWSCAN call, unless it's mode or state does not allow it to do
so, in which case it must return an error.

This version switches to Dave Kilroy's suggestion of claiming unused
padding space for the scan_capa field.

Signed-off-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 161f3bdec41..04ddce76135 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -218,6 +218,8 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
 	IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
 	IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
 
+	range->scan_capa |= IW_SCAN_CAPA_ESSID;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From ac8e27fd89ca16e02fa1f911480bc46d64edc9da Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:45:52 -0800
Subject: [NETFILTER]: ip_tables: kill useless wrapper

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 64ffe57ef1b..a8f1a2a6c1d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1479,12 +1479,6 @@ struct compat_ipt_replace {
 	struct compat_ipt_entry	entries[0];
 };
 
-static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
-		void __user **dstptr, compat_uint_t *size)
-{
-	return xt_compat_match_to_user(m, dstptr, size);
-}
-
 static int
 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 			  compat_uint_t *size, struct xt_counters *counters,
@@ -1506,7 +1500,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 		goto out;
 
 	*dstptr += sizeof(struct compat_ipt_entry);
-	ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
+	ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
 	target_offset = e->target_offset - (origsize - *size);
 	if (ret)
 		goto out;
-- 
cgit v1.2.3


From 4b4782486d49547bc589b64acfb956bda05f0fca Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:46:15 -0800
Subject: [NETFILTER]: ip_tables: reformat compat code

The compat code has some very odd formating, clean it up before porting
it to ip6_tables.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 181 +++++++++++++++++++++--------------------
 1 file changed, 92 insertions(+), 89 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a8f1a2a6c1d..7d24262331a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -600,8 +600,8 @@ check_entry(struct ipt_entry *e, const char *name)
 }
 
 static inline int check_match(struct ipt_entry_match *m, const char *name,
-				const struct ipt_ip *ip, unsigned int hookmask,
-				unsigned int *i)
+			      const struct ipt_ip *ip,
+			      unsigned int hookmask, unsigned int *i)
 {
 	struct xt_match *match;
 	int ret;
@@ -624,10 +624,10 @@ static inline int check_match(struct ipt_entry_match *m, const char *name,
 
 static inline int
 find_check_match(struct ipt_entry_match *m,
-	    const char *name,
-	    const struct ipt_ip *ip,
-	    unsigned int hookmask,
-	    unsigned int *i)
+		 const char *name,
+		 const struct ipt_ip *ip,
+		 unsigned int hookmask,
+		 unsigned int *i)
 {
 	struct xt_match *match;
 	int ret;
@@ -663,8 +663,8 @@ static inline int check_target(struct ipt_entry *e, const char *name)
 			      name, e->comefrom, e->ip.proto,
 			      e->ip.invflags & IPT_INV_PROTO);
 	if (!ret && t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, target,
-						      t->data, e->comefrom)) {
+	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
+					       e->comefrom)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
 		ret = -EINVAL;
@@ -674,7 +674,7 @@ static inline int check_target(struct ipt_entry *e, const char *name)
 
 static inline int
 find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
-	    unsigned int *i)
+		 unsigned int *i)
 {
 	struct ipt_entry_target *t;
 	struct xt_target *target;
@@ -687,14 +687,14 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 
 	j = 0;
 	ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
-							e->comefrom, &j);
+				e->comefrom, &j);
 	if (ret != 0)
 		goto cleanup_matches;
 
 	t = ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
-						     t->u.user.name,
-						     t->u.user.revision),
+							t->u.user.name,
+							t->u.user.revision),
 					 "ipt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
 		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
@@ -1020,7 +1020,7 @@ struct compat_delta {
 	short delta;
 };
 
-static struct compat_delta *compat_offsets = NULL;
+static struct compat_delta *compat_offsets;
 
 static int compat_add_offset(unsigned int offset, short delta)
 {
@@ -1046,7 +1046,7 @@ static void compat_flush_offsets(void)
 	struct compat_delta *tmp, *next;
 
 	if (compat_offsets) {
-		for(tmp = compat_offsets; tmp; tmp = next) {
+		for (tmp = compat_offsets; tmp; tmp = next) {
 			next = tmp->next;
 			kfree(tmp);
 		}
@@ -1059,7 +1059,7 @@ static short compat_calc_jump(unsigned int offset)
 	struct compat_delta *tmp;
 	short delta;
 
-	for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
+	for (tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
 		if (tmp->offset < offset)
 			delta += tmp->delta;
 	return delta;
@@ -1084,15 +1084,15 @@ static int compat_standard_to_user(void __user *dst, void *src)
 }
 
 static inline int
-compat_calc_match(struct ipt_entry_match *m, int * size)
+compat_calc_match(struct ipt_entry_match *m, int *size)
 {
 	*size += xt_compat_match_offset(m->u.kernel.match);
 	return 0;
 }
 
 static int compat_calc_entry(struct ipt_entry *e,
-		const struct xt_table_info *info,
-		void *base, struct xt_table_info *newinfo)
+			     const struct xt_table_info *info,
+			     void *base, struct xt_table_info *newinfo)
 {
 	struct ipt_entry_target *t;
 	unsigned int entry_offset;
@@ -1109,18 +1109,18 @@ static int compat_calc_entry(struct ipt_entry *e,
 		return ret;
 
 	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
-		if (info->hook_entry[i] && (e < (struct ipt_entry *)
-				(base + info->hook_entry[i])))
+		if (info->hook_entry[i] &&
+		    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
 			newinfo->hook_entry[i] -= off;
-		if (info->underflow[i] && (e < (struct ipt_entry *)
-				(base + info->underflow[i])))
+		if (info->underflow[i] &&
+		    (e < (struct ipt_entry *)(base + info->underflow[i])))
 			newinfo->underflow[i] -= off;
 	}
 	return 0;
 }
 
 static int compat_table_info(const struct xt_table_info *info,
-		struct xt_table_info *newinfo)
+			     struct xt_table_info *newinfo)
 {
 	void *loc_cpu_entry;
 
@@ -1132,7 +1132,8 @@ static int compat_table_info(const struct xt_table_info *info,
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
 	return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
-			compat_calc_entry, info, loc_cpu_entry, newinfo);
+				 compat_calc_entry, info, loc_cpu_entry,
+				 newinfo);
 }
 #endif
 
@@ -1157,7 +1158,7 @@ static int get_info(void __user *user, int *len, int compat)
 		xt_compat_lock(AF_INET);
 #endif
 	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-			"iptable_%s", name);
+				    "iptable_%s", name);
 	if (t && !IS_ERR(t)) {
 		struct ipt_getinfo info;
 		struct xt_table_info *private = t->private;
@@ -1167,14 +1168,14 @@ static int get_info(void __user *user, int *len, int compat)
 			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			compat_flush_offsets();
-			private =  &tmp;
+			private = &tmp;
 		}
 #endif
 		info.valid_hooks = t->valid_hooks;
 		memcpy(info.hook_entry, private->hook_entry,
-				sizeof(info.hook_entry));
+		       sizeof(info.hook_entry));
 		memcpy(info.underflow, private->underflow,
-				sizeof(info.underflow));
+		       sizeof(info.underflow));
 		info.num_entries = private->number;
 		info.size = private->size;
 		strcpy(info.name, name);
@@ -1240,8 +1241,8 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
 
 static int
 __do_replace(const char *name, unsigned int valid_hooks,
-		struct xt_table_info *newinfo, unsigned int num_counters,
-		void __user *counters_ptr)
+	     struct xt_table_info *newinfo, unsigned int num_counters,
+	     void __user *counters_ptr)
 {
 	int ret;
 	struct xt_table *t;
@@ -1289,7 +1290,8 @@ __do_replace(const char *name, unsigned int valid_hooks,
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
+			  NULL);
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
 			 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1346,9 +1348,8 @@ do_replace(void __user *user, unsigned int len)
 
 	duprintf("ip_tables: Translated table\n");
 
-	ret = __do_replace(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.num_counters,
-			      tmp.counters);
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
 	if (ret)
 		goto free_newinfo_untrans;
 	return 0;
@@ -1523,19 +1524,19 @@ out:
 
 static inline int
 compat_find_calc_match(struct ipt_entry_match *m,
-	    const char *name,
-	    const struct ipt_ip *ip,
-	    unsigned int hookmask,
-	    int *size, int *i)
+		       const char *name,
+		       const struct ipt_ip *ip,
+		       unsigned int hookmask,
+		       int *size, int *i)
 {
 	struct xt_match *match;
 
 	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-						   m->u.user.revision),
+						      m->u.user.revision),
 					"ipt_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
 		duprintf("compat_check_calc_match: `%s' not found\n",
-				m->u.user.name);
+			 m->u.user.name);
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
@@ -1572,14 +1573,14 @@ compat_release_entry(struct ipt_entry *e, unsigned int *i)
 
 static inline int
 check_compat_entry_size_and_hooks(struct ipt_entry *e,
-			   struct xt_table_info *newinfo,
-			   unsigned int *size,
-			   unsigned char *base,
-			   unsigned char *limit,
-			   unsigned int *hook_entries,
-			   unsigned int *underflows,
-			   unsigned int *i,
-			   const char *name)
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  unsigned char *base,
+				  unsigned char *limit,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows,
+				  unsigned int *i,
+				  const char *name)
 {
 	struct ipt_entry_target *t;
 	struct xt_target *target;
@@ -1594,7 +1595,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	}
 
 	if (e->next_offset < sizeof(struct compat_ipt_entry) +
-			sizeof(struct compat_xt_entry_target)) {
+			     sizeof(struct compat_xt_entry_target)) {
 		duprintf("checking: element %p size %u\n",
 			 e, e->next_offset);
 		return -EINVAL;
@@ -1608,18 +1609,18 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
 	ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip,
-			e->comefrom, &off, &j);
+				e->comefrom, &off, &j);
 	if (ret != 0)
 		goto release_matches;
 
 	t = ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
-						     t->u.user.name,
-						     t->u.user.revision),
+							t->u.user.name,
+							t->u.user.revision),
 					 "ipt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
 		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
-							t->u.user.name);
+			 t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
 		goto release_matches;
 	}
@@ -1653,17 +1654,20 @@ release_matches:
 	return ret;
 }
 
-static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
-	void **dstptr, compat_uint_t *size, const char *name,
-	const struct ipt_ip *ip, unsigned int hookmask)
+static inline int
+compat_copy_match_from_user(struct ipt_entry_match *m,
+			    void **dstptr, compat_uint_t *size,
+			    const char *name, const struct ipt_ip *ip,
+			    unsigned int hookmask)
 {
 	xt_compat_match_from_user(m, dstptr, size);
 	return 0;
 }
 
-static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
-	unsigned int *size, const char *name,
-	struct xt_table_info *newinfo, unsigned char *base)
+static int
+compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
+			    unsigned int *size, const char *name,
+			    struct xt_table_info *newinfo, unsigned char *base)
 {
 	struct ipt_entry_target *t;
 	struct xt_target *target;
@@ -1678,7 +1682,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 
 	*dstptr += sizeof(struct compat_ipt_entry);
 	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-			name, &de->ip, de->comefrom);
+				name, &de->ip, de->comefrom);
 	if (ret)
 		return ret;
 	de->target_offset = e->target_offset - (origsize - *size);
@@ -1697,7 +1701,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 }
 
 static inline int compat_check_entry(struct ipt_entry *e, const char *name,
-						unsigned int *i)
+				     unsigned int *i)
 {
 	int j, ret;
 
@@ -1720,13 +1724,13 @@ static inline int compat_check_entry(struct ipt_entry *e, const char *name,
 
 static int
 translate_compat_table(const char *name,
-		unsigned int valid_hooks,
-		struct xt_table_info **pinfo,
-		void **pentry0,
-		unsigned int total_size,
-		unsigned int number,
-		unsigned int *hook_entries,
-		unsigned int *underflows)
+		       unsigned int valid_hooks,
+		       struct xt_table_info **pinfo,
+		       void **pentry0,
+		       unsigned int total_size,
+		       unsigned int number,
+		       unsigned int *hook_entries,
+		       unsigned int *underflows)
 {
 	unsigned int i, j;
 	struct xt_table_info *newinfo, *info;
@@ -1793,10 +1797,10 @@ translate_compat_table(const char *name,
 	}
 	entry1 = newinfo->entries[raw_smp_processor_id()];
 	pos = entry1;
-	size =  total_size;
+	size = total_size;
 	ret = IPT_ENTRY_ITERATE(entry0, total_size,
-			compat_copy_entry_from_user, &pos, &size,
-			name, newinfo, entry1);
+				compat_copy_entry_from_user, &pos, &size,
+				name, newinfo, entry1);
 	compat_flush_offsets();
 	xt_compat_unlock(AF_INET);
 	if (ret)
@@ -1808,11 +1812,11 @@ translate_compat_table(const char *name,
 
 	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-								name, &i);
+				name, &i);
 	if (ret) {
 		j -= i;
 		IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i,
-						compat_release_entry, &j);
+					   compat_release_entry, &j);
 		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
 		xt_free_table_info(newinfo);
 		return ret;
@@ -1873,22 +1877,22 @@ compat_do_replace(void __user *user, unsigned int len)
 	}
 
 	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
-			      &newinfo, &loc_cpu_entry, tmp.size,
-			      tmp.num_entries, tmp.hook_entry, tmp.underflow);
+				     &newinfo, &loc_cpu_entry, tmp.size,
+				     tmp.num_entries, tmp.hook_entry,
+				     tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo;
 
 	duprintf("compat_do_replace: Translated table\n");
 
-	ret = __do_replace(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.num_counters,
-			      compat_ptr(tmp.counters));
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
 	if (ret)
 		goto free_newinfo_untrans;
 	return 0;
 
  free_newinfo_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1896,7 +1900,7 @@ compat_do_replace(void __user *user, unsigned int len)
 
 static int
 compat_do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user,
-		unsigned int len)
+		      unsigned int len)
 {
 	int ret;
 
@@ -1920,15 +1924,15 @@ compat_do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user,
 	return ret;
 }
 
-struct compat_ipt_get_entries
-{
+struct compat_ipt_get_entries {
 	char name[IPT_TABLE_MAXNAMELEN];
 	compat_uint_t size;
 	struct compat_ipt_entry entrytable[0];
 };
 
-static int compat_copy_entries_to_user(unsigned int total_size,
-		     struct xt_table *table, void __user *userptr)
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+			    void __user *userptr)
 {
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
@@ -1964,10 +1968,9 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 	struct compat_ipt_get_entries get;
 	struct xt_table *t;
 
-
 	if (*len < sizeof(get)) {
 		duprintf("compat_get_entries: %u < %u\n",
-				*len, (unsigned int)sizeof(get));
+			 *len, (unsigned int)sizeof(get));
 		return -EINVAL;
 	}
 
@@ -1976,8 +1979,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 
 	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
 		duprintf("compat_get_entries: %u != %u\n", *len,
-			(unsigned int)(sizeof(struct compat_ipt_get_entries) +
-			get.size));
+			 (unsigned int)(sizeof(struct compat_ipt_get_entries) +
+			 get.size));
 		return -EINVAL;
 	}
 
@@ -1991,7 +1994,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 		ret = compat_table_info(private, &info);
 		if (!ret && get.size == info.size) {
 			ret = compat_copy_entries_to_user(private->size,
-						   t, uptr->entrytable);
+							  t, uptr->entrytable);
 		} else if (!ret) {
 			duprintf("compat_get_entries: I've got %u not %u!\n",
 				 private->size,
-- 
cgit v1.2.3


From 8956695131b8a7878891667469899d667eb5892b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:46:40 -0800
Subject: [NETFILTER]: x_tables: make xt_compat_match_from_user usable in
 iterator macros

Make xt_compat_match_from_user return an int to make it usable in the
*tables iterator macros and kill a now unnecessary wrapper function.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 13 +------------
 net/netfilter/x_tables.c       |  5 +++--
 2 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7d24262331a..4586af397ef 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1654,16 +1654,6 @@ release_matches:
 	return ret;
 }
 
-static inline int
-compat_copy_match_from_user(struct ipt_entry_match *m,
-			    void **dstptr, compat_uint_t *size,
-			    const char *name, const struct ipt_ip *ip,
-			    unsigned int hookmask)
-{
-	xt_compat_match_from_user(m, dstptr, size);
-	return 0;
-}
-
 static int
 compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 			    unsigned int *size, const char *name,
@@ -1681,8 +1671,7 @@ compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	memcpy(de, e, sizeof(struct ipt_entry));
 
 	*dstptr += sizeof(struct compat_ipt_entry);
-	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-				name, &de->ip, de->comefrom);
+	ret = IPT_MATCH_ITERATE(e, xt_compat_match_from_user, dstptr, size);
 	if (ret)
 		return ret;
 	de->target_offset = e->target_offset - (origsize - *size);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 07bb465d951..b95284ee4fd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -342,8 +342,8 @@ int xt_compat_match_offset(struct xt_match *match)
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_offset);
 
-void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
-			       int *size)
+int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			      int *size)
 {
 	struct xt_match *match = m->u.kernel.match;
 	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
@@ -365,6 +365,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 
 	*size += off;
 	*dstptr += msize;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
-- 
cgit v1.2.3


From 30c08c41be75145b8850ea14b2d5ee4ee4b705d8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:47:14 -0800
Subject: [NETFILTER]: ip_tables: account for struct ipt_entry/struct
 compat_ipt_entry size diff

Account for size differences when dumping entries or calculating the
entry positions. This doesn't actually make any difference for IPv4
since the structures have the same size, but its logically correct
and needed for IPv6.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4586af397ef..cc896fe2fd9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1098,7 +1098,7 @@ static int compat_calc_entry(struct ipt_entry *e,
 	unsigned int entry_offset;
 	int off, i, ret;
 
-	off = 0;
+	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 	entry_offset = (void *)e - base;
 	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
 	t = ipt_get_target(e);
@@ -1501,6 +1501,8 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 		goto out;
 
 	*dstptr += sizeof(struct compat_ipt_entry);
+	*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
 	ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
 	target_offset = e->target_offset - (origsize - *size);
 	if (ret)
@@ -1605,7 +1607,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	if (ret)
 		return ret;
 
-	off = 0;
+	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
 	ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip,
@@ -1671,6 +1673,8 @@ compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	memcpy(de, e, sizeof(struct ipt_entry));
 
 	*dstptr += sizeof(struct compat_ipt_entry);
+	*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
 	ret = IPT_MATCH_ITERATE(e, xt_compat_match_from_user, dstptr, size);
 	if (ret)
 		return ret;
-- 
cgit v1.2.3


From 73cd598df46a73d6f02063f2520df115a9b88aa5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:47:32 -0800
Subject: [NETFILTER]: ip_tables: fix compat types

Use compat types and compat iterators when dealing with compat entries for
clarity. This doesn't actually make a difference for ip_tables, but is
needed for ip6_tables and arp_tables.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 51 ++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index cc896fe2fd9..d8caa1ed487 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1559,7 +1559,7 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i)
 }
 
 static inline int
-compat_release_entry(struct ipt_entry *e, unsigned int *i)
+compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
 {
 	struct ipt_entry_target *t;
 
@@ -1567,14 +1567,14 @@ compat_release_entry(struct ipt_entry *e, unsigned int *i)
 		return 1;
 
 	/* Cleanup all matches */
-	IPT_MATCH_ITERATE(e, compat_release_match, NULL);
-	t = ipt_get_target(e);
+	COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+	t = compat_ipt_get_target(e);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
 
 static inline int
-check_compat_entry_size_and_hooks(struct ipt_entry *e,
+check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 				  struct xt_table_info *newinfo,
 				  unsigned int *size,
 				  unsigned char *base,
@@ -1603,19 +1603,20 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 		return -EINVAL;
 	}
 
-	ret = check_entry(e, name);
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct ipt_entry *)e, name);
 	if (ret)
 		return ret;
 
 	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip,
-				e->comefrom, &off, &j);
+	ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
+				       &e->ip, e->comefrom, &off, &j);
 	if (ret != 0)
 		goto release_matches;
 
-	t = ipt_get_target(e);
+	t = compat_ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
 							t->u.user.name,
 							t->u.user.revision),
@@ -1643,7 +1644,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	}
 
 	/* Clear counters and comefrom */
-	e->counters = ((struct ipt_counters) { 0, 0 });
+	memset(&e->counters, 0, sizeof(e->counters));
 	e->comefrom = 0;
 
 	(*i)++;
@@ -1657,7 +1658,7 @@ release_matches:
 }
 
 static int
-compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
+compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 			    unsigned int *size, const char *name,
 			    struct xt_table_info *newinfo, unsigned char *base)
 {
@@ -1671,15 +1672,17 @@ compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	origsize = *size;
 	de = (struct ipt_entry *)*dstptr;
 	memcpy(de, e, sizeof(struct ipt_entry));
+	memcpy(&de->counters, &e->counters, sizeof(e->counters));
 
-	*dstptr += sizeof(struct compat_ipt_entry);
+	*dstptr += sizeof(struct ipt_entry);
 	*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 
-	ret = IPT_MATCH_ITERATE(e, xt_compat_match_from_user, dstptr, size);
+	ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
+				       dstptr, size);
 	if (ret)
 		return ret;
 	de->target_offset = e->target_offset - (origsize - *size);
-	t = ipt_get_target(e);
+	t = compat_ipt_get_target(e);
 	target = t->u.kernel.target;
 	xt_compat_target_from_user(t, dstptr, size);
 
@@ -1746,11 +1749,11 @@ translate_compat_table(const char *name,
 	j = 0;
 	xt_compat_lock(AF_INET);
 	/* Walk through entries, checking offsets. */
-	ret = IPT_ENTRY_ITERATE(entry0, total_size,
-				check_compat_entry_size_and_hooks,
-				info, &size, entry0,
-				entry0 + total_size,
-				hook_entries, underflows, &j, name);
+	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
+				       check_compat_entry_size_and_hooks,
+				       info, &size, entry0,
+				       entry0 + total_size,
+				       hook_entries, underflows, &j, name);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -1791,9 +1794,9 @@ translate_compat_table(const char *name,
 	entry1 = newinfo->entries[raw_smp_processor_id()];
 	pos = entry1;
 	size = total_size;
-	ret = IPT_ENTRY_ITERATE(entry0, total_size,
-				compat_copy_entry_from_user, &pos, &size,
-				name, newinfo, entry1);
+	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
+				       compat_copy_entry_from_user, &pos, &size,
+				       name, newinfo, entry1);
 	compat_flush_offsets();
 	xt_compat_unlock(AF_INET);
 	if (ret)
@@ -1808,8 +1811,8 @@ translate_compat_table(const char *name,
 				name, &i);
 	if (ret) {
 		j -= i;
-		IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i,
-					   compat_release_entry, &j);
+		COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
+						  compat_release_entry, &j);
 		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
 		xt_free_table_info(newinfo);
 		return ret;
@@ -1828,7 +1831,7 @@ translate_compat_table(const char *name,
 free_newinfo:
 	xt_free_table_info(newinfo);
 out:
-	IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+	COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
 	return ret;
 out_unlock:
 	compat_flush_offsets();
-- 
cgit v1.2.3


From b386d9f5960a9afce7f077edf2095fccfbb1a8e6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:47:48 -0800
Subject: [NETFILTER]: ip_tables: move compat offset calculation to x_tables

Its needed by ip6_tables and arp_tables as well.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 67 +++++-------------------------------------
 net/netfilter/x_tables.c       | 58 ++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d8caa1ed487..07be12cc3fe 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1014,63 +1014,12 @@ copy_entries_to_user(unsigned int total_size,
 }
 
 #ifdef CONFIG_COMPAT
-struct compat_delta {
-	struct compat_delta *next;
-	unsigned int offset;
-	short delta;
-};
-
-static struct compat_delta *compat_offsets;
-
-static int compat_add_offset(unsigned int offset, short delta)
-{
-	struct compat_delta *tmp;
-
-	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-	tmp->offset = offset;
-	tmp->delta = delta;
-	if (compat_offsets) {
-		tmp->next = compat_offsets->next;
-		compat_offsets->next = tmp;
-	} else {
-		compat_offsets = tmp;
-		tmp->next = NULL;
-	}
-	return 0;
-}
-
-static void compat_flush_offsets(void)
-{
-	struct compat_delta *tmp, *next;
-
-	if (compat_offsets) {
-		for (tmp = compat_offsets; tmp; tmp = next) {
-			next = tmp->next;
-			kfree(tmp);
-		}
-		compat_offsets = NULL;
-	}
-}
-
-static short compat_calc_jump(unsigned int offset)
-{
-	struct compat_delta *tmp;
-	short delta;
-
-	for (tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
-		if (tmp->offset < offset)
-			delta += tmp->delta;
-	return delta;
-}
-
 static void compat_standard_from_user(void *dst, void *src)
 {
 	int v = *(compat_int_t *)src;
 
 	if (v > 0)
-		v += compat_calc_jump(v);
+		v += xt_compat_calc_jump(AF_INET, v);
 	memcpy(dst, &v, sizeof(v));
 }
 
@@ -1079,7 +1028,7 @@ static int compat_standard_to_user(void __user *dst, void *src)
 	compat_int_t cv = *(int *)src;
 
 	if (cv > 0)
-		cv -= compat_calc_jump(cv);
+		cv -= xt_compat_calc_jump(AF_INET, cv);
 	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
 }
 
@@ -1104,7 +1053,7 @@ static int compat_calc_entry(struct ipt_entry *e,
 	t = ipt_get_target(e);
 	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
-	ret = compat_add_offset(entry_offset, off);
+	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
 	if (ret)
 		return ret;
 
@@ -1167,7 +1116,7 @@ static int get_info(void __user *user, int *len, int compat)
 		if (compat) {
 			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
-			compat_flush_offsets();
+			xt_compat_flush_offsets(AF_INET);
 			private = &tmp;
 		}
 #endif
@@ -1631,7 +1580,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 
 	off += xt_compat_target_offset(target);
 	*size += off;
-	ret = compat_add_offset(entry_offset, off);
+	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
 	if (ret)
 		goto out;
 
@@ -1797,7 +1746,7 @@ translate_compat_table(const char *name,
 	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
 				       compat_copy_entry_from_user, &pos, &size,
 				       name, newinfo, entry1);
-	compat_flush_offsets();
+	xt_compat_flush_offsets(AF_INET);
 	xt_compat_unlock(AF_INET);
 	if (ret)
 		goto free_newinfo;
@@ -1834,7 +1783,7 @@ out:
 	COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
 	return ret;
 out_unlock:
-	compat_flush_offsets();
+	xt_compat_flush_offsets(AF_INET);
 	xt_compat_unlock(AF_INET);
 	goto out;
 }
@@ -1997,7 +1946,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 				 get.size);
 			ret = -EINVAL;
 		}
-		compat_flush_offsets();
+		xt_compat_flush_offsets(AF_INET);
 		module_put(t->me);
 		xt_table_unlock(t);
 	} else
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b95284ee4fd..8d4fca96a4a 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -34,12 +34,21 @@ MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module");
 
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
+struct compat_delta {
+	struct compat_delta *next;
+	unsigned int offset;
+	short delta;
+};
+
 struct xt_af {
 	struct mutex mutex;
 	struct list_head match;
 	struct list_head target;
 	struct list_head tables;
+#ifdef CONFIG_COMPAT
 	struct mutex compat_mutex;
+	struct compat_delta *compat_offsets;
+#endif
 };
 
 static struct xt_af *xt;
@@ -335,6 +344,54 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
+int xt_compat_add_offset(int af, unsigned int offset, short delta)
+{
+	struct compat_delta *tmp;
+
+	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->offset = offset;
+	tmp->delta = delta;
+
+	if (xt[af].compat_offsets) {
+		tmp->next = xt[af].compat_offsets->next;
+		xt[af].compat_offsets->next = tmp;
+	} else {
+		xt[af].compat_offsets = tmp;
+		tmp->next = NULL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_add_offset);
+
+void xt_compat_flush_offsets(int af)
+{
+	struct compat_delta *tmp, *next;
+
+	if (xt[af].compat_offsets) {
+		for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
+			next = tmp->next;
+			kfree(tmp);
+		}
+		xt[af].compat_offsets = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
+
+short xt_compat_calc_jump(int af, unsigned int offset)
+{
+	struct compat_delta *tmp;
+	short delta;
+
+	for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
+		if (tmp->offset < offset)
+			delta += tmp->delta;
+	return delta;
+}
+EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
+
 int xt_compat_match_offset(struct xt_match *match)
 {
 	u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -873,6 +930,7 @@ static int __init xt_init(void)
 		mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
 		mutex_init(&xt[i].compat_mutex);
+		xt[i].compat_offsets = NULL;
 #endif
 		INIT_LIST_HEAD(&xt[i].target);
 		INIT_LIST_HEAD(&xt[i].match);
-- 
cgit v1.2.3


From 72f36ec14fb5006886bc0655ec2b43bf1ad53a26 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:48:02 -0800
Subject: [NETFILTER]: ip6_tables: kill a few useless defines/forward
 declarations

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d3e884a5c6a..b73e6b6d554 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -31,9 +31,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv6 packet filter");
 
-#define IPV6_HDR_LEN	(sizeof(struct ipv6hdr))
-#define IPV6_OPTHDR_LEN	(sizeof(struct ipv6_opt_hdr))
-
 /*#define DEBUG_IP_FIREWALL*/
 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
 /*#define DEBUG_IP_FIREWALL_USER*/
@@ -76,12 +73,6 @@ do {								\
 
    Hence the start of any table is given by get_table() below.  */
 
-#if 0
-#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
-#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
-#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
-#endif
-
 /* Check for an extension */
 int
 ip6t_ext_hdr(u8 nexthdr)
@@ -399,9 +390,8 @@ ip6t_do_table(struct sk_buff *skb,
 				goto no_match;
 
 			ADD_COUNTER(e->counters,
-				    ntohs(ipv6_hdr(skb)->payload_len)
-				    + IPV6_HDR_LEN,
-				    1);
+				    ntohs(ipv6_hdr(skb)->payload_len) +
+				    sizeof(struct ipv6hdr), 1);
 
 			t = ip6t_get_target(e);
 			IP_NF_ASSERT(t->u.kernel.target);
@@ -657,8 +647,6 @@ err:
 	return ret;
 }
 
-static struct xt_target ip6t_standard_target;
-
 static inline int
 check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	    unsigned int *i)
-- 
cgit v1.2.3


From f173c8a1f2c0ca39f45bb15b82ad5e6fe908556d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:48:17 -0800
Subject: [NETFILTER]: ip6_tables: move entry, match and target checks to
 seperate functions

Resync with ip_tables.c as preparation for compat support.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 129 +++++++++++++++++++++++++---------------
 1 file changed, 82 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index b73e6b6d554..655c221acd1 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -607,11 +607,55 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 }
 
 static inline int
-check_match(struct ip6t_entry_match *m,
-	    const char *name,
-	    const struct ip6t_ip6 *ipv6,
-	    unsigned int hookmask,
-	    unsigned int *i)
+check_entry(struct ip6t_entry *e, const char *name)
+{
+	struct ip6t_entry_target *t;
+
+	if (!ip6_checkentry(&e->ipv6)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	if (e->target_offset + sizeof(struct ip6t_entry_target) >
+	    e->next_offset)
+		return -EINVAL;
+
+	t = ip6t_get_target(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline int check_match(struct ip6t_entry_match *m, const char *name,
+			      const struct ip6t_ip6 *ipv6,
+			      unsigned int hookmask, unsigned int *i)
+{
+	struct xt_match *match;
+	int ret;
+
+	match = m->u.kernel.match;
+	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
+			     name, hookmask, ipv6->proto,
+			     ipv6->invflags & IP6T_INV_PROTO);
+	if (!ret && m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
+					      hookmask)) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		ret = -EINVAL;
+	}
+	if (!ret)
+		(*i)++;
+	return ret;
+}
+
+static inline int
+find_check_match(struct ip6t_entry_match *m,
+		 const char *name,
+		 const struct ip6t_ip6 *ipv6,
+		 unsigned int hookmask,
+		 unsigned int *i)
 {
 	struct xt_match *match;
 	int ret;
@@ -620,86 +664,77 @@ check_match(struct ip6t_entry_match *m,
 					m->u.user.revision),
 					"ip6t_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
 
-	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
-			     name, hookmask, ipv6->proto,
-			     ipv6->invflags & IP6T_INV_PROTO);
+	ret = check_match(m, name, ipv6, hookmask, i);
 	if (ret)
 		goto err;
 
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ipv6, match,  m->data,
-					      hookmask)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
-	(*i)++;
 	return 0;
 err:
 	module_put(m->u.kernel.match->me);
 	return ret;
 }
 
-static inline int
-check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
-	    unsigned int *i)
+static inline int check_target(struct ip6t_entry *e, const char *name)
 {
 	struct ip6t_entry_target *t;
 	struct xt_target *target;
 	int ret;
-	unsigned int j;
 
-	if (!ip6_checkentry(&e->ipv6)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-		return -EINVAL;
+	t = ip6t_get_target(e);
+	target = t->u.kernel.target;
+	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
+			      name, e->comefrom, e->ipv6.proto,
+			      e->ipv6.invflags & IP6T_INV_PROTO);
+	if (!ret && t->u.kernel.target->checkentry
+	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
+					       e->comefrom)) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
 	}
+	return ret;
+}
 
-	if (e->target_offset + sizeof(struct ip6t_entry_target) >
-								e->next_offset)
-		return -EINVAL;
+static inline int
+find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
+		 unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+	struct xt_target *target;
+	int ret;
+	unsigned int j;
+
+	ret = check_entry(e, name);
+	if (ret)
+		return ret;
 
 	j = 0;
-	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
+	ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6,
+				 e->comefrom, &j);
 	if (ret != 0)
 		goto cleanup_matches;
 
 	t = ip6t_get_target(e);
-	ret = -EINVAL;
-	if (e->target_offset + t->u.target_size > e->next_offset)
-			goto cleanup_matches;
 	target = try_then_request_module(xt_find_target(AF_INET6,
 							t->u.user.name,
 							t->u.user.revision),
 					 "ip6t_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
-		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
 		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
 
-	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, e->ipv6.proto,
-			      e->ipv6.invflags & IP6T_INV_PROTO);
+	ret = check_target(e, name);
 	if (ret)
 		goto err;
 
-	if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      e->comefrom)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
 	(*i)++;
 	return 0;
  err:
@@ -834,7 +869,7 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry, name, size, &i);
+				find_check_entry, name, size, &i);
 
 	if (ret != 0) {
 		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-- 
cgit v1.2.3


From 3b84e92b0d54864b0731c3ab3c20dd140bb3d7d9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:48:33 -0800
Subject: [NETFILTER]: ip6_tables: use vmalloc_node()

Consistently use vmalloc_node for all counter allocations.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 655c221acd1..d0b5fa6661f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -959,7 +959,7 @@ copy_entries_to_user(unsigned int total_size,
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
@@ -1080,7 +1080,8 @@ do_replace(void __user *user, unsigned int len)
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+	counters = vmalloc_node(tmp.num_counters * sizeof(struct xt_counters),
+				numa_node_id());
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
@@ -1186,7 +1187,7 @@ do_add_counters(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc_node(len, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From ed1a6f5e77441c4020b8541b3f03f03e37d638e1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:49:51 -0800
Subject: [NETFILTER]: ip6_tables: move counter allocation to seperate function

More resyncing with ip_tables.c as preparation for compat support.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d0b5fa6661f..02be4fcb915 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -943,17 +943,11 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
-static int
-copy_entries_to_user(unsigned int total_size,
-		     struct xt_table *table,
-		     void __user *userptr)
+static inline struct xt_counters *alloc_counters(struct xt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct ip6t_entry *e;
+	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	int ret = 0;
-	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -962,13 +956,32 @@ copy_entries_to_user(unsigned int total_size,
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* First, sum counters... */
 	write_lock_bh(&table->lock);
 	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
+	return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct xt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num;
+	struct ip6t_entry *e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
 	/* choose the copy that is on ourc node/cpu */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
-- 
cgit v1.2.3


From 433665c9d110d783ea4043c59657f0437fcc31dd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:50:05 -0800
Subject: [NETFILTER]: ip6_tables: move IP6T_SO_GET_INFO handling to seperate
 function

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 89 ++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 02be4fcb915..681316e40c6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1037,6 +1037,50 @@ copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
+static int get_info(void __user *user, int *len)
+{
+	char name[IP6T_TABLE_MAXNAMELEN];
+	struct xt_table *t;
+	int ret;
+
+	if (*len != sizeof(struct ip6t_getinfo)) {
+		duprintf("length %u != %u\n", *len,
+			 sizeof(struct ip6t_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
+				    "ip6table_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct ip6t_getinfo info;
+		struct xt_table_info *private = t->private;
+
+		info.valid_hooks = t->valid_hooks;
+		memcpy(info.hook_entry, private->hook_entry,
+		       sizeof(info.hook_entry));
+		memcpy(info.underflow, private->underflow,
+		       sizeof(info.underflow));
+		info.num_entries = private->number;
+		info.size = private->size;
+		memcpy(info.name, name, sizeof(info.name));
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+	return ret;
+}
+
 static int
 get_entries(const struct ip6t_get_entries *entries,
 	    struct ip6t_get_entries __user *uptr)
@@ -1274,48 +1318,9 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		return -EPERM;
 
 	switch (cmd) {
-	case IP6T_SO_GET_INFO: {
-		char name[IP6T_TABLE_MAXNAMELEN];
-		struct xt_table *t;
-
-		if (*len != sizeof(struct ip6t_getinfo)) {
-			duprintf("length %u != %u\n", *len,
-				 sizeof(struct ip6t_getinfo));
-			ret = -EINVAL;
-			break;
-		}
-
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
-
-		t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
-					    "ip6table_%s", name);
-		if (t && !IS_ERR(t)) {
-			struct ip6t_getinfo info;
-			struct xt_table_info *private = t->private;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = private->number;
-			info.size = private->size;
-			memcpy(info.name, name, sizeof(info.name));
-
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-			xt_table_unlock(t);
-			module_put(t->me);
-		} else
-			ret = t ? PTR_ERR(t) : -ENOENT;
-	}
-	break;
+	case IP6T_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
 
 	case IP6T_SO_GET_ENTRIES: {
 		struct ip6t_get_entries get;
-- 
cgit v1.2.3


From d924357c50d83e76d30dd5b81b5804815a2ae31c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:50:22 -0800
Subject: [NETFILTER]: ip6_tables: resync get_entries() with ip_tables

Resync get_entries() with ip_tables.c by moving the checks from the
setsockopt handler to the function itself.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 681316e40c6..6fcc0d5bc27 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1082,17 +1082,29 @@ static int get_info(void __user *user, int *len)
 }
 
 static int
-get_entries(const struct ip6t_get_entries *entries,
-	    struct ip6t_get_entries __user *uptr)
+get_entries(struct ip6t_get_entries __user *uptr, int *len)
 {
 	int ret;
+	struct ip6t_get_entries get;
 	struct xt_table *t;
 
-	t = xt_find_table_lock(AF_INET6, entries->name);
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct ip6t_get_entries) + get.size) {
+		duprintf("get_entries: %u != %u\n", *len,
+			 sizeof(struct ip6t_get_entries) + get.size);
+		return -EINVAL;
+	}
+
+	t = xt_find_table_lock(AF_INET6, get.name);
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n", private->number);
-		if (entries->size == private->size)
+		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
@@ -1322,22 +1334,9 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		ret = get_info(user, len);
 		break;
 
-	case IP6T_SO_GET_ENTRIES: {
-		struct ip6t_get_entries get;
-
-		if (*len < sizeof(get)) {
-			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
-			ret = -EINVAL;
-		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-			ret = -EFAULT;
-		} else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
-			duprintf("get_entries: %u != %u\n", *len,
-				 sizeof(struct ip6t_get_entries) + get.size);
-			ret = -EINVAL;
-		} else
-			ret = get_entries(&get, user);
+	case IP6T_SO_GET_ENTRIES:
+		ret = get_entries(user, len);
 		break;
-	}
 
 	case IP6T_SO_GET_REVISION_MATCH:
 	case IP6T_SO_GET_REVISION_TARGET: {
-- 
cgit v1.2.3


From 3bc3fe5eed5e866c0871db6d745f3bf58af004ef Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:50:37 -0800
Subject: [NETFILTER]: ip6_tables: add compat support

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c                    | 106 ------
 net/ipv6/netfilter/ip6_tables.c | 823 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 767 insertions(+), 162 deletions(-)

(limited to 'net')

diff --git a/net/compat.c b/net/compat.c
index f4ef4c04865..80013fb69a6 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -20,7 +20,6 @@
 #include <linux/syscalls.h>
 #include <linux/filter.h>
 #include <linux/compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/security.h>
 
 #include <net/scm.h>
@@ -316,107 +315,6 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
 	__scm_destroy(scm);
 }
 
-/*
- * For now, we assume that the compatibility and native version
- * of struct ipt_entry are the same - sfr.  FIXME
- */
-struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
-	u32			valid_hooks;
-	u32			num_entries;
-	u32			size;
-	u32			hook_entry[NF_INET_NUMHOOKS];
-	u32			underflow[NF_INET_NUMHOOKS];
-	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
-	struct ipt_entry	entries[0];
-};
-
-static int do_netfilter_replace(int fd, int level, int optname,
-				char __user *optval, int optlen)
-{
-	struct compat_ipt_replace __user *urepl;
-	struct ipt_replace __user *repl_nat;
-	char name[IPT_TABLE_MAXNAMELEN];
-	u32 origsize, tmp32, num_counters;
-	unsigned int repl_nat_size;
-	int ret;
-	int i;
-	compat_uptr_t ucntrs;
-
-	urepl = (struct compat_ipt_replace __user *)optval;
-	if (get_user(origsize, &urepl->size))
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (optlen != sizeof(*urepl) + origsize)
-		return -ENOPROTOOPT;
-
-	/* XXX Assumes that size of ipt_entry is the same both in
-	 *     native and compat environments.
-	 */
-	repl_nat_size = sizeof(*repl_nat) + origsize;
-	repl_nat = compat_alloc_user_space(repl_nat_size);
-
-	ret = -EFAULT;
-	if (put_user(origsize, &repl_nat->size))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, urepl, optlen) ||
-	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
-		goto out;
-
-	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->valid_hooks) ||
-	    __put_user(tmp32, &repl_nat->valid_hooks))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->num_entries) ||
-	    __put_user(tmp32, &repl_nat->num_entries))
-		goto out;
-
-	if (__get_user(num_counters, &urepl->num_counters) ||
-	    __put_user(num_counters, &repl_nat->num_counters))
-		goto out;
-
-	if (__get_user(ucntrs, &urepl->counters) ||
-	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-		goto out;
-
-	if (__copy_in_user(&repl_nat->entries[0],
-			   &urepl->entries[0],
-			   origsize))
-		goto out;
-
-	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
-		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-		    __get_user(tmp32, &urepl->underflow[i]) ||
-		    __put_user(tmp32, &repl_nat->underflow[i]))
-			goto out;
-	}
-
-	/*
-	 * Since struct ipt_counters just contains two u_int64_t members
-	 * we can just do the access_ok check here and pass the (converted)
-	 * pointer into the standard syscall.  We hope that the pointer is
-	 * not misaligned ...
-	 */
-	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-		       num_counters * sizeof(struct ipt_counters)))
-		goto out;
-
-
-	ret = sys_setsockopt(fd, level, optname,
-			     (char __user *)repl_nat, repl_nat_size);
-
-out:
-	return ret;
-}
-
 /*
  * A struct sock_filter is architecture independent.
  */
@@ -485,10 +383,6 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 	int err;
 	struct socket *sock;
 
-	if (level == SOL_IPV6 && optname == IPT_SO_SET_REPLACE)
-		return do_netfilter_replace(fd, level, optname,
-					    optval, optlen);
-
 	if (optlen < 0)
 		return -EINVAL;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6fcc0d5bc27..db0dc96be55 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -19,9 +19,11 @@
 #include <linux/poison.h>
 #include <linux/icmpv6.h>
 #include <net/ipv6.h>
+#include <net/compat.h>
 #include <asm/uaccess.h>
 #include <linux/mutex.h>
 #include <linux/proc_fs.h>
+#include <linux/err.h>
 #include <linux/cpumask.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
@@ -1037,7 +1039,80 @@ copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
-static int get_info(void __user *user, int *len)
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, void *src)
+{
+	int v = *(compat_int_t *)src;
+
+	if (v > 0)
+		v += xt_compat_calc_jump(AF_INET6, v);
+	memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, void *src)
+{
+	compat_int_t cv = *(int *)src;
+
+	if (cv > 0)
+		cv -= xt_compat_calc_jump(AF_INET6, cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static inline int
+compat_calc_match(struct ip6t_entry_match *m, int *size)
+{
+	*size += xt_compat_match_offset(m->u.kernel.match);
+	return 0;
+}
+
+static int compat_calc_entry(struct ip6t_entry *e,
+			     const struct xt_table_info *info,
+			     void *base, struct xt_table_info *newinfo)
+{
+	struct ip6t_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - base;
+	IP6T_MATCH_ITERATE(e, compat_calc_match, &off);
+	t = ip6t_get_target(e);
+	off += xt_compat_target_offset(t->u.kernel.target);
+	newinfo->size -= off;
+	ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		if (info->hook_entry[i] &&
+		    (e < (struct ip6t_entry *)(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] &&
+		    (e < (struct ip6t_entry *)(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	void *loc_cpu_entry;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	/* we dont care about newinfo->entries[] */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size,
+				  compat_calc_entry, info, loc_cpu_entry,
+				  newinfo);
+}
+#endif
+
+static int get_info(void __user *user, int *len, int compat)
 {
 	char name[IP6T_TABLE_MAXNAMELEN];
 	struct xt_table *t;
@@ -1053,13 +1128,24 @@ static int get_info(void __user *user, int *len)
 		return -EFAULT;
 
 	name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
-
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(AF_INET6);
+#endif
 	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
 				    "ip6table_%s", name);
 	if (t && !IS_ERR(t)) {
 		struct ip6t_getinfo info;
 		struct xt_table_info *private = t->private;
 
+#ifdef CONFIG_COMPAT
+		if (compat) {
+			struct xt_table_info tmp;
+			ret = compat_table_info(private, &tmp);
+			xt_compat_flush_offsets(AF_INET6);
+			private = &tmp;
+		}
+#endif
 		info.valid_hooks = t->valid_hooks;
 		memcpy(info.hook_entry, private->hook_entry,
 		       sizeof(info.hook_entry));
@@ -1078,6 +1164,10 @@ static int get_info(void __user *user, int *len)
 		module_put(t->me);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(AF_INET6);
+#endif
 	return ret;
 }
 
@@ -1121,65 +1211,40 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len)
 }
 
 static int
-do_replace(void __user *user, unsigned int len)
+__do_replace(const char *name, unsigned int valid_hooks,
+	     struct xt_table_info *newinfo, unsigned int num_counters,
+	     void __user *counters_ptr)
 {
 	int ret;
-	struct ip6t_replace tmp;
 	struct xt_table *t;
-	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
-	void *loc_cpu_entry, *loc_cpu_old_entry;
-
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
-
-	/* overflow check */
-	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-		return -ENOMEM;
-
-	newinfo = xt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
-
-	/* choose the copy that is on our node/cpu */
-	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
+	void *loc_cpu_old_entry;
 
-	counters = vmalloc_node(tmp.num_counters * sizeof(struct xt_counters),
+	ret = 0;
+	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
 				numa_node_id());
 	if (!counters) {
 		ret = -ENOMEM;
-		goto free_newinfo;
+		goto out;
 	}
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("ip_tables: Translated table\n");
-
-	t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
-				    "ip6table_%s", tmp.name);
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
+				    "ip6table_%s", name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -1197,10 +1262,11 @@ do_replace(void __user *user, unsigned int len)
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
+			   NULL);
 	xt_free_table_info(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
 	xt_table_unlock(t);
@@ -1210,9 +1276,54 @@ do_replace(void __user *user, unsigned int len)
 	module_put(t->me);
 	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
 	vfree(counters);
+ out:
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ip6t_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1241,31 +1352,59 @@ add_counter_to_entry(struct ip6t_entry *e,
 }
 
 static int
-do_add_counters(void __user *user, unsigned int len)
+do_add_counters(void __user *user, unsigned int len, int compat)
 {
 	unsigned int i;
-	struct xt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	char *name;
+	int size;
+	void *ptmp;
 	struct xt_table_info *private;
 	struct xt_table *t;
 	int ret = 0;
 	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len, numa_node_id());
+	paddc = vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = xt_find_table_lock(AF_INET6, tmp.name);
+	t = xt_find_table_lock(AF_INET6, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1273,7 +1412,7 @@ do_add_counters(void __user *user, unsigned int len)
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != tmp.num_counters) {
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
@@ -1284,7 +1423,7 @@ do_add_counters(void __user *user, unsigned int len)
 	IP6T_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
-			  paddc->counters,
+			  paddc,
 			  &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
@@ -1296,6 +1435,567 @@ do_add_counters(void __user *user, unsigned int len)
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ip6t_replace {
+	char			name[IP6T_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;
+	u32			size;
+	u32			hook_entry[NF_INET_NUMHOOKS];
+	u32			underflow[NF_INET_NUMHOOKS];
+	u32			num_counters;
+	compat_uptr_t		counters;	/* struct ip6t_counters * */
+	struct compat_ip6t_entry entries[0];
+};
+
+static int
+compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
+			  compat_uint_t *size, struct xt_counters *counters,
+			  unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+	struct compat_ip6t_entry __user *ce;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	int ret;
+
+	ret = -EFAULT;
+	origsize = *size;
+	ce = (struct compat_ip6t_entry __user *)*dstptr;
+	if (copy_to_user(ce, e, sizeof(struct ip6t_entry)))
+		goto out;
+
+	if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
+		goto out;
+
+	*dstptr += sizeof(struct compat_ip6t_entry);
+	*size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+	ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
+	target_offset = e->target_offset - (origsize - *size);
+	if (ret)
+		goto out;
+	t = ip6t_get_target(e);
+	ret = xt_compat_target_to_user(t, dstptr, size);
+	if (ret)
+		goto out;
+	ret = -EFAULT;
+	next_offset = e->next_offset - (origsize - *size);
+	if (put_user(target_offset, &ce->target_offset))
+		goto out;
+	if (put_user(next_offset, &ce->next_offset))
+		goto out;
+
+	(*i)++;
+	return 0;
+out:
+	return ret;
+}
+
+static inline int
+compat_find_calc_match(struct ip6t_entry_match *m,
+		       const char *name,
+		       const struct ip6t_ip6 *ipv6,
+		       unsigned int hookmask,
+		       int *size, int *i)
+{
+	struct xt_match *match;
+
+	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
+						      m->u.user.revision),
+					"ip6t_%s", m->u.user.name);
+	if (IS_ERR(match) || !match) {
+		duprintf("compat_check_calc_match: `%s' not found\n",
+			 m->u.user.name);
+		return match ? PTR_ERR(match) : -ENOENT;
+	}
+	m->u.kernel.match = match;
+	*size += xt_compat_match_offset(match);
+
+	(*i)++;
+	return 0;
+}
+
+static inline int
+compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+static inline int
+compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	/* Cleanup all matches */
+	COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL);
+	t = compat_ip6t_get_target(e);
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  unsigned char *base,
+				  unsigned char *limit,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows,
+				  unsigned int *i,
+				  const char *name)
+{
+	struct ip6t_entry_target *t;
+	struct xt_target *target;
+	unsigned int entry_offset;
+	int ret, off, h, j;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_ip6t_entry) +
+			     sizeof(struct compat_xt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct ip6t_entry *)e, name);
+	if (ret)
+		return ret;
+
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name,
+					&e->ipv6, e->comefrom, &off, &j);
+	if (ret != 0)
+		goto release_matches;
+
+	t = compat_ip6t_get_target(e);
+	target = try_then_request_module(xt_find_target(AF_INET6,
+							t->u.user.name,
+							t->u.user.revision),
+					 "ip6t_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+			 t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto release_matches;
+	}
+	t->u.kernel.target = target;
+
+	off += xt_compat_target_offset(target);
+	*size += off;
+	ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+	if (ret)
+		goto out;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	memset(&e->counters, 0, sizeof(e->counters));
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+
+out:
+	module_put(t->u.kernel.target->me);
+release_matches:
+	IP6T_MATCH_ITERATE(e, compat_release_match, &j);
+	return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+			    unsigned int *size, const char *name,
+			    struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct ip6t_entry_target *t;
+	struct xt_target *target;
+	struct ip6t_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	ret = 0;
+	origsize = *size;
+	de = (struct ip6t_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ip6t_entry));
+	memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+	*dstptr += sizeof(struct ip6t_entry);
+	*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+	ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user,
+					dstptr, size);
+	if (ret)
+		return ret;
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = compat_ip6t_get_target(e);
+	target = t->u.kernel.target;
+	xt_compat_target_from_user(t, dstptr, size);
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+	return ret;
+}
+
+static inline int compat_check_entry(struct ip6t_entry *e, const char *name,
+				     unsigned int *i)
+{
+	int j, ret;
+
+	j = 0;
+	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6,
+				 e->comefrom, &j);
+	if (ret)
+		goto cleanup_matches;
+
+	ret = check_target(e, name);
+	if (ret)
+		goto cleanup_matches;
+
+	(*i)++;
+	return 0;
+
+ cleanup_matches:
+	IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+static int
+translate_compat_table(const char *name,
+		       unsigned int valid_hooks,
+		       struct xt_table_info **pinfo,
+		       void **pentry0,
+		       unsigned int total_size,
+		       unsigned int number,
+		       unsigned int *hook_entries,
+		       unsigned int *underflows)
+{
+	unsigned int i, j;
+	struct xt_table_info *newinfo, *info;
+	void *pos, *entry0, *entry1;
+	unsigned int size;
+	int ret;
+
+	info = *pinfo;
+	entry0 = *pentry0;
+	size = total_size;
+	info->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		info->hook_entry[i] = 0xFFFFFFFF;
+		info->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	j = 0;
+	xt_compat_lock(AF_INET6);
+	/* Walk through entries, checking offsets. */
+	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
+					check_compat_entry_size_and_hooks,
+					info, &size, entry0,
+					entry0 + total_size,
+					hook_entries, underflows, &j, name);
+	if (ret != 0)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (j != number) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 j, number);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (info->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			goto out_unlock;
+		}
+		if (info->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = xt_alloc_table_info(size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = number;
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	entry1 = newinfo->entries[raw_smp_processor_id()];
+	pos = entry1;
+	size = total_size;
+	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
+					compat_copy_entry_from_user,
+					&pos, &size, name, newinfo, entry1);
+	xt_compat_flush_offsets(AF_INET6);
+	xt_compat_unlock(AF_INET6);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
+		goto free_newinfo;
+
+	i = 0;
+	ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
+				 name, &i);
+	if (ret) {
+		j -= i;
+		COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
+						   compat_release_entry, &j);
+		IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+		xt_free_table_info(newinfo);
+		return ret;
+	}
+
+	/* And one copy for every other CPU */
+	for_each_possible_cpu(i)
+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+			memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+	*pinfo = newinfo;
+	*pentry0 = entry1;
+	xt_free_table_info(info);
+	return 0;
+
+free_newinfo:
+	xt_free_table_info(newinfo);
+out:
+	COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+	return ret;
+out_unlock:
+	xt_compat_flush_offsets(AF_INET6);
+	xt_compat_unlock(AF_INET6);
+	goto out;
+}
+
+static int
+compat_do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct compat_ip6t_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.size >= INT_MAX / num_possible_cpus())
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+				     &newinfo, &loc_cpu_entry, tmp.size,
+				     tmp.num_entries, tmp.hook_entry,
+				     tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+static int
+compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
+		       unsigned int len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		ret = compat_do_replace(user, len);
+		break;
+
+	case IP6T_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(user, len, 1);
+		break;
+
+	default:
+		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+struct compat_ip6t_get_entries {
+	char name[IP6T_TABLE_MAXNAMELEN];
+	compat_uint_t size;
+	struct compat_ip6t_entry entrytable[0];
+};
+
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+			    void __user *userptr)
+{
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	void __user *pos;
+	unsigned int size;
+	int ret = 0;
+	void *loc_cpu_entry;
+	unsigned int i = 0;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	pos = userptr;
+	size = total_size;
+	ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size,
+				 compat_copy_entry_to_user,
+				 &pos, &size, counters, &i);
+
+	vfree(counters);
+	return ret;
+}
+
+static int
+compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len)
+{
+	int ret;
+	struct compat_ip6t_get_entries get;
+	struct xt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %u\n",
+			 *len, (unsigned int)sizeof(get));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+
+	if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %u\n", *len,
+			 (unsigned int)(sizeof(struct compat_ip6t_get_entries) +
+			 get.size));
+		return -EINVAL;
+	}
+
+	xt_compat_lock(AF_INET6);
+	t = xt_find_table_lock(AF_INET6, get.name);
+	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+		duprintf("t->private->number = %u\n",
+			 private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+							  t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size,
+				 get.size);
+			ret = -EINVAL;
+		}
+		xt_compat_flush_offsets(AF_INET6);
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	xt_compat_unlock(AF_INET6);
+	return ret;
+}
+
+static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
+
+static int
+compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_GET_INFO:
+		ret = get_info(user, len, 1);
+		break;
+	case IP6T_SO_GET_ENTRIES:
+		ret = compat_get_entries(user, len);
+		break;
+	default:
+		ret = do_ip6t_get_ctl(sk, cmd, user, len);
+	}
+	return ret;
+}
+#endif
+
 static int
 do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
@@ -1310,7 +2010,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		break;
 
 	case IP6T_SO_SET_ADD_COUNTERS:
-		ret = do_add_counters(user, len);
+		ret = do_add_counters(user, len, 0);
 		break;
 
 	default:
@@ -1331,7 +2031,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 
 	switch (cmd) {
 	case IP6T_SO_GET_INFO:
-		ret = get_info(user, len);
+		ret = get_info(user, len, 0);
 		break;
 
 	case IP6T_SO_GET_ENTRIES:
@@ -1483,6 +2183,11 @@ static struct xt_target ip6t_standard_target __read_mostly = {
 	.name		= IP6T_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= AF_INET6,
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(compat_int_t),
+	.compat_from_user = compat_standard_from_user,
+	.compat_to_user	= compat_standard_to_user,
+#endif
 };
 
 static struct xt_target ip6t_error_target __read_mostly = {
@@ -1497,9 +2202,15 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 	.set_optmin	= IP6T_BASE_CTL,
 	.set_optmax	= IP6T_SO_SET_MAX+1,
 	.set		= do_ip6t_set_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_set	= compat_do_ip6t_set_ctl,
+#endif
 	.get_optmin	= IP6T_BASE_CTL,
 	.get_optmax	= IP6T_SO_GET_MAX+1,
 	.get		= do_ip6t_get_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_get	= compat_do_ip6t_get_ctl,
+#endif
 	.owner		= THIS_MODULE,
 };
 
-- 
cgit v1.2.3


From 34f4c4295eb2580ca67677dea14a2839aedc403f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:50:53 -0800
Subject: [NETFILTER]: x_tables: enable compat translation for IPv6
 matches/targets

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_CONNMARK.c | 5 +++++
 net/netfilter/xt_connmark.c | 5 +++++
 net/netfilter/xt_limit.c    | 5 +++++
 net/netfilter/xt_mark.c     | 5 +++++
 4 files changed, 20 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 0250bbea4c8..d96ee3e0ba2 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -160,6 +160,11 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 		.destroy	= connmark_tg_destroy,
 		.target		= connmark_tg,
 		.targetsize	= sizeof(struct xt_connmark_target_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
+		.compat_from_user = connmark_tg_compat_from_user,
+		.compat_to_user	= connmark_tg_compat_to_user,
+#endif
 		.me		= THIS_MODULE
 	},
 };
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 7e0874af318..b5c0f2fe699 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -126,6 +126,11 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 		.match		= connmark_mt,
 		.destroy	= connmark_mt_destroy,
 		.matchsize	= sizeof(struct xt_connmark_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_connmark_info),
+		.compat_from_user = connmark_mt_compat_from_user,
+		.compat_to_user	= connmark_mt_compat_to_user,
+#endif
 		.me		= THIS_MODULE
 	},
 };
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index c9352dbf3a1..2ef0dbf6392 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -187,6 +187,11 @@ static struct xt_match limit_mt_reg[] __read_mostly = {
 		.checkentry	= limit_mt_check,
 		.match		= limit_mt,
 		.matchsize	= sizeof(struct xt_rateinfo),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_rateinfo),
+		.compat_from_user = limit_mt_compat_from_user,
+		.compat_to_user	= limit_mt_compat_to_user,
+#endif
 		.me		= THIS_MODULE,
 	},
 };
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 650cdea97e7..ce8735e9762 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -94,6 +94,11 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 		.checkentry	= mark_mt_check,
 		.match		= mark_mt,
 		.matchsize	= sizeof(struct xt_mark_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_info),
+		.compat_from_user = mark_mt_compat_from_user,
+		.compat_to_user	= mark_mt_compat_to_user,
+#endif
 		.me		= THIS_MODULE,
 	},
 };
-- 
cgit v1.2.3


From 311af5cbeab8ce2b9252ce65272e427eeb8093b7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:51:14 -0800
Subject: [NETFILTER]: xt_MARK: support revision 1 for IPv6

The current netfilter SVN version includes support for this, so enable
it in the kernel as well.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_MARK.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index de32aa5b57f..5bf912120f4 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -158,6 +158,21 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "MARK",
+		.family		= AF_INET6,
+		.revision	= 1,
+		.checkentry	= mark_tg_check,
+		.target		= mark_tg,
+		.targetsize	= sizeof(struct xt_mark_target_info_v1),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
+		.compat_from_user = mark_tg_compat_from_user,
+		.compat_to_user	= mark_tg_compat_to_user,
+#endif
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init mark_tg_init(void)
-- 
cgit v1.2.3


From 1fe5723773656a8ee7a981caf7fee9adb0ac6259 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:51:33 -0800
Subject: [NETFILTER]: xt_MARK: add compat support for revision 0

Old userspace doesn't support revision 1, especially for IPv6, which
is only available in the SVN snapshot.

Add compat support for revision 0.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_MARK.c | 44 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 5bf912120f4..57c6d55e33d 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -94,6 +94,28 @@ mark_tg_check(const char *tablename, const void *entry,
 }
 
 #ifdef CONFIG_COMPAT
+struct compat_xt_mark_target_info {
+	compat_ulong_t	mark;
+};
+
+static void mark_tg_compat_from_user(void *dst, void *src)
+{
+	const struct compat_xt_mark_target_info *cm = src;
+	struct xt_mark_target_info m = {
+		.mark	= cm->mark,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int mark_tg_compat_to_user(void __user *dst, void *src)
+{
+	const struct xt_mark_target_info *m = src;
+	struct compat_xt_mark_target_info cm = {
+		.mark	= m->mark,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+
 struct compat_xt_mark_target_info_v1 {
 	compat_ulong_t	mark;
 	u_int8_t	mode;
@@ -101,7 +123,7 @@ struct compat_xt_mark_target_info_v1 {
 	u_int16_t	__pad2;
 };
 
-static void mark_tg_compat_from_user(void *dst, void *src)
+static void mark_tg_compat_from_user_v1(void *dst, void *src)
 {
 	const struct compat_xt_mark_target_info_v1 *cm = src;
 	struct xt_mark_target_info_v1 m = {
@@ -111,7 +133,7 @@ static void mark_tg_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int mark_tg_compat_to_user(void __user *dst, void *src)
+static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
 {
 	const struct xt_mark_target_info_v1 *m = src;
 	struct compat_xt_mark_target_info_v1 cm = {
@@ -130,6 +152,11 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.checkentry	= mark_tg_check_v0,
 		.target		= mark_tg_v0,
 		.targetsize	= sizeof(struct xt_mark_target_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_target_info),
+		.compat_from_user = mark_tg_compat_from_user,
+		.compat_to_user	= mark_tg_compat_to_user,
+#endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
@@ -142,8 +169,8 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.targetsize	= sizeof(struct xt_mark_target_info_v1),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
-		.compat_from_user = mark_tg_compat_from_user,
-		.compat_to_user	= mark_tg_compat_to_user,
+		.compat_from_user = mark_tg_compat_from_user_v1,
+		.compat_to_user	= mark_tg_compat_to_user_v1,
 #endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -155,6 +182,11 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.checkentry	= mark_tg_check_v0,
 		.target		= mark_tg_v0,
 		.targetsize	= sizeof(struct xt_mark_target_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_target_info),
+		.compat_from_user = mark_tg_compat_from_user,
+		.compat_to_user	= mark_tg_compat_to_user,
+#endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
@@ -167,8 +199,8 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.targetsize	= sizeof(struct xt_mark_target_info_v1),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
-		.compat_from_user = mark_tg_compat_from_user,
-		.compat_to_user	= mark_tg_compat_to_user,
+		.compat_from_user = mark_tg_compat_from_user_v1,
+		.compat_to_user	= mark_tg_compat_to_user_v1,
 #endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
-- 
cgit v1.2.3


From 9c54795950d198e77144a18c94e7ed52ea0f3c77 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:52:00 -0800
Subject: [NETFILTER]: {ip,ip6}_tables: reformat to eliminate differences

Reformat ip_tables.c and ip6_tables.c in order to eliminate non-functional
differences and minimize diff output.

This allows to get a view of the real differences using:

sed -e 's/IP6T/IPT/g' \
    -e 's/IP6/IP/g' \
    -e 's/INET6/INET/g' \
    -e 's/ip6t/ipt/g' \
    -e 's/ip6/ip/g' \
    -e 's/ipv6/ip/g' \
    -e 's/icmp6/icmp/g' \
    net/ipv6/netfilter/ip6_tables.c | \
    diff -wup /dev/stdin net/ipv4/netfilter/ip_tables.c

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c  | 66 +++++++++++++++++++----------------------
 net/ipv6/netfilter/ip6_tables.c | 46 ++++++++++++++--------------
 2 files changed, 53 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 07be12cc3fe..231f5d29075 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -74,7 +74,7 @@ do {								\
    Hence the start of any table is given by get_table() below.  */
 
 /* Returns whether matches rule or not. */
-static inline int
+static inline bool
 ip_packet_match(const struct iphdr *ip,
 		const char *indev,
 		const char *outdev,
@@ -102,7 +102,7 @@ ip_packet_match(const struct iphdr *ip,
 			NIPQUAD(ipinfo->dmsk.s_addr),
 			NIPQUAD(ipinfo->dst.s_addr),
 			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
-		return 0;
+		return false;
 	}
 
 	/* Look for ifname matches; this should unroll nicely. */
@@ -116,7 +116,7 @@ ip_packet_match(const struct iphdr *ip,
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
 			indev, ipinfo->iniface,
 			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
-		return 0;
+		return false;
 	}
 
 	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
@@ -129,7 +129,7 @@ ip_packet_match(const struct iphdr *ip,
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
 			outdev, ipinfo->outiface,
 			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
-		return 0;
+		return false;
 	}
 
 	/* Check specific protocol */
@@ -138,7 +138,7 @@ ip_packet_match(const struct iphdr *ip,
 		dprintf("Packet protocol %hi does not match %hi.%s\n",
 			ip->protocol, ipinfo->proto,
 			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
-		return 0;
+		return false;
 	}
 
 	/* If we have a fragment rule but the packet is not a fragment
@@ -146,10 +146,10 @@ ip_packet_match(const struct iphdr *ip,
 	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
 		dprintf("Fragment rule but not fragment.%s\n",
 			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static inline bool
@@ -222,7 +222,7 @@ unconditional(const struct ipt_ip *ip)
 static const char *hooknames[] = {
 	[NF_INET_PRE_ROUTING]		= "PREROUTING",
 	[NF_INET_LOCAL_IN]		= "INPUT",
-	[NF_INET_FORWARD]			= "FORWARD",
+	[NF_INET_FORWARD]		= "FORWARD",
 	[NF_INET_LOCAL_OUT]		= "OUTPUT",
 	[NF_INET_POST_ROUTING]		= "POSTROUTING",
 };
@@ -467,8 +467,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 	   to 0 as we leave), and comefrom to save source hook bitmask */
 	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
-		struct ipt_entry *e
-			= (struct ipt_entry *)(entry0 + pos);
+		struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -486,8 +485,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 				       hook, pos, e->comefrom);
 				return 0;
 			}
-			e->comefrom
-				|= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
+			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
 			if ((e->target_offset == sizeof(struct ipt_entry)
@@ -589,7 +587,8 @@ check_entry(struct ipt_entry *e, const char *name)
 		return -EINVAL;
 	}
 
-	if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset)
+	if (e->target_offset + sizeof(struct ipt_entry_target) >
+	    e->next_offset)
 		return -EINVAL;
 
 	t = ipt_get_target(e);
@@ -633,7 +632,7 @@ find_check_match(struct ipt_entry_match *m,
 	int ret;
 
 	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-						   m->u.user.revision),
+						      m->u.user.revision),
 					"ipt_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
 		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
@@ -959,7 +958,6 @@ copy_entries_to_user(unsigned int total_size,
 	 * allowed to migrate to another cpu)
 	 */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
-	/* ... then copy entire thing ... */
 	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
@@ -1169,15 +1167,13 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
 	t = xt_find_table_lock(AF_INET, get.name);
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
-		duprintf("t->private->number = %u\n",
-			 private->number);
+		duprintf("t->private->number = %u\n", private->number);
 		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 private->size,
-				 get.size);
+				 private->size, get.size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
@@ -1281,7 +1277,7 @@ do_replace(void __user *user, unsigned int len)
 	if (!newinfo)
 		return -ENOMEM;
 
-	/* choose the copy that is our node/cpu */
+	/* choose the copy that is on our node/cpu */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
@@ -1304,7 +1300,7 @@ do_replace(void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1651,7 +1647,8 @@ static inline int compat_check_entry(struct ipt_entry *e, const char *name,
 	int j, ret;
 
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
+				e->comefrom, &j);
 	if (ret)
 		goto cleanup_matches;
 
@@ -1744,8 +1741,8 @@ translate_compat_table(const char *name,
 	pos = entry1;
 	size = total_size;
 	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
-				       compat_copy_entry_from_user, &pos, &size,
-				       name, newinfo, entry1);
+				       compat_copy_entry_from_user,
+				       &pos, &size, name, newinfo, entry1);
 	xt_compat_flush_offsets(AF_INET);
 	xt_compat_unlock(AF_INET);
 	if (ret)
@@ -1813,7 +1810,7 @@ compat_do_replace(void __user *user, unsigned int len)
 	if (!newinfo)
 		return -ENOMEM;
 
-	/* choose the copy that is our node/cpu */
+	/* choose the copy that is on our node/cpu */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
@@ -1934,16 +1931,14 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		struct xt_table_info info;
-		duprintf("t->private->number = %u\n",
-			 private->number);
+		duprintf("t->private->number = %u\n", private->number);
 		ret = compat_table_info(private, &info);
 		if (!ret && get.size == info.size) {
 			ret = compat_copy_entries_to_user(private->size,
 							  t, uptr->entrytable);
 		} else if (!ret) {
 			duprintf("compat_get_entries: I've got %u not %u!\n",
-				 private->size,
-				 get.size);
+				 private->size, get.size);
 			ret = -EINVAL;
 		}
 		xt_compat_flush_offsets(AF_INET);
@@ -1981,7 +1976,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 #endif
 
 static int
-do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user, unsigned int len)
+do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
 	int ret;
 
@@ -2068,9 +2063,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
 	if (!newinfo)
 		return -ENOMEM;
 
-	/* choose the copy on our node/cpu
-	 * but dont care of preemption
-	 */
+	/* choose the copy on our node/cpu, but dont care about preemption */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
@@ -2112,7 +2105,8 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 		     u_int8_t type, u_int8_t code,
 		     bool invert)
 {
-	return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
+	return ((test_type == 0xFF) ||
+		(type == test_type && code >= min_code && code <= max_code))
 		^ invert;
 }
 
@@ -2153,7 +2147,7 @@ icmp_match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static bool
 icmp_checkentry(const char *tablename,
-	   const void *info,
+	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
 	   unsigned int hook_mask)
@@ -2204,9 +2198,9 @@ static struct xt_match icmp_matchstruct __read_mostly = {
 	.name		= "icmp",
 	.match		= icmp_match,
 	.matchsize	= sizeof(struct ipt_icmp),
+	.checkentry	= icmp_checkentry,
 	.proto		= IPPROTO_ICMP,
 	.family		= AF_INET,
-	.checkentry	= icmp_checkentry,
 };
 
 static int __init ip_tables_init(void)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index db0dc96be55..0a2ec4b346f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -371,8 +371,8 @@ ip6t_do_table(struct sk_buff *skb,
 	 * match it. */
 
 	read_lock_bh(&table->lock);
-	private = table->private;
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -496,9 +496,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 	   to 0 as we leave), and comefrom to save source hook bitmask */
 	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
-		struct ip6t_entry *e
-			= (struct ip6t_entry *)(entry0 + pos);
-		int visited = e->comefrom & (1 << hook);
+		struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -509,14 +507,14 @@ mark_source_chains(struct xt_table_info *newinfo,
 		for (;;) {
 			struct ip6t_standard_target *t
 				= (void *)ip6t_get_target(e);
+			int visited = e->comefrom & (1 << hook);
 
 			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
 				printk("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
-			e->comefrom
-				|= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
+			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
 			if ((e->target_offset == sizeof(struct ip6t_entry)
@@ -663,7 +661,7 @@ find_check_match(struct ip6t_entry_match *m,
 	int ret;
 
 	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
-					m->u.user.revision),
+						      m->u.user.revision),
 					"ip6t_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
 		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
@@ -885,7 +883,7 @@ translate_table(const char *name,
 			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
-	return 0;
+	return ret;
 }
 
 /* Gets counters. */
@@ -984,7 +982,10 @@ copy_entries_to_user(unsigned int total_size,
 	if (IS_ERR(counters))
 		return PTR_ERR(counters);
 
-	/* choose the copy that is on ourc node/cpu */
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
@@ -1199,7 +1200,7 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len)
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 private->size, entries->size);
+				 private->size, get.size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
@@ -1361,8 +1362,8 @@ do_add_counters(void __user *user, unsigned int len, int compat)
 	char *name;
 	int size;
 	void *ptmp;
-	struct xt_table_info *private;
 	struct xt_table *t;
+	struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
 #ifdef CONFIG_COMPAT
@@ -1829,7 +1830,7 @@ compat_do_replace(void __user *user, unsigned int len)
 	if (!newinfo)
 		return -ENOMEM;
 
-	/* choose the copy that is our node/cpu */
+	/* choose the copy that is on our node/cpu */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
@@ -1950,16 +1951,14 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len)
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		struct xt_table_info info;
-		duprintf("t->private->number = %u\n",
-			 private->number);
+		duprintf("t->private->number = %u\n", private->number);
 		ret = compat_table_info(private, &info);
 		if (!ret && get.size == info.size) {
 			ret = compat_copy_entries_to_user(private->size,
 							  t, uptr->entrytable);
 		} else if (!ret) {
 			duprintf("compat_get_entries: I've got %u not %u!\n",
-				 private->size,
-				 get.size);
+				 private->size, get.size);
 			ret = -EINVAL;
 		}
 		xt_compat_flush_offsets(AF_INET6);
@@ -2072,8 +2071,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	return ret;
 }
 
-int ip6t_register_table(struct xt_table *table,
-			const struct ip6t_replace *repl)
+int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
@@ -2085,7 +2083,7 @@ int ip6t_register_table(struct xt_table *table,
 	if (!newinfo)
 		return -ENOMEM;
 
-	/* choose the copy on our node/cpu */
+	/* choose the copy on our node/cpu, but dont care about preemption */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
@@ -2141,17 +2139,18 @@ icmp6_match(const struct sk_buff *skb,
 	   unsigned int protoff,
 	   bool *hotdrop)
 {
-	struct icmp6hdr _icmp, *ic;
+	struct icmp6hdr _icmph, *ic;
 	const struct ip6t_icmp *icmpinfo = matchinfo;
 
 	/* Must not be a fragment. */
 	if (offset)
 		return false;
 
-	ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
+	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
 		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
+		 * can't.  Hence, no choice but to drop.
+		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
 		*hotdrop = true;
 		return false;
@@ -2216,7 +2215,7 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 
 static struct xt_match icmp6_matchstruct __read_mostly = {
 	.name		= "icmp6",
-	.match		= &icmp6_match,
+	.match		= icmp6_match,
 	.matchsize	= sizeof(struct ip6t_icmp),
 	.checkentry	= icmp6_checkentry,
 	.proto		= IPPROTO_ICMPV6,
@@ -2265,6 +2264,7 @@ err1:
 static void __exit ip6_tables_fini(void)
 {
 	nf_unregister_sockopt(&ip6t_sockopts);
+
 	xt_unregister_match(&icmp6_matchstruct);
 	xt_unregister_target(&ip6t_error_target);
 	xt_unregister_target(&ip6t_standard_target);
-- 
cgit v1.2.3


From c9d8fe13175140c79982f9d29c6921328f9afad6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:52:15 -0800
Subject: [NETFILTER]: {ip,ip6}_tables: fix format strings

Use %zu for sizeof() and remove casts.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c  | 20 ++++++++------------
 net/ipv6/netfilter/ip6_tables.c | 16 +++++++---------
 2 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 231f5d29075..98c65ac47d4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1091,8 +1091,8 @@ static int get_info(void __user *user, int *len, int compat)
 	int ret;
 
 	if (*len != sizeof(struct ipt_getinfo)) {
-		duprintf("length %u != %u\n", *len,
-			(unsigned int)sizeof(struct ipt_getinfo));
+		duprintf("length %u != %zu\n", *len,
+			 sizeof(struct ipt_getinfo));
 		return -EINVAL;
 	}
 
@@ -1151,16 +1151,14 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
 	struct xt_table *t;
 
 	if (*len < sizeof(get)) {
-		duprintf("get_entries: %u < %d\n", *len,
-				(unsigned int)sizeof(get));
+		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
 		return -EINVAL;
 	}
 	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
 	if (*len != sizeof(struct ipt_get_entries) + get.size) {
-		duprintf("get_entries: %u != %u\n", *len,
-				(unsigned int)(sizeof(struct ipt_get_entries) +
-				get.size));
+		duprintf("get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
 
@@ -1911,8 +1909,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 	struct xt_table *t;
 
 	if (*len < sizeof(get)) {
-		duprintf("compat_get_entries: %u < %u\n",
-			 *len, (unsigned int)sizeof(get));
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
 		return -EINVAL;
 	}
 
@@ -1920,9 +1917,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 		return -EFAULT;
 
 	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
-		duprintf("compat_get_entries: %u != %u\n", *len,
-			 (unsigned int)(sizeof(struct compat_ipt_get_entries) +
-			 get.size));
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0a2ec4b346f..40893fc0592 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1120,7 +1120,7 @@ static int get_info(void __user *user, int *len, int compat)
 	int ret;
 
 	if (*len != sizeof(struct ip6t_getinfo)) {
-		duprintf("length %u != %u\n", *len,
+		duprintf("length %u != %zu\n", *len,
 			 sizeof(struct ip6t_getinfo));
 		return -EINVAL;
 	}
@@ -1180,14 +1180,14 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len)
 	struct xt_table *t;
 
 	if (*len < sizeof(get)) {
-		duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
 		return -EINVAL;
 	}
 	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
 	if (*len != sizeof(struct ip6t_get_entries) + get.size) {
-		duprintf("get_entries: %u != %u\n", *len,
-			 sizeof(struct ip6t_get_entries) + get.size);
+		duprintf("get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
 
@@ -1931,8 +1931,7 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len)
 	struct xt_table *t;
 
 	if (*len < sizeof(get)) {
-		duprintf("compat_get_entries: %u < %u\n",
-			 *len, (unsigned int)sizeof(get));
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
 		return -EINVAL;
 	}
 
@@ -1940,9 +1939,8 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len)
 		return -EFAULT;
 
 	if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
-		duprintf("compat_get_entries: %u != %u\n", *len,
-			 (unsigned int)(sizeof(struct compat_ip6t_get_entries) +
-			 get.size));
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From b5dd674b2a1de5925955a088b0a10f81484e975a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:52:35 -0800
Subject: [NETFILTER]: ip6_tables: fix stack leagage

Fix leakage of local variable on stack. This already got fixed in
ip_tables silently by the compat patches.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 40893fc0592..fc4c62fddfe 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1154,7 +1154,7 @@ static int get_info(void __user *user, int *len, int compat)
 		       sizeof(info.underflow));
 		info.num_entries = private->number;
 		info.size = private->size;
-		memcpy(info.name, name, sizeof(info.name));
+		strcpy(info.name, name);
 
 		if (copy_to_user(user, &info, *len) != 0)
 			ret = -EFAULT;
-- 
cgit v1.2.3


From da4d0f6b3d3c7bcd00e097d48416e0a1dfde2a0f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:52:52 -0800
Subject: [NETFILTER]: ip6_tables: use raw_smp_processor_id() in
 do_add_counters()

Use raw_smp_processor_id() in do_add_counters() as in ip_tables.c.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index fc4c62fddfe..d910d56d22d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1420,7 +1420,7 @@ do_add_counters(void __user *user, unsigned int len, int compat)
 
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[smp_processor_id()];
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
-- 
cgit v1.2.3


From 6d6a55f42d93eeba7add62c3ad6a5409c6fd24ae Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:53:18 -0800
Subject: [NETFILTER]: ip_tables: remove ipchains compatibility hack

ipchains support has been removed years ago. kill last remains.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 98c65ac47d4..439b2925765 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1263,10 +1263,6 @@ do_replace(void __user *user, unsigned int len)
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (len != sizeof(tmp) + tmp.size)
-		return -ENOPROTOOPT;
-
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
@@ -1794,10 +1790,6 @@ compat_do_replace(void __user *user, unsigned int len)
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (len != sizeof(tmp) + tmp.size)
-		return -ENOPROTOOPT;
-
 	/* overflow check */
 	if (tmp.size >= INT_MAX / num_possible_cpus())
 		return -ENOMEM;
-- 
cgit v1.2.3


From 03dafbbdf8a5e8a9e3e347a393930ed49506d00b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:55:16 -0800
Subject: [NETFILTER]: arp_tables: remove obsolete standard_check function

The size check is already performed by xt_check_target, no need
to do it again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d5cae7e906c..8b1842babaf 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -435,23 +435,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
 	return 1;
 }
 
-static inline int standard_check(const struct arpt_entry_target *t,
-				 unsigned int max_offset)
-{
-	/* Check standard info. */
-	if (t->u.target_size
-	    != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
-		duprintf("arpt_standard_check: target size %u != %Zu\n",
-			 t->u.target_size,
-			 ARPT_ALIGN(sizeof(struct arpt_standard_target)));
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct arpt_target arpt_standard_target;
-
 static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
 			      unsigned int *i)
 {
@@ -486,14 +469,9 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 	if (ret)
 		goto err;
 
-	if (t->u.kernel.target == &arpt_standard_target) {
-		if (!standard_check(t, size)) {
-			ret = -EINVAL;
-			goto err;
-		}
-	} else if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      e->comefrom)) {
+	if (t->u.kernel.target->checkentry
+	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
+					       e->comefrom)) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
 		ret = -EINVAL;
-- 
cgit v1.2.3


From 197631201ec2833f50ac50e6aa7c8ee151e66dda Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:55:59 -0800
Subject: [NETFILTER]: arp_tables: use vmalloc_node()

Use vmalloc_node() as in ip_tables.c.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8b1842babaf..fb4d77978ec 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -804,7 +804,8 @@ static int do_replace(void __user *user, unsigned int len)
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+	counters = vmalloc_node(tmp.num_counters * sizeof(struct xt_counters),
+				numa_node_id());
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
@@ -902,7 +903,7 @@ static int do_add_counters(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc_node(len, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 70f0bfcf6a24e259d51f62354f866f42f8a3d317 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:56:14 -0800
Subject: [NETFILTER]: arp_tables: remove ipchains compat hack

Remove compatiblity hack copied from ip_tables.c - ipchains didn't even
support arp_tables :)

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index fb4d77978ec..cafb35ae285 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -784,10 +784,6 @@ static int do_replace(void __user *user, unsigned int len)
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (len != sizeof(tmp) + tmp.size)
-		return -ENOPROTOOPT;
-
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
-- 
cgit v1.2.3


From fb5b6095f320bd5a615049aa5fe8827ae9d1bf80 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:56:33 -0800
Subject: [NETFILTER]: arp_tables: move entry and target checks to seperate
 functions

Resync with ip_tables.c as preparation for compat support.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 58 +++++++++++++++++++++++++++++------------
 1 file changed, 41 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index cafb35ae285..b0f43315842 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -435,12 +435,9 @@ static int mark_source_chains(struct xt_table_info *newinfo,
 	return 1;
 }
 
-static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
-			      unsigned int *i)
+static inline int check_entry(struct arpt_entry *e, const char *name)
 {
 	struct arpt_entry_target *t;
-	struct arpt_target *target;
-	int ret;
 
 	if (!arp_checkentry(&e->arp)) {
 		duprintf("arp_tables: arp check failed %p %s.\n", e, name);
@@ -454,30 +451,57 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 	if (e->target_offset + t->u.target_size > e->next_offset)
 		return -EINVAL;
 
+	return 0;
+}
+
+static inline int check_target(struct arpt_entry *e, const char *name)
+{
+	struct arpt_entry_target *t;
+	struct arpt_target *target;
+	int ret;
+
+	t = arpt_get_target(e);
+	target = t->u.kernel.target;
+
+	ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t),
+			      name, e->comefrom, 0, 0);
+	if (!ret && t->u.kernel.target->checkentry
+	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
+					       e->comefrom)) {
+		duprintf("arp_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+static inline int
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+		 unsigned int *i)
+{
+	struct arpt_entry_target *t;
+	struct arpt_target *target;
+	int ret;
+
+	ret = check_entry(e, name);
+	if (ret)
+		return ret;
+
+	t = arpt_get_target(e);
 	target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
 							t->u.user.revision),
 					 "arpt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
-		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
 		goto out;
 	}
 	t->u.kernel.target = target;
 
-	ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, 0, 0);
+	ret = check_target(e, name);
 	if (ret)
 		goto err;
 
-	if (t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
-					       e->comefrom)) {
-		duprintf("arp_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
 	(*i)++;
 	return 0;
 err:
@@ -611,7 +635,7 @@ static int translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
-				 check_entry, name, size, &i);
+				 find_check_entry, name, size, &i);
 
 	if (ret != 0) {
 		ARPT_ENTRY_ITERATE(entry0, newinfo->size,
-- 
cgit v1.2.3


From 27e2c26b85b6b234411d94127201436c1ec9c002 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 21:56:48 -0800
Subject: [NETFILTER]: arp_tables: move counter allocation to seperate function

More resyncing with ip_tables.c as preparation for compat support.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b0f43315842..eaca06eace0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -706,16 +706,11 @@ static void get_counters(const struct xt_table_info *t,
 	}
 }
 
-static int copy_entries_to_user(unsigned int total_size,
-				struct arpt_table *table,
-				void __user *userptr)
+static inline struct xt_counters *alloc_counters(struct arpt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct arpt_entry *e;
+	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	int ret = 0;
-	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -725,13 +720,31 @@ static int copy_entries_to_user(unsigned int total_size,
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* First, sum counters... */
 	write_lock_bh(&table->lock);
 	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
+	return counters;
+}
+
+static int copy_entries_to_user(unsigned int total_size,
+				struct arpt_table *table,
+				void __user *userptr)
+{
+	unsigned int off, num;
+	struct arpt_entry *e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	/* ... then copy entire thing ... */
 	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
-- 
cgit v1.2.3


From 41acd975b954ad6ec4943d77e5920f2eeaf54518 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:26:24 -0800
Subject: [NETFILTER]: arp_tables: move ARPT_SO_GET_INFO handling to seperate
 function

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 88 +++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index eaca06eace0..33e8d53eebf 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -782,6 +782,49 @@ static int copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
+static int get_info(void __user *user, int *len)
+{
+	char name[ARPT_TABLE_MAXNAMELEN];
+	struct arpt_table *t;
+	int ret;
+
+	if (*len != sizeof(struct arpt_getinfo)) {
+		duprintf("length %u != %Zu\n", *len,
+			 sizeof(struct arpt_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+
+	t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
+				    "arptable_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct arpt_getinfo info;
+		struct xt_table_info *private = t->private;
+
+		info.valid_hooks = t->valid_hooks;
+		memcpy(info.hook_entry, private->hook_entry,
+		       sizeof(info.hook_entry));
+		memcpy(info.underflow, private->underflow,
+		       sizeof(info.underflow));
+		info.num_entries = private->number;
+		info.size = private->size;
+		strcpy(info.name, name);
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+	return ret;
+}
+
 static int get_entries(const struct arpt_get_entries *entries,
 		       struct arpt_get_entries __user *uptr)
 {
@@ -1008,48 +1051,9 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 		return -EPERM;
 
 	switch (cmd) {
-	case ARPT_SO_GET_INFO: {
-		char name[ARPT_TABLE_MAXNAMELEN];
-		struct arpt_table *t;
-
-		if (*len != sizeof(struct arpt_getinfo)) {
-			duprintf("length %u != %Zu\n", *len,
-				 sizeof(struct arpt_getinfo));
-			ret = -EINVAL;
-			break;
-		}
-
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
-
-		t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
-					    "arptable_%s", name);
-		if (t && !IS_ERR(t)) {
-			struct arpt_getinfo info;
-			struct xt_table_info *private = t->private;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = private->number;
-			info.size = private->size;
-			strcpy(info.name, name);
-
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-			xt_table_unlock(t);
-			module_put(t->me);
-		} else
-			ret = t ? PTR_ERR(t) : -ENOENT;
-	}
-	break;
+	case ARPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
 
 	case ARPT_SO_GET_ENTRIES: {
 		struct arpt_get_entries get;
-- 
cgit v1.2.3


From 11f6dff8af95d8d1d14bef70d384029c5acf6e04 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:26:38 -0800
Subject: [NETFILTER]: arp_tables: resync get_entries() with ip_tables

Resync get_entries() with ip_tables.c by moving the checks from the
setsockopt handler to the function itself.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 33e8d53eebf..029df76b970 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -825,23 +825,35 @@ static int get_info(void __user *user, int *len)
 	return ret;
 }
 
-static int get_entries(const struct arpt_get_entries *entries,
-		       struct arpt_get_entries __user *uptr)
+static int get_entries(struct arpt_get_entries __user *uptr, int *len)
 {
 	int ret;
+	struct arpt_get_entries get;
 	struct arpt_table *t;
 
-	t = xt_find_table_lock(NF_ARP, entries->name);
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct arpt_get_entries) + get.size) {
+		duprintf("get_entries: %u != %Zu\n", *len,
+			 sizeof(struct arpt_get_entries) + get.size);
+		return -EINVAL;
+	}
+
+	t = xt_find_table_lock(NF_ARP, get.name);
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
 			 private->number);
-		if (entries->size == private->size)
+		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 private->size, entries->size);
+				 private->size, get.size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
@@ -1055,22 +1067,9 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 		ret = get_info(user, len);
 		break;
 
-	case ARPT_SO_GET_ENTRIES: {
-		struct arpt_get_entries get;
-
-		if (*len < sizeof(get)) {
-			duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
-			ret = -EINVAL;
-		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-			ret = -EFAULT;
-		} else if (*len != sizeof(struct arpt_get_entries) + get.size) {
-			duprintf("get_entries: %u != %Zu\n", *len,
-				 sizeof(struct arpt_get_entries) + get.size);
-			ret = -EINVAL;
-		} else
-			ret = get_entries(&get, user);
+	case ARPT_SO_GET_ENTRIES:
+		ret = get_entries(user, len);
 		break;
-	}
 
 	case ARPT_SO_GET_REVISION_TARGET: {
 		struct xt_get_revision rev;
-- 
cgit v1.2.3


From d6a2ba07c31b0497fc82a8c175400ea8747da2ef Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:26:54 -0800
Subject: [NETFILTER]: arp_tables: add compat support

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 748 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 690 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 029df76b970..ad2da6db0e2 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -19,9 +19,10 @@
 #include <linux/proc_fs.h>
 #include <linux/module.h>
 #include <linux/init.h>
-
-#include <asm/uaccess.h>
 #include <linux/mutex.h>
+#include <linux/err.h>
+#include <net/compat.h>
+#include <asm/uaccess.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp/arp_tables.h>
@@ -782,7 +783,73 @@ static int copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
-static int get_info(void __user *user, int *len)
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, void *src)
+{
+	int v = *(compat_int_t *)src;
+
+	if (v > 0)
+		v += xt_compat_calc_jump(NF_ARP, v);
+	memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, void *src)
+{
+	compat_int_t cv = *(int *)src;
+
+	if (cv > 0)
+		cv -= xt_compat_calc_jump(NF_ARP, cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(struct arpt_entry *e,
+			     const struct xt_table_info *info,
+			     void *base, struct xt_table_info *newinfo)
+{
+	struct arpt_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+	entry_offset = (void *)e - base;
+
+	t = arpt_get_target(e);
+	off += xt_compat_target_offset(t->u.kernel.target);
+	newinfo->size -= off;
+	ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+		if (info->hook_entry[i] &&
+		    (e < (struct arpt_entry *)(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] &&
+		    (e < (struct arpt_entry *)(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	void *loc_cpu_entry;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	/* we dont care about newinfo->entries[] */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
+				  compat_calc_entry, info, loc_cpu_entry,
+				  newinfo);
+}
+#endif
+
+static int get_info(void __user *user, int *len, int compat)
 {
 	char name[ARPT_TABLE_MAXNAMELEN];
 	struct arpt_table *t;
@@ -798,13 +865,24 @@ static int get_info(void __user *user, int *len)
 		return -EFAULT;
 
 	name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
-
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(NF_ARP);
+#endif
 	t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
 				    "arptable_%s", name);
 	if (t && !IS_ERR(t)) {
 		struct arpt_getinfo info;
 		struct xt_table_info *private = t->private;
 
+#ifdef CONFIG_COMPAT
+		if (compat) {
+			struct xt_table_info tmp;
+			ret = compat_table_info(private, &tmp);
+			xt_compat_flush_offsets(NF_ARP);
+			private = &tmp;
+		}
+#endif
 		info.valid_hooks = t->valid_hooks;
 		memcpy(info.hook_entry, private->hook_entry,
 		       sizeof(info.hook_entry));
@@ -822,6 +900,10 @@ static int get_info(void __user *user, int *len)
 		module_put(t->me);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(NF_ARP);
+#endif
 	return ret;
 }
 
@@ -864,65 +946,41 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len)
 	return ret;
 }
 
-static int do_replace(void __user *user, unsigned int len)
+static int __do_replace(const char *name, unsigned int valid_hooks,
+			struct xt_table_info *newinfo,
+			unsigned int num_counters,
+			void __user *counters_ptr)
 {
 	int ret;
-	struct arpt_replace tmp;
 	struct arpt_table *t;
-	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
-	void *loc_cpu_entry, *loc_cpu_old_entry;
-
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
-
-	/* overflow check */
-	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-		return -ENOMEM;
-
-	newinfo = xt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
+	void *loc_cpu_old_entry;
 
-	/* choose the copy that is on our node/cpu */
-	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
-
-	counters = vmalloc_node(tmp.num_counters * sizeof(struct xt_counters),
+	ret = 0;
+	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
 				numa_node_id());
 	if (!counters) {
 		ret = -ENOMEM;
-		goto free_newinfo;
+		goto out;
 	}
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("arp_tables: Translated table\n");
-
-	t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
-				    "arptable_%s", tmp.name);
+	t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
+				    "arptable_%s", name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -940,11 +998,12 @@ static int do_replace(void __user *user, unsigned int len)
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
+			   NULL);
 
 	xt_free_table_info(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
 	xt_table_unlock(t);
@@ -954,9 +1013,53 @@ static int do_replace(void __user *user, unsigned int len)
 	module_put(t->me);
 	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
- free_newinfo_counters:
 	vfree(counters);
+ out:
+	return ret;
+}
+
+static int do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct arpt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("arp_tables: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -976,31 +1079,59 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
 	return 0;
 }
 
-static int do_add_counters(void __user *user, unsigned int len)
+static int do_add_counters(void __user *user, unsigned int len, int compat)
 {
 	unsigned int i;
-	struct xt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	char *name;
+	int size;
+	void *ptmp;
 	struct arpt_table *t;
 	struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len, numa_node_id());
+	paddc = vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = xt_find_table_lock(NF_ARP, tmp.name);
+	t = xt_find_table_lock(NF_ARP, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1008,7 +1139,7 @@ static int do_add_counters(void __user *user, unsigned int len)
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != tmp.num_counters) {
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
@@ -1019,7 +1150,7 @@ static int do_add_counters(void __user *user, unsigned int len)
 	ARPT_ENTRY_ITERATE(loc_cpu_entry,
 			   private->size,
 			   add_counter_to_entry,
-			   paddc->counters,
+			   paddc,
 			   &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
@@ -1031,6 +1162,496 @@ static int do_add_counters(void __user *user, unsigned int len)
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+static inline int
+compat_release_entry(struct compat_arpt_entry *e, unsigned int *i)
+{
+	struct arpt_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	t = compat_arpt_get_target(e);
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  unsigned char *base,
+				  unsigned char *limit,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows,
+				  unsigned int *i,
+				  const char *name)
+{
+	struct arpt_entry_target *t;
+	struct xt_target *target;
+	unsigned int entry_offset;
+	int ret, off, h;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_arpt_entry) +
+			     sizeof(struct compat_xt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct arpt_entry *)e, name);
+	if (ret)
+		return ret;
+
+	off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+	entry_offset = (void *)e - (void *)base;
+
+	t = compat_arpt_get_target(e);
+	target = try_then_request_module(xt_find_target(NF_ARP,
+							t->u.user.name,
+							t->u.user.revision),
+					 "arpt_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+			 t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto out;
+	}
+	t->u.kernel.target = target;
+
+	off += xt_compat_target_offset(target);
+	*size += off;
+	ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
+	if (ret)
+		goto release_target;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	memset(&e->counters, 0, sizeof(e->counters));
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+
+release_target:
+	module_put(t->u.kernel.target->me);
+out:
+	return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
+			    unsigned int *size, const char *name,
+			    struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct arpt_entry_target *t;
+	struct xt_target *target;
+	struct arpt_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	ret = 0;
+	origsize = *size;
+	de = (struct arpt_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct arpt_entry));
+	memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+	*dstptr += sizeof(struct arpt_entry);
+	*size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = compat_arpt_get_target(e);
+	target = t->u.kernel.target;
+	xt_compat_target_from_user(t, dstptr, size);
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+	return ret;
+}
+
+static inline int compat_check_entry(struct arpt_entry *e, const char *name,
+				     unsigned int *i)
+{
+	int ret;
+
+	ret = check_target(e, name);
+	if (ret)
+		return ret;
+
+	(*i)++;
+	return 0;
+}
+
+static int translate_compat_table(const char *name,
+				  unsigned int valid_hooks,
+				  struct xt_table_info **pinfo,
+				  void **pentry0,
+				  unsigned int total_size,
+				  unsigned int number,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows)
+{
+	unsigned int i, j;
+	struct xt_table_info *newinfo, *info;
+	void *pos, *entry0, *entry1;
+	unsigned int size;
+	int ret;
+
+	info = *pinfo;
+	entry0 = *pentry0;
+	size = total_size;
+	info->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+		info->hook_entry[i] = 0xFFFFFFFF;
+		info->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	j = 0;
+	xt_compat_lock(NF_ARP);
+	/* Walk through entries, checking offsets. */
+	ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
+					check_compat_entry_size_and_hooks,
+					info, &size, entry0,
+					entry0 + total_size,
+					hook_entries, underflows, &j, name);
+	if (ret != 0)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (j != number) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 j, number);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (info->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			goto out_unlock;
+		}
+		if (info->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = xt_alloc_table_info(size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = number;
+	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	entry1 = newinfo->entries[raw_smp_processor_id()];
+	pos = entry1;
+	size = total_size;
+	ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
+					compat_copy_entry_from_user,
+					&pos, &size, name, newinfo, entry1);
+	xt_compat_flush_offsets(NF_ARP);
+	xt_compat_unlock(NF_ARP);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
+		goto free_newinfo;
+
+	i = 0;
+	ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
+				 name, &i);
+	if (ret) {
+		j -= i;
+		COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
+						   compat_release_entry, &j);
+		ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+		xt_free_table_info(newinfo);
+		return ret;
+	}
+
+	/* And one copy for every other CPU */
+	for_each_possible_cpu(i)
+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+			memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+	*pinfo = newinfo;
+	*pentry0 = entry1;
+	xt_free_table_info(info);
+	return 0;
+
+free_newinfo:
+	xt_free_table_info(newinfo);
+out:
+	COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+	return ret;
+out_unlock:
+	xt_compat_flush_offsets(NF_ARP);
+	xt_compat_unlock(NF_ARP);
+	goto out;
+}
+
+struct compat_arpt_replace {
+	char				name[ARPT_TABLE_MAXNAMELEN];
+	u32				valid_hooks;
+	u32				num_entries;
+	u32				size;
+	u32				hook_entry[NF_ARP_NUMHOOKS];
+	u32				underflow[NF_ARP_NUMHOOKS];
+	u32				num_counters;
+	compat_uptr_t			counters;
+	struct compat_arpt_entry	entries[0];
+};
+
+static int compat_do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct compat_arpt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.size >= INT_MAX / num_possible_cpus())
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+				     &newinfo, &loc_cpu_entry, tmp.size,
+				     tmp.num_entries, tmp.hook_entry,
+				     tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
+				  unsigned int len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case ARPT_SO_SET_REPLACE:
+		ret = compat_do_replace(user, len);
+		break;
+
+	case ARPT_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(user, len, 1);
+		break;
+
+	default:
+		duprintf("do_arpt_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
+				     compat_uint_t *size,
+				     struct xt_counters *counters,
+				     unsigned int *i)
+{
+	struct arpt_entry_target *t;
+	struct compat_arpt_entry __user *ce;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	int ret;
+
+	ret = -EFAULT;
+	origsize = *size;
+	ce = (struct compat_arpt_entry __user *)*dstptr;
+	if (copy_to_user(ce, e, sizeof(struct arpt_entry)))
+		goto out;
+
+	if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
+		goto out;
+
+	*dstptr += sizeof(struct compat_arpt_entry);
+	*size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+	target_offset = e->target_offset - (origsize - *size);
+
+	t = arpt_get_target(e);
+	ret = xt_compat_target_to_user(t, dstptr, size);
+	if (ret)
+		goto out;
+	ret = -EFAULT;
+	next_offset = e->next_offset - (origsize - *size);
+	if (put_user(target_offset, &ce->target_offset))
+		goto out;
+	if (put_user(next_offset, &ce->next_offset))
+		goto out;
+
+	(*i)++;
+	return 0;
+out:
+	return ret;
+}
+
+static int compat_copy_entries_to_user(unsigned int total_size,
+				       struct arpt_table *table,
+				       void __user *userptr)
+{
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	void __user *pos;
+	unsigned int size;
+	int ret = 0;
+	void *loc_cpu_entry;
+	unsigned int i = 0;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy on our node/cpu */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	pos = userptr;
+	size = total_size;
+	ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
+				 compat_copy_entry_to_user,
+				 &pos, &size, counters, &i);
+	vfree(counters);
+	return ret;
+}
+
+struct compat_arpt_get_entries {
+	char name[ARPT_TABLE_MAXNAMELEN];
+	compat_uint_t size;
+	struct compat_arpt_entry entrytable[0];
+};
+
+static int compat_get_entries(struct compat_arpt_get_entries __user *uptr,
+			      int *len)
+{
+	int ret;
+	struct compat_arpt_get_entries get;
+	struct arpt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
+
+	xt_compat_lock(NF_ARP);
+	t = xt_find_table_lock(NF_ARP, get.name);
+	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+
+		duprintf("t->private->number = %u\n", private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+							  t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EINVAL;
+		}
+		xt_compat_flush_offsets(NF_ARP);
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	xt_compat_unlock(NF_ARP);
+	return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
+
+static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
+				  int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case ARPT_SO_GET_INFO:
+		ret = get_info(user, len, 1);
+		break;
+	case ARPT_SO_GET_ENTRIES:
+		ret = compat_get_entries(user, len);
+		break;
+	default:
+		ret = do_arpt_get_ctl(sk, cmd, user, len);
+	}
+	return ret;
+}
+#endif
+
 static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
 	int ret;
@@ -1044,7 +1665,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
 		break;
 
 	case ARPT_SO_SET_ADD_COUNTERS:
-		ret = do_add_counters(user, len);
+		ret = do_add_counters(user, len, 0);
 		break;
 
 	default:
@@ -1064,7 +1685,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 
 	switch (cmd) {
 	case ARPT_SO_GET_INFO:
-		ret = get_info(user, len);
+		ret = get_info(user, len, 0);
 		break;
 
 	case ARPT_SO_GET_ENTRIES:
@@ -1156,6 +1777,11 @@ static struct arpt_target arpt_standard_target __read_mostly = {
 	.name		= ARPT_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= NF_ARP,
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(compat_int_t),
+	.compat_from_user = compat_standard_from_user,
+	.compat_to_user	= compat_standard_to_user,
+#endif
 };
 
 static struct arpt_target arpt_error_target __read_mostly = {
@@ -1170,9 +1796,15 @@ static struct nf_sockopt_ops arpt_sockopts = {
 	.set_optmin	= ARPT_BASE_CTL,
 	.set_optmax	= ARPT_SO_SET_MAX+1,
 	.set		= do_arpt_set_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_set	= compat_do_arpt_set_ctl,
+#endif
 	.get_optmin	= ARPT_BASE_CTL,
 	.get_optmax	= ARPT_SO_GET_MAX+1,
 	.get		= do_arpt_get_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_get	= compat_do_arpt_get_ctl,
+#endif
 	.owner		= THIS_MODULE,
 };
 
-- 
cgit v1.2.3


From 170080645dac61816455afad807ffeb326ce79e8 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Mon, 17 Dec 2007 22:27:36 -0800
Subject: [NETFILTER]: xt_TCPMSS: don't allow netfilter --setmss to increase
 mss

When terminating DSL connections for an assortment of random customers, I've
found it necessary to use iptables to clamp the MSS used for connections to
work around the various ICMP blackholes in the greater net.  Unfortunately,
the current behaviour in Linux is imperfect and actually make things worse,
so I'm proposing the following: increasing the MSS in a packet can never be
a good thing, so make --set-mss only lower the MSS in a packet.

Yes, I am aware of --clamp-mss-to-pmtu, but it doesn't work for outgoing
connections from clients (ie web traffic), as it only looks at the PMTU on
the destination route, not the source of the packet (the DSL interfaces in
question have a 1442 byte MTU while the destination ethernet interface is
1500 -- there are problematic hosts which use a 1300 byte MTU).  Reworking
that is probably a good idea at some point, but it's more work than this is.

Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_TCPMSS.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e4ee4bc81ff..a1bc77fcd68 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -88,8 +88,11 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 
 			oldmss = (opt[i+2] << 8) | opt[i+3];
 
-			if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-			    oldmss <= newmss)
+			/* Never increase MSS, even when setting it, as
+			 * doing so results in problems for hosts that rely
+			 * on MSS being set correctly.
+			 */
+			if (oldmss <= newmss)
 				return 0;
 
 			opt[i+2] = (newmss & 0xff00) >> 8;
-- 
cgit v1.2.3


From 13eae15a244bb29beaa47bf86a24fd29ca7f8a4c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 Dec 2007 22:28:00 -0800
Subject: [NETFILTER]: ctnetlink: add support for NAT sequence adjustments

The combination of NAT and helpers may produce TCP sequence adjustments.
In failover setups, this information needs to be replicated in order to
achieve a successful recovery of mangled, related connections. This patch is
particularly useful for conntrackd, see:

http://people.netfilter.org/pablo/conntrack-tools/

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_helper.c   |   3 +
 net/netfilter/nf_conntrack_netlink.c | 124 ++++++++++++++++++++++++++++++++++-
 2 files changed, 126 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 53f79a310b4..d24f3d94739 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -20,6 +20,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -191,6 +192,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 		/* Tell TCP window tracking about seq change */
 		nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
 					ct, CTINFO2DIR(ctinfo));
+
+		nf_conntrack_event_cache(IPCT_NATSEQADJ, skb);
 	}
 	return 1;
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a15971e9923..d7da167ef1b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -254,6 +254,55 @@ nla_put_failure:
 #define ctnetlink_dump_mark(a, b) (0)
 #endif
 
+#ifdef CONFIG_NF_NAT_NEEDED
+static inline int
+dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
+{
+	__be32 tmp;
+	struct nlattr *nest_parms;
+
+	nest_parms = nla_nest_start(skb, type | NLA_F_NESTED);
+	if (!nest_parms)
+		goto nla_put_failure;
+
+	tmp = htonl(natseq->correction_pos);
+	NLA_PUT(skb, CTA_NAT_SEQ_CORRECTION_POS, sizeof(tmp), &tmp);
+	tmp = htonl(natseq->offset_before);
+	NLA_PUT(skb, CTA_NAT_SEQ_OFFSET_BEFORE, sizeof(tmp), &tmp);
+	tmp = htonl(natseq->offset_after);
+	NLA_PUT(skb, CTA_NAT_SEQ_OFFSET_AFTER, sizeof(tmp), &tmp);
+
+	nla_nest_end(skb, nest_parms);
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nf_nat_seq *natseq;
+	struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (!(ct->status & IPS_SEQ_ADJUST) || !nat)
+		return 0;
+
+	natseq = &nat->seq[IP_CT_DIR_ORIGINAL];
+	if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1)
+		return -1;
+
+	natseq = &nat->seq[IP_CT_DIR_REPLY];
+	if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1)
+		return -1;
+
+	return 0;
+}
+#else
+#define ctnetlink_dump_nat_seq_adj(a, b) (0)
+#endif
+
 static inline int
 ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
 {
@@ -321,7 +370,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
 	    ctnetlink_dump_id(skb, ct) < 0 ||
-	    ctnetlink_dump_use(skb, ct) < 0)
+	    ctnetlink_dump_use(skb, ct) < 0 ||
+	    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -424,6 +474,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		    (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 		     ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
 			goto nla_put_failure;
+
+		if (events & IPCT_NATSEQADJ &&
+		    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+			goto nla_put_failure;
 	}
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -935,6 +989,66 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[])
 	return err;
 }
 
+#ifdef CONFIG_NF_NAT_NEEDED
+static inline int
+change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr)
+{
+	struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
+
+	nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL);
+
+	if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
+		return -EINVAL;
+
+	natseq->correction_pos =
+		ntohl(*(__be32 *)nla_data(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+
+	if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
+		return -EINVAL;
+
+	natseq->offset_before =
+		ntohl(*(__be32 *)nla_data(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+
+	if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
+		return -EINVAL;
+
+	natseq->offset_after =
+		ntohl(*(__be32 *)nla_data(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+
+	return 0;
+}
+
+static int
+ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[])
+{
+	int ret = 0;
+	struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (!nat)
+		return 0;
+
+	if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
+		ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
+					 cda[CTA_NAT_SEQ_ADJ_ORIG]);
+		if (ret < 0)
+			return ret;
+
+		ct->status |= IPS_SEQ_ADJUST;
+	}
+
+	if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
+		ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
+					 cda[CTA_NAT_SEQ_ADJ_REPLY]);
+		if (ret < 0)
+			return ret;
+
+		ct->status |= IPS_SEQ_ADJUST;
+	}
+
+	return 0;
+}
+#endif
+
 static int
 ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
 {
@@ -969,6 +1083,14 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
 		ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK]));
 #endif
 
+#ifdef CONFIG_NF_NAT_NEEDED
+	if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
+		err = ctnetlink_change_nat_seq_adj(ct, cda);
+		if (err < 0)
+			return err;
+	}
+#endif
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 0f417ce989f84cfd5418e3b316064bfbb2708196 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 Dec 2007 22:28:19 -0800
Subject: [NETFILTER]: ctnetlink: add support for master tuple event
 notification and dumping

This patch adds support for master tuple event notification and
dumping.  Conntrackd needs this information to recover related
connections appropriately.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d7da167ef1b..94027c84be5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -254,6 +254,29 @@ nla_put_failure:
 #define ctnetlink_dump_mark(a, b) (0)
 #endif
 
+#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+
+static inline int
+ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nlattr *nest_parms;
+
+	if (!(ct->status & IPS_EXPECTED))
+		return 0;
+
+	nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED);
+	if (!nest_parms)
+		goto nla_put_failure;
+	if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0)
+		goto nla_put_failure;
+	nla_nest_end(skb, nest_parms);
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 #ifdef CONFIG_NF_NAT_NEEDED
 static inline int
 dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
@@ -371,6 +394,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
+	    ctnetlink_dump_master(skb, ct) < 0 ||
 	    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
@@ -475,6 +499,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		     ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
 			goto nla_put_failure;
 
+		if (events & IPCT_RELATED &&
+		    ctnetlink_dump_master(skb, ct) < 0)
+			goto nla_put_failure;
+
 		if (events & IPCT_NATSEQADJ &&
 		    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
 			goto nla_put_failure;
-- 
cgit v1.2.3


From 37fccd8577d38e249dde71512fb38d2f6a4d9d3c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 Dec 2007 22:28:41 -0800
Subject: [NETFILTER]: ctnetlink: add support for secmark

This patch adds support for James Morris' connsecmark.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 22 ++++++++++++++++++++++
 net/netfilter/xt_CONNSECMARK.c       |  5 ++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 94027c84be5..d4eedc68cc7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -254,6 +254,22 @@ nla_put_failure:
 #define ctnetlink_dump_mark(a, b) (0)
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+static inline int
+ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	__be32 mark = htonl(ct->secmark);
+
+	NLA_PUT(skb, CTA_SECMARK, sizeof(u_int32_t), &mark);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_secmark(a, b) (0)
+#endif
+
 #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
 
 static inline int
@@ -392,6 +408,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
+	    ctnetlink_dump_secmark(skb, ct) < 0 ||
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
 	    ctnetlink_dump_master(skb, ct) < 0 ||
@@ -493,6 +510,11 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		    && ctnetlink_dump_mark(skb, ct) < 0)
 			goto nla_put_failure;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+		if ((events & IPCT_SECMARK || ct->secmark)
+		    && ctnetlink_dump_secmark(skb, ct) < 0)
+			goto nla_put_failure;
+#endif
 
 		if (events & IPCT_COUNTER_FILLING &&
 		    (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 2c265e87f39..2333f7e29bc 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -20,6 +20,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CONNSECMARK.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 
 #define PFX "CONNSECMARK: "
 
@@ -40,8 +41,10 @@ static void secmark_save(const struct sk_buff *skb)
 		enum ip_conntrack_info ctinfo;
 
 		ct = nf_ct_get(skb, &ctinfo);
-		if (ct && !ct->secmark)
+		if (ct && !ct->secmark) {
 			ct->secmark = skb->secmark;
+			nf_conntrack_event_cache(IPCT_SECMARK, skb);
+		}
 	}
 }
 
-- 
cgit v1.2.3


From c7212e9d3938258abe3fd17d15bb0d5c1856b8df Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 Dec 2007 22:29:02 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: add ctnetlink support

This patch adds support for SCTP to ctnetlink.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index cb046751059..224612370f5 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -598,6 +598,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.packet 		= sctp_packet,
 	.new 			= sctp_new,
 	.me 			= THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
+	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
+	.nla_policy		= nf_ct_port_nla_policy,
+#endif
 #ifdef CONFIG_SYSCTL
 	.ctl_table_users	= &sctp_sysctl_table_users,
 	.ctl_table_header	= &sctp_sysctl_header,
@@ -619,6 +624,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.packet 		= sctp_packet,
 	.new 			= sctp_new,
 	.me 			= THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
+	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
+	.nla_policy		= nf_ct_port_nla_policy,
+#endif
 #ifdef CONFIG_SYSCTL
 	.ctl_table_users	= &sctp_sysctl_table_users,
 	.ctl_table_header	= &sctp_sysctl_header,
-- 
cgit v1.2.3


From 77236b6e33b06aaf756a86ed1965ca7d460b1b53 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:29:45 -0800
Subject: [NETFILTER]: ctnetlink: use netlink attribute helpers

Use NLA_PUT_BE32, nla_get_be32() etc.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 ++-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 18 ++----
 net/ipv4/netfilter/nf_nat_core.c               | 12 ++--
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 18 ++----
 net/netfilter/nf_conntrack_core.c              | 10 ++-
 net/netfilter/nf_conntrack_netlink.c           | 87 +++++++++++---------------
 net/netfilter/nf_conntrack_proto_tcp.c         | 22 +++----
 7 files changed, 70 insertions(+), 107 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cd2d8451dda..78e64951137 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -363,10 +363,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
 				const struct nf_conntrack_tuple *tuple)
 {
-	NLA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
-		&tuple->src.u3.ip);
-	NLA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
-		&tuple->dst.u3.ip);
+	NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
+	NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
 	return 0;
 
 nla_put_failure:
@@ -384,8 +382,8 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
 		return -EINVAL;
 
-	t->src.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_SRC]);
-	t->dst.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_DST]);
+	t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
+	t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
 
 	return 0;
 }
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 4153e049498..3e2e5cdda9d 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -234,12 +234,9 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
 static int icmp_tuple_to_nlattr(struct sk_buff *skb,
 				const struct nf_conntrack_tuple *t)
 {
-	NLA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
-		&t->src.u.icmp.id);
-	NLA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
-		&t->dst.u.icmp.type);
-	NLA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
-		&t->dst.u.icmp.code);
+	NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code);
 
 	return 0;
 
@@ -261,12 +258,9 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
 	    || !tb[CTA_PROTO_ICMP_ID])
 		return -EINVAL;
 
-	tuple->dst.u.icmp.type =
-			*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_TYPE]);
-	tuple->dst.u.icmp.code =
-			*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_CODE]);
-	tuple->src.u.icmp.id =
-			*(__be16 *)nla_data(tb[CTA_PROTO_ICMP_ID]);
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
 
 	if (tuple->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[tuple->dst.u.icmp.type])
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 746c2efddcd..4ee67e9f42b 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -547,10 +547,8 @@ int
 nf_nat_port_range_to_nlattr(struct sk_buff *skb,
 			    const struct nf_nat_range *range)
 {
-	NLA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
-		&range->min.tcp.port);
-	NLA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
-		&range->max.tcp.port);
+	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.tcp.port);
+	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.tcp.port);
 
 	return 0;
 
@@ -568,8 +566,7 @@ nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
 
 	if (tb[CTA_PROTONAT_PORT_MIN]) {
 		ret = 1;
-		range->min.tcp.port =
-			*(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MIN]);
+		range->min.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
 	}
 
 	if (!tb[CTA_PROTONAT_PORT_MAX]) {
@@ -577,8 +574,7 @@ nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
 			range->max.tcp.port = range->min.tcp.port;
 	} else {
 		ret = 1;
-		range->max.tcp.port =
-			*(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MAX]);
+		range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
 	}
 
 	return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index e99384f9764..44689d44441 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -213,12 +213,9 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
 static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
 				  const struct nf_conntrack_tuple *t)
 {
-	NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
-		&t->src.u.icmp.id);
-	NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
-		&t->dst.u.icmp.type);
-	NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
-		&t->dst.u.icmp.code);
+	NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code);
 
 	return 0;
 
@@ -240,12 +237,9 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
 	    || !tb[CTA_PROTO_ICMPV6_ID])
 		return -EINVAL;
 
-	tuple->dst.u.icmp.type =
-			*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]);
-	tuple->dst.u.icmp.code =
-			*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]);
-	tuple->src.u.icmp.id =
-			*(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]);
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
 
 	if (tuple->dst.u.icmp.type < 128
 	    || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a4d5cdeb011..5920f953f78 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -831,10 +831,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple)
 {
-	NLA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
-		&tuple->src.u.tcp.port);
-	NLA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
-		&tuple->dst.u.tcp.port);
+	NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
+	NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
 	return 0;
 
 nla_put_failure:
@@ -854,8 +852,8 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
 	if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
 		return -EINVAL;
 
-	t->src.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_SRC_PORT]);
-	t->dst.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_DST_PORT]);
+	t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
+	t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
 
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d4eedc68cc7..dcd0c9a4bb7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -59,7 +59,7 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb,
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
-	NLA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
+	NLA_PUT_U8(skb, CTA_PROTO_NUM, tuple->dst.protonum);
 
 	if (likely(l4proto->tuple_to_nlattr))
 		ret = l4proto->tuple_to_nlattr(skb, tuple);
@@ -120,8 +120,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
 static inline int
 ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	__be32 status = htonl((u_int32_t) ct->status);
-	NLA_PUT(skb, CTA_STATUS, sizeof(status), &status);
+	NLA_PUT_BE32(skb, CTA_STATUS, htonl(ct->status));
 	return 0;
 
 nla_put_failure:
@@ -131,15 +130,12 @@ nla_put_failure:
 static inline int
 ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	long timeout_l = ct->timeout.expires - jiffies;
-	__be32 timeout;
+	long timeout = (ct->timeout.expires - jiffies) / HZ;
 
-	if (timeout_l < 0)
+	if (timeout < 0)
 		timeout = 0;
-	else
-		timeout = htonl(timeout_l / HZ);
 
-	NLA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
+	NLA_PUT_BE32(skb, CTA_TIMEOUT, htonl(timeout));
 	return 0;
 
 nla_put_failure:
@@ -193,7 +189,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
 	nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED);
 	if (!nest_helper)
 		goto nla_put_failure;
-	NLA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name);
+	NLA_PUT_STRING(skb, CTA_HELP_NAME, helper->name);
 
 	if (helper->to_nlattr)
 		helper->to_nlattr(skb, ct);
@@ -215,17 +211,15 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
 {
 	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
 	struct nlattr *nest_count;
-	__be32 tmp;
 
 	nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
 	if (!nest_count)
 		goto nla_put_failure;
 
-	tmp = htonl(ct->counters[dir].packets);
-	NLA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
-
-	tmp = htonl(ct->counters[dir].bytes);
-	NLA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
+	NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS,
+		     htonl(ct->counters[dir].packets));
+	NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES,
+		     htonl(ct->counters[dir].bytes));
 
 	nla_nest_end(skb, nest_count);
 
@@ -242,9 +236,7 @@ nla_put_failure:
 static inline int
 ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	__be32 mark = htonl(ct->mark);
-
-	NLA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
+	NLA_PUT_BE32(skb, CTA_MARK, htonl(ct->mark));
 	return 0;
 
 nla_put_failure:
@@ -258,9 +250,7 @@ nla_put_failure:
 static inline int
 ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	__be32 mark = htonl(ct->secmark);
-
-	NLA_PUT(skb, CTA_SECMARK, sizeof(u_int32_t), &mark);
+	NLA_PUT_BE32(skb, CTA_SECMARK, htonl(ct->secmark));
 	return 0;
 
 nla_put_failure:
@@ -297,19 +287,18 @@ nla_put_failure:
 static inline int
 dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
 {
-	__be32 tmp;
 	struct nlattr *nest_parms;
 
 	nest_parms = nla_nest_start(skb, type | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	tmp = htonl(natseq->correction_pos);
-	NLA_PUT(skb, CTA_NAT_SEQ_CORRECTION_POS, sizeof(tmp), &tmp);
-	tmp = htonl(natseq->offset_before);
-	NLA_PUT(skb, CTA_NAT_SEQ_OFFSET_BEFORE, sizeof(tmp), &tmp);
-	tmp = htonl(natseq->offset_after);
-	NLA_PUT(skb, CTA_NAT_SEQ_OFFSET_AFTER, sizeof(tmp), &tmp);
+	NLA_PUT_BE32(skb, CTA_NAT_SEQ_CORRECTION_POS,
+		     htonl(natseq->correction_pos));
+	NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
+		     htonl(natseq->offset_before));
+	NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
+		     htonl(natseq->offset_after));
 
 	nla_nest_end(skb, nest_parms);
 
@@ -345,8 +334,7 @@ ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
 static inline int
 ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	__be32 id = htonl((unsigned long)ct);
-	NLA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
+	NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
 	return 0;
 
 nla_put_failure:
@@ -356,9 +344,7 @@ nla_put_failure:
 static inline int
 ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	__be32 use = htonl(atomic_read(&ct->ct_general.use));
-
-	NLA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
+	NLA_PUT_BE32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)));
 	return 0;
 
 nla_put_failure:
@@ -646,7 +632,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
 
 	if (!tb[CTA_PROTO_NUM])
 		return -EINVAL;
-	tuple->dst.protonum = *(u_int8_t *)nla_data(tb[CTA_PROTO_NUM]);
+	tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
 
 	l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
 
@@ -751,12 +737,12 @@ nfnetlink_parse_nat(struct nlattr *nat,
 		return err;
 
 	if (tb[CTA_NAT_MINIP])
-		range->min_ip = *(__be32 *)nla_data(tb[CTA_NAT_MINIP]);
+		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
 
 	if (!tb[CTA_NAT_MAXIP])
 		range->max_ip = range->min_ip;
 	else
-		range->max_ip = *(__be32 *)nla_data(tb[CTA_NAT_MAXIP]);
+		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
 
 	if (range->min_ip)
 		range->flags |= IP_NAT_RANGE_MAP_IPS;
@@ -826,7 +812,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
 	if (cda[CTA_ID]) {
-		u_int32_t id = ntohl(*(__be32 *)nla_data(cda[CTA_ID]));
+		u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
 		if (id != (u32)(unsigned long)ct) {
 			nf_ct_put(ct);
 			return -ENOENT;
@@ -906,7 +892,7 @@ static inline int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 {
 	unsigned long d;
-	unsigned int status = ntohl(*(__be32 *)nla_data(cda[CTA_STATUS]));
+	unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
 	d = ct->status ^ status;
 
 	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -1008,7 +994,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
 static inline int
 ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[])
 {
-	u_int32_t timeout = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT]));
+	u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
 	if (!del_timer(&ct->timeout))
 		return -ETIME;
@@ -1051,19 +1037,19 @@ change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr)
 		return -EINVAL;
 
 	natseq->correction_pos =
-		ntohl(*(__be32 *)nla_data(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
 
 	if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
 		return -EINVAL;
 
 	natseq->offset_before =
-		ntohl(*(__be32 *)nla_data(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
 
 	if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
 		return -EINVAL;
 
 	natseq->offset_after =
-		ntohl(*(__be32 *)nla_data(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
 
 	return 0;
 }
@@ -1130,7 +1116,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
-		ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK]));
+		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
 #endif
 
 #ifdef CONFIG_NF_NAT_NEEDED
@@ -1161,7 +1147,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 
 	if (!cda[CTA_TIMEOUT])
 		goto err;
-	ct->timeout.expires = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT]));
+	ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
 	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
 	ct->status |= IPS_CONFIRMED;
@@ -1180,7 +1166,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
-		ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK]));
+		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
 #endif
 
 	helper = nf_ct_helper_find_get(rtuple);
@@ -1371,7 +1357,6 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 {
 	struct nf_conn *master = exp->master;
 	__be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
-	__be32 id = htonl((unsigned long)exp);
 
 	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
 		goto nla_put_failure;
@@ -1382,8 +1367,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 				 CTA_EXPECT_MASTER) < 0)
 		goto nla_put_failure;
 
-	NLA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
-	NLA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
+	NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, timeout);
+	NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
 
 	return 0;
 
@@ -1556,7 +1541,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		return -ENOENT;
 
 	if (cda[CTA_EXPECT_ID]) {
-		__be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]);
+		__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
 		if (ntohl(id) != (u32)(unsigned long)exp) {
 			nf_ct_expect_put(exp);
 			return -ENOENT;
@@ -1610,7 +1595,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 
 		if (cda[CTA_EXPECT_ID]) {
-			__be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]);
+			__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
 			if (ntohl(id) != (u32)(unsigned long)exp) {
 				nf_ct_expect_put(exp);
 				return -ENOENT;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index d96f18863fd..600b476d225 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1072,14 +1072,13 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	NLA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
-		&ct->proto.tcp.state);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
 
-	NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
-		&ct->proto.tcp.seen[0].td_scale);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+		   ct->proto.tcp.seen[0].td_scale);
 
-	NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
-		&ct->proto.tcp.seen[1].td_scale);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
+		   ct->proto.tcp.seen[1].td_scale);
 
 	tmp.flags = ct->proto.tcp.seen[0].flags;
 	NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
@@ -1126,8 +1125,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 		return -EINVAL;
 
 	write_lock_bh(&tcp_lock);
-	ct->proto.tcp.state =
-		*(u_int8_t *)nla_data(tb[CTA_PROTOINFO_TCP_STATE]);
+	ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
 
 	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
 		struct nf_ct_tcp_flags *attr =
@@ -1147,10 +1145,10 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
 	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
 	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
-		ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
-			nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
-		ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
-			nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
+		ct->proto.tcp.seen[0].td_scale =
+			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
+		ct->proto.tcp.seen[1].td_scale =
+			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
 	}
 	write_unlock_bh(&tcp_lock);
 
-- 
cgit v1.2.3


From d978e5daec544ec72b28bf72a30dc9ac3da23a35 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:37:03 -0800
Subject: [NETFILTER]: ctnetlink: fix expectation timeout dumping

When the timer is late its timeout might be before the current time,
in which case a very large value is dumped.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dcd0c9a4bb7..75012585efe 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1356,7 +1356,10 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			  const struct nf_conntrack_expect *exp)
 {
 	struct nf_conn *master = exp->master;
-	__be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
+	long timeout = (exp->timeout.expires - jiffies) / HZ;
+
+	if (timeout < 0)
+		timeout = 0;
 
 	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
 		goto nla_put_failure;
@@ -1367,7 +1370,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 				 CTA_EXPECT_MASTER) < 0)
 		goto nla_put_failure;
 
-	NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, timeout);
+	NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
 	NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
 
 	return 0;
-- 
cgit v1.2.3


From 3ee9e760387c38558df976bc2905959826adf331 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:37:20 -0800
Subject: [NETFILTER]: nf_nat_proto_gre: add missing module reference

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_proto_gre.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index b820f996035..945e0ae78ff 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -138,6 +138,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 static struct nf_nat_protocol gre __read_mostly = {
 	.name			= "GRE",
 	.protonum		= IPPROTO_GRE,
+	.me			= THIS_MODULE,
 	.manip_pkt		= gre_manip_pkt,
 	.in_range		= gre_in_range,
 	.unique_tuple		= gre_unique_tuple,
-- 
cgit v1.2.3


From 2b628a0866860d44652362aafe403e5b5895583d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:37:36 -0800
Subject: [NETFILTER]: nf_nat: mark NAT protocols const

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_core.c          | 20 ++++++++++----------
 net/ipv4/netfilter/nf_nat_proto_gre.c     |  2 +-
 net/ipv4/netfilter/nf_nat_proto_icmp.c    |  2 +-
 net/ipv4/netfilter/nf_nat_proto_tcp.c     |  2 +-
 net/ipv4/netfilter/nf_nat_proto_udp.c     |  2 +-
 net/ipv4/netfilter/nf_nat_proto_unknown.c |  2 +-
 net/netfilter/nf_conntrack_netlink.c      |  2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4ee67e9f42b..a772445228a 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -42,18 +42,18 @@ static int nf_nat_vmalloced;
 static struct hlist_head *bysource;
 
 #define MAX_IP_NAT_PROTO 256
-static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
+static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
 
-static inline struct nf_nat_protocol *
+static inline const struct nf_nat_protocol *
 __nf_nat_proto_find(u_int8_t protonum)
 {
 	return rcu_dereference(nf_nat_protos[protonum]);
 }
 
-struct nf_nat_protocol *
+const struct nf_nat_protocol *
 nf_nat_proto_find_get(u_int8_t protonum)
 {
-	struct nf_nat_protocol *p;
+	const struct nf_nat_protocol *p;
 
 	rcu_read_lock();
 	p = __nf_nat_proto_find(protonum);
@@ -66,7 +66,7 @@ nf_nat_proto_find_get(u_int8_t protonum)
 EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
 
 void
-nf_nat_proto_put(struct nf_nat_protocol *p)
+nf_nat_proto_put(const struct nf_nat_protocol *p)
 {
 	module_put(p->me);
 }
@@ -105,7 +105,7 @@ static int
 in_range(const struct nf_conntrack_tuple *tuple,
 	 const struct nf_nat_range *range)
 {
-	struct nf_nat_protocol *proto;
+	const struct nf_nat_protocol *proto;
 	int ret = 0;
 
 	/* If we are supposed to map IPs, then we must be in the
@@ -226,7 +226,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
-	struct nf_nat_protocol *proto;
+	const struct nf_nat_protocol *proto;
 
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
 	   and that same mapping gives a unique tuple within the given
@@ -355,7 +355,7 @@ manip_pkt(u_int16_t proto,
 	  enum nf_nat_manip_type maniptype)
 {
 	struct iphdr *iph;
-	struct nf_nat_protocol *p;
+	const struct nf_nat_protocol *p;
 
 	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
 		return 0;
@@ -515,7 +515,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
 
 /* Protocol registration. */
-int nf_nat_protocol_register(struct nf_nat_protocol *proto)
+int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
 {
 	int ret = 0;
 
@@ -532,7 +532,7 @@ int nf_nat_protocol_register(struct nf_nat_protocol *proto)
 EXPORT_SYMBOL(nf_nat_protocol_register);
 
 /* Noone stores the protocol anywhere; simply delete it. */
-void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
+void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
 {
 	write_lock_bh(&nf_nat_lock);
 	rcu_assign_pointer(nf_nat_protos[proto->protonum],
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 945e0ae78ff..9fa272e7311 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -135,7 +135,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 	return 1;
 }
 
-static struct nf_nat_protocol gre __read_mostly = {
+static const struct nf_nat_protocol gre = {
 	.name			= "GRE",
 	.protonum		= IPPROTO_GRE,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 088bb141c15..a0e44c953cb 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -71,7 +71,7 @@ icmp_manip_pkt(struct sk_buff *skb,
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_protocol_icmp = {
+const struct nf_nat_protocol nf_nat_protocol_icmp = {
 	.name			= "ICMP",
 	.protonum		= IPPROTO_ICMP,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 633c53ff3d3..da23e9fbe67 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -137,7 +137,7 @@ tcp_manip_pkt(struct sk_buff *skb,
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_protocol_tcp = {
+const struct nf_nat_protocol nf_nat_protocol_tcp = {
 	.name			= "TCP",
 	.protonum		= IPPROTO_TCP,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 9c6519cd15f..10df4db078a 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -127,7 +127,7 @@ udp_manip_pkt(struct sk_buff *skb,
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_protocol_udp = {
+const struct nf_nat_protocol nf_nat_protocol_udp = {
 	.name			= "UDP",
 	.protonum		= IPPROTO_UDP,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index cfd2742e970..a26efeb073c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -45,7 +45,7 @@ unknown_manip_pkt(struct sk_buff *skb,
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_unknown_protocol = {
+const struct nf_nat_protocol nf_nat_unknown_protocol = {
 	.name			= "unknown",
 	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 75012585efe..7851065ef20 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -695,7 +695,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     struct nf_nat_range *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
-	struct nf_nat_protocol *npt;
+	const struct nf_nat_protocol *npt;
 	int err;
 
 	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
-- 
cgit v1.2.3


From ce4b1cebdcd4779097e9862670e5c5208e76712b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:37:52 -0800
Subject: [NETFILTER]: nf_nat: sprinkle a few __read_mostlys

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_core.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index a772445228a..7cc179cfc7c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -33,16 +33,17 @@
 
 static DEFINE_RWLOCK(nf_nat_lock);
 
-static struct nf_conntrack_l3proto *l3proto = NULL;
+static struct nf_conntrack_l3proto *l3proto __read_mostly;
 
 /* Calculated at init based on memory size */
-static unsigned int nf_nat_htable_size;
+static unsigned int nf_nat_htable_size __read_mostly;
 static int nf_nat_vmalloced;
 
-static struct hlist_head *bysource;
+static struct hlist_head *bysource __read_mostly;
 
 #define MAX_IP_NAT_PROTO 256
-static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
+static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
+						__read_mostly;
 
 static inline const struct nf_nat_protocol *
 __nf_nat_proto_find(u_int8_t protonum)
-- 
cgit v1.2.3


From cc01dcbd26865addfe9eb5431f1f9dbc511515ba Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:38:20 -0800
Subject: [NETFILTER]: nf_nat: pass manip type instead of hook to
 nf_nat_setup_info

nf_nat_setup_info gets the hook number and translates that to the
manip type to perform. This is a relict from the time when one
manip per hook could exist, the exact hook number doesn't matter
anymore, its converted to the manip type. Most callers already
know what kind of NAT they want to perform, so pass the maniptype
in directly.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_MASQUERADE.c  |  2 +-
 net/ipv4/netfilter/ipt_NETMAP.c      |  2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c    |  2 +-
 net/ipv4/netfilter/nf_nat_core.c     |  9 +++------
 net/ipv4/netfilter/nf_nat_h323.c     | 16 ++++------------
 net/ipv4/netfilter/nf_nat_helper.c   |  6 ++----
 net/ipv4/netfilter/nf_nat_pptp.c     |  6 ++----
 net/ipv4/netfilter/nf_nat_rule.c     |  8 ++++----
 net/ipv4/netfilter/nf_nat_sip.c      |  6 ++----
 net/netfilter/nf_conntrack_netlink.c | 10 ++++------
 10 files changed, 24 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index f54150356ce..1cbff7b3084 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -95,7 +95,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
 }
 
 static int
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 8b8263e6357..5b71ef4d848 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -70,7 +70,7 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum));
 }
 
 static struct xt_target netmap_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 74ce7e1e9d7..3d9ec5c34c5 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -87,7 +87,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
 }
 
 static struct xt_target redirect_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 7cc179cfc7c..aec157d0ad9 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -277,12 +277,11 @@ out:
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
-		  unsigned int hooknum)
+		  enum nf_nat_manip_type maniptype)
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
 	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
 	/* nat helper or nfctnetlink also setup binding */
 	nat = nfct_nat(ct);
@@ -294,10 +293,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 		}
 	}
 
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
-		     hooknum == NF_INET_POST_ROUTING ||
-		     hooknum == NF_INET_LOCAL_IN ||
-		     hooknum == NF_INET_LOCAL_OUT);
+	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
+		     maniptype == IP_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 0f226df76f5..2e4bdee92c4 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -389,18 +389,14 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	/* Change src to where master sends to */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(new, &range, NF_INET_POST_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip =
 	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(new, &range, NF_INET_PRE_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
@@ -479,17 +475,13 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
 	/* Change src to where master sends to */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(new, &range, NF_INET_POST_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip = this->saved_ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(new, &range, NF_INET_PRE_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index d24f3d94739..4c0232842e7 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -433,15 +433,13 @@ void nf_nat_follow_master(struct nf_conn *ct,
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 }
 EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index c540999f509..e63b944a2eb 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -93,8 +93,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
@@ -104,8 +103,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 }
 
 /* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index ee39ed87bb0..4391aec56ab 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -87,7 +87,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
 			    ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
 	NF_CT_ASSERT(out);
 
-	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
+	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
 }
 
 /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
@@ -133,7 +133,7 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
 		warn_if_extra_mangle(ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
 
-	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
+	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
 }
 
 static bool ipt_snat_checkentry(const char *tablename,
@@ -184,7 +184,7 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 
 	pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
 		 ct, NIPQUAD(ip));
-	return nf_nat_setup_info(ct, &range, hooknum);
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
 }
 
 unsigned int
@@ -203,7 +203,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
 
 	pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
 		 ct, NIPQUAD(ip));
-	return nf_nat_setup_info(ct, &range, hooknum);
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
 }
 
 int nf_nat_rule_find(struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b8c0720cf42..606a170bf4c 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -228,15 +228,13 @@ static void ip_nat_sdp_expect(struct nf_conn *ct,
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip = exp->saved_ip;
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 }
 
 /* So, this packet has hit the connection tracking matching code.
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7851065ef20..3a065f43ddd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -918,19 +918,17 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 			if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
 						&range) < 0)
 				return -EINVAL;
-			if (nf_nat_initialized(ct,
-					       HOOK2MANIP(NF_INET_PRE_ROUTING)))
+			if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
 				return -EEXIST;
-			nf_nat_setup_info(ct, &range, NF_INET_PRE_ROUTING);
+			nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 		}
 		if (cda[CTA_NAT_SRC]) {
 			if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
 						&range) < 0)
 				return -EINVAL;
-			if (nf_nat_initialized(ct,
-					       HOOK2MANIP(NF_INET_POST_ROUTING)))
+			if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
 				return -EEXIST;
-			nf_nat_setup_info(ct, &range, NF_INET_POST_ROUTING);
+			nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 		}
 #endif
 	}
-- 
cgit v1.2.3


From f01ffbd6e7d001ccf9168b33507958a51ce0ffcf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:38:49 -0800
Subject: [NETFILTER]: nf_log: move logging stuff to seperate header

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_log.c                 | 1 +
 net/bridge/netfilter/ebt_ulog.c                | 1 +
 net/ipv4/netfilter/ip_tables.c                 | 1 +
 net/ipv4/netfilter/ipt_LOG.c                   | 1 +
 net/ipv4/netfilter/ipt_ULOG.c                  | 1 +
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 1 +
 net/ipv6/netfilter/ip6_tables.c                | 1 +
 net/ipv6/netfilter/ip6t_LOG.c                  | 1 +
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 1 +
 net/netfilter/nf_conntrack_proto_tcp.c         | 1 +
 net/netfilter/nf_conntrack_proto_udp.c         | 1 +
 net/netfilter/nf_conntrack_proto_udplite.c     | 1 +
 net/netfilter/nf_log.c                         | 1 +
 net/netfilter/nfnetlink_log.c                  | 1 +
 net/netfilter/xt_NFLOG.c                       | 1 +
 15 files changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 457815fb558..fcb3b54dc19 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -17,6 +17,7 @@
 #include <linux/in.h>
 #include <linux/if_arp.h>
 #include <linux/spinlock.h>
+#include <net/netfilter/nf_log.h>
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index e7cfd30bac7..1b9ca07f44f 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -38,6 +38,7 @@
 #include <linux/netdevice.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ulog.h>
+#include <net/netfilter/nf_log.h>
 #include <net/sock.h>
 #include "../br_private.h"
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 439b2925765..271f6a5d3d4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -26,6 +26,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index f8c613a6eb0..4b346e59bf2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_LOG.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 4139042a63a..1d8e146345e 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -43,6 +43,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_ULOG.h>
+#include <net/netfilter/nf_log.h>
 #include <net/sock.h>
 #include <linux/bitops.h>
 #include <asm/unaligned.h>
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 3e2e5cdda9d..cd0d6690627 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -18,6 +18,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d910d56d22d..bb50d0e6673 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -28,6 +28,7 @@
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 19523242991..e6a2b1e9469 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -23,6 +23,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
 MODULE_DESCRIPTION("IP6 tables LOG target module");
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 44689d44441..02d60dfbab8 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -24,6 +24,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 600b476d225..1d496b912bd 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -24,6 +24,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
 
 /* Protects conntrack->proto.tcp */
 static DEFINE_RWLOCK(tcp_lock);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 570a2e10947..7ac60731956 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -21,6 +21,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
 static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 7e116d5766d..6518bcd17d6 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ;
 static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index d67c4fbf603..fad97d69481 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -6,6 +6,7 @@
 #include <linux/netfilter.h>
 #include <linux/seq_file.h>
 #include <net/protocol.h>
+#include <net/netfilter/nf_log.h>
 
 #include "nf_internals.h"
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 2c7bd2eb029..959a0cb131f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -29,6 +29,7 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <net/sock.h>
+#include <net/netfilter/nf_log.h>
 
 #include <asm/atomic.h>
 
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 83af124e88c..866facfa4f4 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -12,6 +12,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_NFLOG.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("x_tables NFLOG target");
-- 
cgit v1.2.3


From 7b2f9631e789c3e7d59201c21f09a24cd6ce3b1a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:39:08 -0800
Subject: [NETFILTER]: nf_log: constify struct nf_logger and nf_log_packet
 loginfo arg

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_log.c  |  2 +-
 net/bridge/netfilter/ebt_ulog.c |  2 +-
 net/ipv4/netfilter/ipt_LOG.c    |  2 +-
 net/ipv6/netfilter/ip6t_LOG.c   |  2 +-
 net/netfilter/nf_log.c          | 10 +++++-----
 net/netfilter/nfnetlink_log.c   |  2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index fcb3b54dc19..3be9e989855 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -183,7 +183,7 @@ static struct ebt_watcher log =
 	.me		= THIS_MODULE,
 };
 
-static struct nf_logger ebt_log_logger = {
+static const struct nf_logger ebt_log_logger = {
 	.name 		= "ebt_log",
 	.logfn		= &ebt_log_packet,
 	.me		= THIS_MODULE,
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 1b9ca07f44f..b73ba28bcbe 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -279,7 +279,7 @@ static struct ebt_watcher ulog = {
 	.me		= THIS_MODULE,
 };
 
-static struct nf_logger ebt_ulog_logger = {
+static const struct nf_logger ebt_ulog_logger = {
 	.name		= EBT_ULOG_WATCHER,
 	.logfn		= &ebt_log_packet,
 	.me		= THIS_MODULE,
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 4b346e59bf2..5acdddfa6ad 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -465,7 +465,7 @@ static struct xt_target log_tg_reg __read_mostly = {
 	.me		= THIS_MODULE,
 };
 
-static struct nf_logger ipt_log_logger ={
+static const struct nf_logger ipt_log_logger ={
 	.name		= "ipt_LOG",
 	.logfn		= &ipt_log_packet,
 	.me		= THIS_MODULE,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index e6a2b1e9469..474c2b12621 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -478,7 +478,7 @@ static struct xt_target log_tg6_reg __read_mostly = {
 	.me 		= THIS_MODULE,
 };
 
-static struct nf_logger ip6t_logger = {
+static const struct nf_logger ip6t_logger = {
 	.name		= "ip6t_LOG",
 	.logfn		= &ip6t_log_packet,
 	.me		= THIS_MODULE,
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index fad97d69481..ed9116dc78b 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -15,12 +15,12 @@
 
 #define NF_LOG_PREFIXLEN		128
 
-static struct nf_logger *nf_loggers[NPROTO];
+static const struct nf_logger *nf_loggers[NPROTO] __read_mostly;
 static DEFINE_MUTEX(nf_log_mutex);
 
 /* return EBUSY if somebody else is registered, EEXIST if the same logger
  * is registred, 0 on success. */
-int nf_log_register(int pf, struct nf_logger *logger)
+int nf_log_register(int pf, const struct nf_logger *logger)
 {
 	int ret;
 
@@ -58,7 +58,7 @@ void nf_log_unregister_pf(int pf)
 }
 EXPORT_SYMBOL(nf_log_unregister_pf);
 
-void nf_log_unregister(struct nf_logger *logger)
+void nf_log_unregister(const struct nf_logger *logger)
 {
 	int i;
 
@@ -78,12 +78,12 @@ void nf_log_packet(int pf,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
-		   struct nf_loginfo *loginfo,
+		   const struct nf_loginfo *loginfo,
 		   const char *fmt, ...)
 {
 	va_list args;
 	char prefix[NF_LOG_PREFIXLEN];
-	struct nf_logger *logger;
+	const struct nf_logger *logger;
 
 	rcu_read_lock();
 	logger = rcu_dereference(nf_loggers[pf]);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 959a0cb131f..02e63577e15 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -726,7 +726,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
 	return -ENOTSUPP;
 }
 
-static struct nf_logger nfulnl_logger = {
+static const struct nf_logger nfulnl_logger = {
 	.name	= "nfnetlink_log",
 	.logfn	= &nfulnl_log_packet,
 	.me	= THIS_MODULE,
-- 
cgit v1.2.3


From a7c42955e036127f793ad955d3ec718494efb1eb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:39:27 -0800
Subject: [NETFILTER]: nf_log: remove incomprehensible comment

Whatever that comment tries to say, I don't get it and it looks like
a leftover from the time when RCU wasn't used properly.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_log.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index ed9116dc78b..4f5f2885fca 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -91,7 +91,6 @@ void nf_log_packet(int pf,
 		va_start(args, fmt);
 		vsnprintf(prefix, sizeof(prefix), fmt, args);
 		va_end(args);
-		/* We must read logging before nf_logfn[pf] */
 		logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
 	} else if (net_ratelimit()) {
 		printk(KERN_WARNING "nf_log_packet: can\'t log since "
-- 
cgit v1.2.3


From c0506365a928adfd5608ed6873a705ae18e2daaf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:39:55 -0800
Subject: [NETFILTER]: nfnetlink_log: fix checks in nfulnl_recv_config

Similar to the nfnetlink_queue fixes:

The peer_pid must be checked in all cases when a logging instance exists,
additionally we must check whether an instance exists before attempting
to configure it to avoid NULL ptr dereferences.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 49 ++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 02e63577e15..3dcc6f51a52 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -753,9 +753,15 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
 
 	inst = instance_lookup_get(group_num);
+	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
+		ret = -EPERM;
+		goto out_put;
+	}
+
 	if (nfula[NFULA_CFG_CMD]) {
 		u_int8_t pf = nfmsg->nfgen_family;
 		struct nfulnl_msg_config_cmd *cmd;
+
 		cmd = nla_data(nfula[NFULA_CFG_CMD]);
 		UDEBUG("found CFG_CMD for\n");
 
@@ -779,11 +785,6 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				goto out;
 			}
 
-			if (inst->peer_pid != NETLINK_CB(skb).pid) {
-				ret = -EPERM;
-				goto out_put;
-			}
-
 			instance_destroy(inst);
 			goto out;
 		case NFULNL_CFG_CMD_PF_BIND:
@@ -800,29 +801,16 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			ret = -EINVAL;
 			break;
 		}
-
-		if (!inst)
-			goto out;
-	} else {
-		if (!inst) {
-			UDEBUG("no config command, and no instance for "
-				"group=%u pid=%u =>ENOENT\n",
-				group_num, NETLINK_CB(skb).pid);
-			ret = -ENOENT;
-			goto out;
-		}
-
-		if (inst->peer_pid != NETLINK_CB(skb).pid) {
-			UDEBUG("no config command, and wrong pid\n");
-			ret = -EPERM;
-			goto out_put;
-		}
 	}
 
 	if (nfula[NFULA_CFG_MODE]) {
 		struct nfulnl_msg_config_mode *params;
 		params = nla_data(nfula[NFULA_CFG_MODE]);
 
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
 		nfulnl_set_mode(inst, params->copy_mode,
 				ntohl(params->copy_range));
 	}
@@ -831,6 +819,10 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		__be32 timeout =
 			*(__be32 *)nla_data(nfula[NFULA_CFG_TIMEOUT]);
 
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
 		nfulnl_set_timeout(inst, ntohl(timeout));
 	}
 
@@ -838,6 +830,10 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		__be32 nlbufsiz =
 			*(__be32 *)nla_data(nfula[NFULA_CFG_NLBUFSIZ]);
 
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
 		nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
 	}
 
@@ -845,12 +841,21 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		__be32 qthresh =
 			*(__be32 *)nla_data(nfula[NFULA_CFG_QTHRESH]);
 
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
 		nfulnl_set_qthresh(inst, ntohl(qthresh));
 	}
 
 	if (nfula[NFULA_CFG_FLAGS]) {
 		__be16 flags =
 			*(__be16 *)nla_data(nfula[NFULA_CFG_FLAGS]);
+
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
 		nfulnl_set_flags(inst, ntohs(flags));
 	}
 
-- 
cgit v1.2.3


From cd21f0ac43f8d7f23573a01ada7fb2c96e686ff8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:40:19 -0800
Subject: [NETFILTER]: nfnetlink_{queue,log}: return ENOTSUPP for unknown cfg
 commands

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c   | 2 +-
 net/netfilter/nfnetlink_queue.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 3dcc6f51a52..325e93a9059 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -798,7 +798,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			nf_log_unregister_pf(pf);
 			break;
 		default:
-			ret = -EINVAL;
+			ret = -ENOTSUPP;
 			break;
 		}
 	}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d94de48f6cd..370f0af50f4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -740,7 +740,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		case NFQNL_CFG_CMD_PF_UNBIND:
 			break;
 		default:
-			ret = -EINVAL;
+			ret = -ENOTSUPP;
 			break;
 		}
 	}
-- 
cgit v1.2.3


From 1792bab4caaa85bae858799bb6231f171f59b58a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:41:02 -0800
Subject: [NETFILTER]: nfnetlink_log: remove excessive debugging

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 45 -------------------------------------------
 1 file changed, 45 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 325e93a9059..c12e1d1bd00 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -45,14 +45,6 @@
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
 
-#if 0
-#define UDEBUG(x, args ...)	printk(KERN_DEBUG "%s(%d):%s():	" x, 	   \
-					__FILE__, __LINE__, __FUNCTION__,  \
-					## args)
-#else
-#define UDEBUG(x, ...)
-#endif
-
 struct nfulnl_instance {
 	struct hlist_node hlist;	/* global list of instances */
 	spinlock_t lock;
@@ -93,8 +85,6 @@ __instance_lookup(u_int16_t group_num)
 	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
-	UDEBUG("entering (group_num=%u)\n", group_num);
-
 	head = &instance_table[instance_hashfn(group_num)];
 	hlist_for_each_entry(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
@@ -127,7 +117,6 @@ static void
 instance_put(struct nfulnl_instance *inst)
 {
 	if (inst && atomic_dec_and_test(&inst->use)) {
-		UDEBUG("kfree(inst=%p)\n", inst);
 		kfree(inst);
 		module_put(THIS_MODULE);
 	}
@@ -140,13 +129,9 @@ instance_create(u_int16_t group_num, int pid)
 {
 	struct nfulnl_instance *inst;
 
-	UDEBUG("entering (group_num=%u, pid=%d)\n", group_num,
-		pid);
-
 	write_lock_bh(&instances_lock);
 	if (__instance_lookup(group_num)) {
 		inst = NULL;
-		UDEBUG("aborting, instance already exists\n");
 		goto out_unlock;
 	}
 
@@ -178,9 +163,6 @@ instance_create(u_int16_t group_num, int pid)
 	hlist_add_head(&inst->hlist,
 		       &instance_table[instance_hashfn(group_num)]);
 
-	UDEBUG("newly added node: %p, next=%p\n", &inst->hlist,
-		inst->hlist.next);
-
 	write_unlock_bh(&instances_lock);
 
 	return inst;
@@ -196,9 +178,6 @@ static void
 __instance_destroy(struct nfulnl_instance *inst)
 {
 	/* first pull it out of the global list */
-	UDEBUG("removing instance %p (queuenum=%u) from hash\n",
-		inst, inst->group_num);
-
 	hlist_del(&inst->hlist);
 
 	/* then flush all pending packets from skb */
@@ -306,8 +285,6 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
 	struct sk_buff *skb;
 	unsigned int n;
 
-	UDEBUG("entered (%u, %u)\n", inst_size, pkt_size);
-
 	/* alloc skb which should be big enough for a whole multipart
 	 * message.  WARNING: has to be <= 128k due to slab restrictions */
 
@@ -342,10 +319,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
 			  sizeof(struct nfgenmsg));
 
 	status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
-	if (status < 0) {
-		UDEBUG("netlink_unicast() failed\n");
-		/* FIXME: statistics */
-	}
 
 	inst->qlen = 0;
 	inst->skb = NULL;
@@ -369,8 +342,6 @@ nfulnl_timer(unsigned long data)
 {
 	struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
 
-	UDEBUG("timer function called, flushing buffer\n");
-
 	spin_lock_bh(&inst->lock);
 	if (inst->skb)
 		__nfulnl_send(inst);
@@ -397,8 +368,6 @@ __build_packet_message(struct nfulnl_instance *inst,
 	__be32 tmp_uint;
 	sk_buff_data_t old_tail = inst->skb->tail;
 
-	UDEBUG("entered\n");
-
 	nlh = NLMSG_PUT(inst->skb, 0, 0,
 			NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
@@ -544,7 +513,6 @@ __build_packet_message(struct nfulnl_instance *inst,
 	return 0;
 
 nlmsg_failure:
-	UDEBUG("nlmsg_failure\n");
 nla_put_failure:
 	PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
 	return -1;
@@ -609,8 +577,6 @@ nfulnl_log_packet(unsigned int pf,
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
 
-	UDEBUG("initial size=%u\n", size);
-
 	spin_lock_bh(&inst->lock);
 
 	if (inst->flags & NFULNL_CFG_F_SEQ)
@@ -637,7 +603,6 @@ nfulnl_log_packet(unsigned int pf,
 			data_len = inst->copy_range;
 
 		size += nla_total_size(data_len);
-		UDEBUG("copy_packet, therefore size now %u\n", size);
 		break;
 
 	default:
@@ -648,8 +613,6 @@ nfulnl_log_packet(unsigned int pf,
 	    size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
 		/* either the queue len is too high or we don't have
 		 * enough room in the skb left. flush to userspace. */
-		UDEBUG("flushing old skb\n");
-
 		__nfulnl_flush(inst);
 	}
 
@@ -659,7 +622,6 @@ nfulnl_log_packet(unsigned int pf,
 			goto alloc_failure;
 	}
 
-	UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
 	inst->qlen++;
 
 	__build_packet_message(inst, skb, data_len, pf,
@@ -681,7 +643,6 @@ unlock_and_release:
 	return;
 
 alloc_failure:
-	UDEBUG("error allocating skb\n");
 	/* FIXME: statistics */
 	goto unlock_and_release;
 }
@@ -704,7 +665,6 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 			struct hlist_head *head = &instance_table[i];
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
-				UDEBUG("node = %p\n", inst);
 				if ((n->net == &init_net) &&
 				    (n->pid == inst->peer_pid))
 					__instance_destroy(inst);
@@ -750,8 +710,6 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	struct nfulnl_instance *inst;
 	int ret = 0;
 
-	UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
-
 	inst = instance_lookup_get(group_num);
 	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
 		ret = -EPERM;
@@ -763,7 +721,6 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		struct nfulnl_msg_config_cmd *cmd;
 
 		cmd = nla_data(nfula[NFULA_CFG_CMD]);
-		UDEBUG("found CFG_CMD for\n");
 
 		switch (cmd->command) {
 		case NFULNL_CFG_CMD_BIND:
@@ -788,11 +745,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			instance_destroy(inst);
 			goto out;
 		case NFULNL_CFG_CMD_PF_BIND:
-			UDEBUG("registering log handler for pf=%u\n", pf);
 			ret = nf_log_register(pf, &nfulnl_logger);
 			break;
 		case NFULNL_CFG_CMD_PF_UNBIND:
-			UDEBUG("unregistering log handler for pf=%u\n", pf);
 			/* This is a bug and a feature.  We cannot unregister
 			 * other handlers, like nfnetlink_inst can */
 			nf_log_unregister_pf(pf);
-- 
cgit v1.2.3


From baab2ce7d2a8dbf6280ab09c011cfec1dd5972de Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:41:21 -0800
Subject: [NETFILTER]: nfnetlink_{queue,log}: return proper error codes in
 instance_create

Currently we return EINVAL for "instance exists", "allocation failed" and
"module unloaded below us", which is completely inapproriate.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c   | 14 +++++++++-----
 net/netfilter/nfnetlink_queue.c | 21 ++++++++++++++-------
 2 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c12e1d1bd00..ac58dc9d724 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -128,19 +128,23 @@ static struct nfulnl_instance *
 instance_create(u_int16_t group_num, int pid)
 {
 	struct nfulnl_instance *inst;
+	int err;
 
 	write_lock_bh(&instances_lock);
 	if (__instance_lookup(group_num)) {
-		inst = NULL;
+		err = -EEXIST;
 		goto out_unlock;
 	}
 
 	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
-	if (!inst)
+	if (!inst) {
+		err = -ENOMEM;
 		goto out_unlock;
+	}
 
 	if (!try_module_get(THIS_MODULE)) {
 		kfree(inst);
+		err = -EAGAIN;
 		goto out_unlock;
 	}
 
@@ -169,7 +173,7 @@ instance_create(u_int16_t group_num, int pid)
 
 out_unlock:
 	write_unlock_bh(&instances_lock);
-	return NULL;
+	return ERR_PTR(err);
 }
 
 static void __nfulnl_flush(struct nfulnl_instance *inst);
@@ -731,8 +735,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 
 			inst = instance_create(group_num,
 					       NETLINK_CB(skb).pid);
-			if (!inst) {
-				ret = -EINVAL;
+			if (IS_ERR(inst)) {
+				ret = PTR_ERR(inst);
 				goto out;
 			}
 			break;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 370f0af50f4..51476f82bb5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -89,16 +89,21 @@ instance_lookup(u_int16_t queue_num)
 static struct nfqnl_instance *
 instance_create(u_int16_t queue_num, int pid)
 {
-	struct nfqnl_instance *inst = NULL;
+	struct nfqnl_instance *inst;
 	unsigned int h;
+	int err;
 
 	spin_lock(&instances_lock);
-	if (instance_lookup(queue_num))
+	if (instance_lookup(queue_num)) {
+		err = -EEXIST;
 		goto out_unlock;
+	}
 
 	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
-	if (!inst)
+	if (!inst) {
+		err = -ENOMEM;
 		goto out_unlock;
+	}
 
 	inst->queue_num = queue_num;
 	inst->peer_pid = pid;
@@ -109,8 +114,10 @@ instance_create(u_int16_t queue_num, int pid)
 	INIT_LIST_HEAD(&inst->queue_list);
 	INIT_RCU_HEAD(&inst->rcu);
 
-	if (!try_module_get(THIS_MODULE))
+	if (!try_module_get(THIS_MODULE)) {
+		err = -EAGAIN;
 		goto out_free;
+	}
 
 	h = instance_hashfn(queue_num);
 	hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
@@ -123,7 +130,7 @@ out_free:
 	kfree(inst);
 out_unlock:
 	spin_unlock(&instances_lock);
-	return NULL;
+	return ERR_PTR(err);
 }
 
 static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
@@ -724,8 +731,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				goto err_out_unlock;
 			}
 			queue = instance_create(queue_num, NETLINK_CB(skb).pid);
-			if (!queue) {
-				ret = -EINVAL;
+			if (IS_ERR(queue)) {
+				ret = PTR_ERR(queue);
 				goto err_out_unlock;
 			}
 			break;
-- 
cgit v1.2.3


From 0dfedd28746266a35b3008cb6bb03466115e95b9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:41:35 -0800
Subject: [NETFILTER]: nfnetlink_log: use endianness-aware attribute functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 87 +++++++++++++++++--------------------------
 1 file changed, 35 insertions(+), 52 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index ac58dc9d724..950b1f0713d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -389,32 +389,27 @@ __build_packet_message(struct nfulnl_instance *inst,
 		NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix);
 
 	if (indev) {
-		tmp_uint = htonl(indev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
-		NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint),
-			&tmp_uint);
+		NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
+			     htonl(indev->ifindex));
 #else
 		if (pf == PF_BRIDGE) {
 			/* Case 1: outdev is physical input device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
-			NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
-				sizeof(tmp_uint), &tmp_uint);
+			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
-			tmp_uint = htonl(indev->br_port->br->dev->ifindex);
-			NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
-				sizeof(tmp_uint), &tmp_uint);
+			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
+				     htonl(indev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
-			NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
-				sizeof(tmp_uint), &tmp_uint);
-			if (skb->nf_bridge && skb->nf_bridge->physindev) {
-				tmp_uint =
-				    htonl(skb->nf_bridge->physindev->ifindex);
-				NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
-					sizeof(tmp_uint), &tmp_uint);
-			}
+			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
+				     htonl(indev->ifindex));
+			if (skb->nf_bridge && skb->nf_bridge->physindev)
+				NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+					     htonl(skb->nf_bridge->physindev->ifindex));
 		}
 #endif
 	}
@@ -422,38 +417,32 @@ __build_packet_message(struct nfulnl_instance *inst,
 	if (outdev) {
 		tmp_uint = htonl(outdev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
-		NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint),
-			&tmp_uint);
+		NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
+			     htonl(outdev->ifindex));
 #else
 		if (pf == PF_BRIDGE) {
 			/* Case 1: outdev is physical output device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
-			NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
-				sizeof(tmp_uint), &tmp_uint);
+			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
-			tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
-			NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
-				sizeof(tmp_uint), &tmp_uint);
+			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
+				     htonl(outdev->br_port->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is a bridge group, we need to look
 			 * for physical device (when called from ipv4) */
-			NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
-				sizeof(tmp_uint), &tmp_uint);
-			if (skb->nf_bridge && skb->nf_bridge->physoutdev) {
-				tmp_uint =
-				    htonl(skb->nf_bridge->physoutdev->ifindex);
-				NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
-					sizeof(tmp_uint), &tmp_uint);
-			}
+			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
+				     htonl(outdev->ifindex));
+			if (skb->nf_bridge && skb->nf_bridge->physoutdev)
+				NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+					     htonl(skb->nf_bridge->physoutdev->ifindex));
 		}
 #endif
 	}
 
-	if (skb->mark) {
-		tmp_uint = htonl(skb->mark);
-		NLA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
-	}
+	if (skb->mark)
+		NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark));
 
 	if (indev && skb->dev) {
 		struct nfulnl_msg_packet_hw phw;
@@ -480,21 +469,19 @@ __build_packet_message(struct nfulnl_instance *inst,
 			__be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
 			/* need to unlock here since NLA_PUT may goto */
 			read_unlock_bh(&skb->sk->sk_callback_lock);
-			NLA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid);
+			NLA_PUT_BE32(inst->skb, NFULA_UID, uid);
 		} else
 			read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 
 	/* local sequence number */
-	if (inst->flags & NFULNL_CFG_F_SEQ) {
-		tmp_uint = htonl(inst->seq++);
-		NLA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint);
-	}
+	if (inst->flags & NFULNL_CFG_F_SEQ)
+		NLA_PUT_BE32(inst->skb, NFULA_SEQ, htonl(inst->seq++));
+
 	/* global sequence number */
-	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) {
-		tmp_uint = htonl(atomic_inc_return(&global_seq));
-		NLA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint);
-	}
+	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
+		NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
+			     htonl(atomic_inc_return(&global_seq)));
 
 	if (data_len) {
 		struct nlattr *nla;
@@ -775,8 +762,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	if (nfula[NFULA_CFG_TIMEOUT]) {
-		__be32 timeout =
-			*(__be32 *)nla_data(nfula[NFULA_CFG_TIMEOUT]);
+		__be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
 
 		if (!inst) {
 			ret = -ENODEV;
@@ -786,8 +772,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	if (nfula[NFULA_CFG_NLBUFSIZ]) {
-		__be32 nlbufsiz =
-			*(__be32 *)nla_data(nfula[NFULA_CFG_NLBUFSIZ]);
+		__be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
 
 		if (!inst) {
 			ret = -ENODEV;
@@ -797,8 +782,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	if (nfula[NFULA_CFG_QTHRESH]) {
-		__be32 qthresh =
-			*(__be32 *)nla_data(nfula[NFULA_CFG_QTHRESH]);
+		__be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
 
 		if (!inst) {
 			ret = -ENODEV;
@@ -808,8 +792,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	if (nfula[NFULA_CFG_FLAGS]) {
-		__be16 flags =
-			*(__be16 *)nla_data(nfula[NFULA_CFG_FLAGS]);
+		__be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]);
 
 		if (!inst) {
 			ret = -ENODEV;
-- 
cgit v1.2.3


From 76aa1ce139f649e432272f6ad75204b763ef13bd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:41:52 -0800
Subject: [NETFILTER]: nfnetlink_log: include GID in netlink message

Similar to Maciej Soltysiak's ipt_LOG patch, include GID in addition
to UID in netlink message.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 950b1f0713d..5013cb97ce2 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -467,9 +467,11 @@ __build_packet_message(struct nfulnl_instance *inst,
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
 			__be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
+			__be32 gid = htons(skb->sk->sk_socket->file->f_gid);
 			/* need to unlock here since NLA_PUT may goto */
 			read_unlock_bh(&skb->sk->sk_callback_lock);
 			NLA_PUT_BE32(inst->skb, NFULA_UID, uid);
+			NLA_PUT_BE32(inst->skb, NFULA_GID, gid);
 		} else
 			read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
@@ -564,6 +566,7 @@ nfulnl_log_packet(unsigned int pf,
 #endif
 		+ nla_total_size(sizeof(u_int32_t))	/* mark */
 		+ nla_total_size(sizeof(u_int32_t))	/* uid */
+		+ nla_total_size(sizeof(u_int32_t))	/* gid */
 		+ nla_total_size(plen)			/* prefix */
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
-- 
cgit v1.2.3


From 1e796fda00f06bac584f0e4ad8750ab9430d79d3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:42:27 -0800
Subject: [NETFILTER]: constify nf_afinfo

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter.c                   | 2 +-
 net/ipv6/netfilter.c                   | 2 +-
 net/netfilter/core.c                   | 6 +++---
 net/netfilter/nf_conntrack_h323_main.c | 2 +-
 net/netfilter/nf_queue.c               | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 7bf5e4a199f..4011f8f987c 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -190,7 +190,7 @@ static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
 	return ip_route_output_key((struct rtable **)dst, fl);
 }
 
-static struct nf_afinfo nf_ip_afinfo = {
+static const struct nf_afinfo nf_ip_afinfo = {
 	.family		= AF_INET,
 	.checksum	= nf_ip_checksum,
 	.route		= nf_ip_route,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 945e6ae1956..2e06724dc34 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -124,7 +124,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 
 EXPORT_SYMBOL(nf_ip6_checksum);
 
-static struct nf_afinfo nf_ip6_afinfo = {
+static const struct nf_afinfo nf_ip6_afinfo = {
 	.family		= AF_INET6,
 	.checksum	= nf_ip6_checksum,
 	.route		= nf_ip6_route,
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 95e18635ce7..e0263445484 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -26,10 +26,10 @@
 
 static DEFINE_MUTEX(afinfo_mutex);
 
-struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
+const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
 EXPORT_SYMBOL(nf_afinfo);
 
-int nf_register_afinfo(struct nf_afinfo *afinfo)
+int nf_register_afinfo(const struct nf_afinfo *afinfo)
 {
 	int err;
 
@@ -42,7 +42,7 @@ int nf_register_afinfo(struct nf_afinfo *afinfo)
 }
 EXPORT_SYMBOL_GPL(nf_register_afinfo);
 
-void nf_unregister_afinfo(struct nf_afinfo *afinfo)
+void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
 {
 	mutex_lock(&afinfo_mutex);
 	rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index c550257b546..b636ca60a77 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -708,7 +708,7 @@ static int callforward_do_filter(union nf_conntrack_address *src,
 				 union nf_conntrack_address *dst,
 				 int family)
 {
-	struct nf_afinfo *afinfo;
+	const struct nf_afinfo *afinfo;
 	struct flowi fl1, fl2;
 	int ret = 0;
 
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 77965114b09..bfc2928c191 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -119,7 +119,7 @@ static int __nf_queue(struct sk_buff *skb,
 	struct net_device *physindev;
 	struct net_device *physoutdev;
 #endif
-	struct nf_afinfo *afinfo;
+	const struct nf_afinfo *afinfo;
 	const struct nf_queue_handler *qh;
 
 	/* QUEUE == DROP if noone is waiting, to be safe. */
@@ -233,7 +233,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
 	struct sk_buff *skb = entry->skb;
 	struct list_head *elem = &entry->elem->list;
-	struct nf_afinfo *afinfo;
+	const struct nf_afinfo *afinfo;
 
 	rcu_read_lock();
 
-- 
cgit v1.2.3


From 051578ccbcdad3b24b621dfb652194e36759e8d5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:42:51 -0800
Subject: [NETFILTER]: nf_nat: properly use RCU for ip_nat_decode_session

We need to use rcu_assign_pointer/rcu_dereference to avoid races.
Also remove an obsolete CONFIG_IP_NAT_NEEDED ifdef.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_standalone.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index a2b02f01cc5..99b2c788d5a 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -332,7 +332,7 @@ static int __init nf_nat_standalone_init(void)
 
 #ifdef CONFIG_XFRM
 	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
+	rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
 #endif
 	ret = nf_nat_rule_init();
 	if (ret < 0) {
@@ -350,7 +350,7 @@ static int __init nf_nat_standalone_init(void)
 	nf_nat_rule_cleanup();
  cleanup_decode_session:
 #ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
+	rcu_assign_pointer(ip_nat_decode_session, NULL);
 	synchronize_net();
 #endif
 	return ret;
@@ -361,7 +361,7 @@ static void __exit nf_nat_standalone_fini(void)
 	nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
 	nf_nat_rule_cleanup();
 #ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
+	rcu_assign_pointer(ip_nat_decode_session, NULL);
 	synchronize_net();
 #endif
 	/* Conntrack caches are unregistered in nf_conntrack_cleanup */
-- 
cgit v1.2.3


From df54aae02210e1acf3a1d2ffac9b29003835710c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 17 Dec 2007 22:43:15 -0800
Subject: [NETFILTER]: x_tables: use %u format specifiers

Use %u format specifiers as ->family is unsigned.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 net/netfilter/xt_CONNMARK.c        | 2 +-
 net/netfilter/xt_CONNSECMARK.c     | 2 +-
 net/netfilter/xt_connbytes.c       | 2 +-
 net/netfilter/xt_connmark.c        | 2 +-
 net/netfilter/xt_conntrack.c       | 2 +-
 net/netfilter/xt_helper.c          | 2 +-
 net/netfilter/xt_state.c           | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index dc1e7b41883..a48e26449fd 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -414,7 +414,7 @@ clusterip_tg_check(const char *tablename, const void *e_void,
 
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", target->family);
+				    "proto=%u\n", target->family);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index d96ee3e0ba2..ec2eb341d2e 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -95,7 +95,7 @@ connmark_tg_check(const char *tablename, const void *entry,
 	}
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", target->family);
+				    "proto=%u\n", target->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 2333f7e29bc..024106bdd37 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -106,7 +106,7 @@ connsecmark_tg_check(const char *tablename, const void *entry,
 
 	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", target->family);
+				    "proto=%u\n", target->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 752b7d8bbc9..7d4940adc3c 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -111,7 +111,7 @@ connbytes_mt_check(const char *tablename, const void *ip,
 
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", match->family);
+				    "proto=%u\n", match->family);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index b5c0f2fe699..8ad875bc158 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -61,7 +61,7 @@ connmark_mt_check(const char *tablename, const void *ip,
 	}
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", match->family);
+				    "proto=%u\n", match->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index eb7e135ca6c..8c1d448c973 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -117,7 +117,7 @@ conntrack_mt_check(const char *tablename, const void *ip,
 {
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", match->family);
+				    "proto=%u\n", match->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index f342788a576..5d063e51f88 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -66,7 +66,7 @@ helper_mt_check(const char *tablename, const void *inf,
 
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", match->family);
+				    "proto=%u\n", match->family);
 		return false;
 	}
 	info->name[29] = '\0';
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 2e1716d425b..a776dc36a19 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -47,7 +47,7 @@ state_mt_check(const char *tablename, const void *inf,
 {
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%d\n", match->family);
+				    "proto=%u\n", match->family);
 		return false;
 	}
 	return true;
-- 
cgit v1.2.3


From 643a2c15a407faf08101a20e1a3461160711899d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 17 Dec 2007 22:43:50 -0800
Subject: [NETFILTER]: Introduce nf_inet_address

A few netfilter modules provide their own union of IPv4 and IPv6
address storage. Will unify that in this patch series.

(1/4): Rename union nf_conntrack_address to union nf_inet_addr and
move it to x_tables.h.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_h323.c       | 10 +++++-----
 net/netfilter/nf_conntrack_expect.c    |  4 ++--
 net/netfilter/nf_conntrack_ftp.c       |  2 +-
 net/netfilter/nf_conntrack_h323_main.c | 34 +++++++++++++++++-----------------
 net/netfilter/nf_conntrack_sip.c       |  8 ++++----
 net/netfilter/xt_connlimit.c           | 20 ++++++++++----------
 6 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 2e4bdee92c4..a121989fdad 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -76,7 +76,7 @@ static int set_addr(struct sk_buff *skb,
 static int set_h225_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 TransportAddress *taddr,
-			 union nf_conntrack_address *addr, __be16 port)
+			 union nf_inet_addr *addr, __be16 port)
 {
 	return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
 			addr->ip, port);
@@ -86,7 +86,7 @@ static int set_h225_addr(struct sk_buff *skb,
 static int set_h245_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 H245_TransportAddress *taddr,
-			 union nf_conntrack_address *addr, __be16 port)
+			 union nf_inet_addr *addr, __be16 port)
 {
 	return set_addr(skb, data, dataoff,
 			taddr->unicastAddress.iPAddress.network,
@@ -103,7 +103,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int i;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 
 	for (i = 0; i < count; i++) {
 		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
@@ -155,7 +155,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int i;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 
 	for (i = 0; i < count; i++) {
 		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
@@ -408,7 +408,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
 	int dir = CTINFO2DIR(ctinfo);
 	u_int16_t nated_port = ntohs(port);
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 
 	/* Set expectations for NAT */
 	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 175c8d1a199..0efbf343eac 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -226,8 +226,8 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
 
 void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
-		       union nf_conntrack_address *saddr,
-		       union nf_conntrack_address *daddr,
+		       union nf_inet_addr *saddr,
+		       union nf_inet_addr *daddr,
 		       u_int8_t proto, __be16 *src, __be16 *dst)
 {
 	int len;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6df259067f7..6770baf2e84 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -358,7 +358,7 @@ static int help(struct sk_buff *skb,
 	unsigned int matchlen, matchoff;
 	struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
 	struct nf_conntrack_expect *exp;
-	union nf_conntrack_address *daddr;
+	union nf_inet_addr *daddr;
 	struct nf_conntrack_man cmd = {};
 	unsigned int i;
 	int found = 0, ends_in_nl;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index b636ca60a77..872c1aa3124 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -50,12 +50,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
 int (*set_h245_addr_hook) (struct sk_buff *skb,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr,
-			   union nf_conntrack_address *addr, __be16 port)
+			   union nf_inet_addr *addr, __be16 port)
 			   __read_mostly;
 int (*set_h225_addr_hook) (struct sk_buff *skb,
 			   unsigned char **data, int dataoff,
 			   TransportAddress *taddr,
-			   union nf_conntrack_address *addr, __be16 port)
+			   union nf_inet_addr *addr, __be16 port)
 			   __read_mostly;
 int (*set_sig_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
@@ -214,7 +214,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 /****************************************************************************/
 static int get_h245_addr(struct nf_conn *ct, unsigned char *data,
 			 H245_TransportAddress *taddr,
-			 union nf_conntrack_address *addr, __be16 *port)
+			 union nf_inet_addr *addr, __be16 *port)
 {
 	unsigned char *p;
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -257,7 +257,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	int ret = 0;
 	__be16 port;
 	__be16 rtp_port, rtcp_port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *rtp_exp;
 	struct nf_conntrack_expect *rtcp_exp;
 	typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
@@ -330,7 +330,7 @@ static int expect_t120(struct sk_buff *skb,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 	typeof(nat_t120_hook) nat_t120;
 
@@ -623,7 +623,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
 /****************************************************************************/
 int get_h225_addr(struct nf_conn *ct, unsigned char *data,
 		  TransportAddress *taddr,
-		  union nf_conntrack_address *addr, __be16 *port)
+		  union nf_inet_addr *addr, __be16 *port)
 {
 	unsigned char *p;
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -662,7 +662,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 	typeof(nat_h245_hook) nat_h245;
 
@@ -704,8 +704,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 
 /* If the calling party is on the same side of the forward-to party,
  * we don't need to track the second call */
-static int callforward_do_filter(union nf_conntrack_address *src,
-				 union nf_conntrack_address *dst,
+static int callforward_do_filter(union nf_inet_addr *src,
+				 union nf_inet_addr *dst,
 				 int family)
 {
 	const struct nf_afinfo *afinfo;
@@ -772,7 +772,7 @@ static int expect_callforwarding(struct sk_buff *skb,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 	typeof(nat_callforwarding_hook) nat_callforwarding;
 
@@ -828,7 +828,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 	int ret;
 	int i;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	typeof(set_h225_addr_hook) set_h225_addr;
 
 	pr_debug("nf_ct_q931: Setup\n");
@@ -1200,7 +1200,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
 
 /****************************************************************************/
 static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
-					       union nf_conntrack_address *addr,
+					       union nf_inet_addr *addr,
 					       __be16 port)
 {
 	struct nf_conntrack_expect *exp;
@@ -1242,7 +1242,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 	int ret = 0;
 	int i;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 	typeof(nat_q931_hook) nat_q931;
 
@@ -1311,7 +1311,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 
 	pr_debug("nf_ct_ras: GCF\n");
@@ -1471,7 +1471,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
 	int dir = CTINFO2DIR(ctinfo);
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	typeof(set_h225_addr_hook) set_h225_addr;
 
 	pr_debug("nf_ct_ras: ARQ\n");
@@ -1513,7 +1513,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 	typeof(set_sig_addr_hook) set_sig_addr;
 
@@ -1576,7 +1576,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	struct nf_conntrack_expect *exp;
 
 	pr_debug("nf_ct_ras: LCF\n");
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 515abffc4a0..47d8947cf26 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -247,7 +247,7 @@ static int skp_digits_len(struct nf_conn *ct, const char *dptr,
 }
 
 static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp,
-		      union nf_conntrack_address *addr, const char *limit)
+		      union nf_inet_addr *addr, const char *limit)
 {
 	const char *end;
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -275,7 +275,7 @@ static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp,
 static int epaddr_len(struct nf_conn *ct, const char *dptr,
 		      const char *limit, int *shift)
 {
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	const char *aux = dptr;
 
 	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
@@ -366,7 +366,7 @@ EXPORT_SYMBOL_GPL(ct_sip_get_info);
 static int set_expected_rtp(struct sk_buff *skb,
 			    struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
-			    union nf_conntrack_address *addr,
+			    union nf_inet_addr *addr,
 			    __be16 port,
 			    const char *dptr)
 {
@@ -403,7 +403,7 @@ static int sip_help(struct sk_buff *skb,
 		    enum ip_conntrack_info ctinfo)
 {
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	unsigned int dataoff, datalen;
 	const char *dptr;
 	int ret = NF_ACCEPT;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 26d12b00a9c..b7a684607c7 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -53,10 +53,10 @@ static inline unsigned int connlimit_iphash(__be32 addr)
 }
 
 static inline unsigned int
-connlimit_iphash6(const union nf_conntrack_address *addr,
-                  const union nf_conntrack_address *mask)
+connlimit_iphash6(const union nf_inet_addr *addr,
+                  const union nf_inet_addr *mask)
 {
-	union nf_conntrack_address res;
+	union nf_inet_addr res;
 	unsigned int i;
 
 	if (unlikely(!connlimit_rnd_inited)) {
@@ -81,14 +81,14 @@ static inline bool already_closed(const struct nf_conn *conn)
 }
 
 static inline unsigned int
-same_source_net(const union nf_conntrack_address *addr,
-		const union nf_conntrack_address *mask,
-		const union nf_conntrack_address *u3, unsigned int family)
+same_source_net(const union nf_inet_addr *addr,
+		const union nf_inet_addr *mask,
+		const union nf_inet_addr *u3, unsigned int family)
 {
 	if (family == AF_INET) {
 		return (addr->ip & mask->ip) == (u3->ip & mask->ip);
 	} else {
-		union nf_conntrack_address lh, rh;
+		union nf_inet_addr lh, rh;
 		unsigned int i;
 
 		for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
@@ -102,8 +102,8 @@ same_source_net(const union nf_conntrack_address *addr,
 
 static int count_them(struct xt_connlimit_data *data,
 		      const struct nf_conntrack_tuple *tuple,
-		      const union nf_conntrack_address *addr,
-		      const union nf_conntrack_address *mask,
+		      const union nf_inet_addr *addr,
+		      const union nf_inet_addr *mask,
 		      const struct xt_match *match)
 {
 	struct nf_conntrack_tuple_hash *found;
@@ -185,7 +185,7 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
              bool *hotdrop)
 {
 	const struct xt_connlimit_info *info = matchinfo;
-	union nf_conntrack_address addr, mask;
+	union nf_inet_addr addr, mask;
 	struct nf_conntrack_tuple tuple;
 	const struct nf_conntrack_tuple *tuple_ptr = &tuple;
 	enum ip_conntrack_info ctinfo;
-- 
cgit v1.2.3


From e79ec50b9587c175f65f98550d66ad5b96c05dd9 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 17 Dec 2007 22:44:06 -0800
Subject: [NETFILTER]: Parenthesize macro parameters

Parenthesize macro parameters.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c         | 3 ++-
 net/ipv4/netfilter/ip_tables.c          | 3 ++-
 net/ipv4/netfilter/nf_nat_snmp_basic.c  | 2 +-
 net/ipv6/netfilter/ip6_tables.c         | 2 +-
 net/netfilter/nf_conntrack_h323_asn1.c  | 8 ++++----
 net/netfilter/nf_conntrack_netlink.c    | 2 +-
 net/netfilter/nf_conntrack_proto_sctp.c | 8 ++++----
 net/netfilter/xt_conntrack.c            | 3 ++-
 net/netfilter/xt_policy.c               | 2 +-
 net/netfilter/xt_string.c               | 2 +-
 10 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index ad2da6db0e2..b4a810c28ac 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -84,7 +84,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	__be32 src_ipaddr, tgt_ipaddr;
 	int i, ret;
 
-#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
 
 	if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
 		  ARPT_INV_ARPOP)) {
@@ -180,6 +180,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	}
 
 	return 1;
+#undef FWINV
 }
 
 static inline int arp_checkentry(const struct arpt_arp *arp)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 271f6a5d3d4..f5b66ec18b0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -85,7 +85,7 @@ ip_packet_match(const struct iphdr *ip,
 	size_t i;
 	unsigned long ret;
 
-#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
 
 	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
 		  IPT_INV_SRCIP)
@@ -216,6 +216,7 @@ unconditional(const struct ipt_ip *ip)
 			return 0;
 
 	return 1;
+#undef FWINV
 }
 
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 03709d6b4b0..07f2a49926d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -60,7 +60,7 @@ MODULE_ALIAS("ip_nat_snmp_basic");
 
 #define SNMP_PORT 161
 #define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)n)
+#define NOCT1(n) (*(u8 *)(n))
 
 static int debug;
 static DEFINE_SPINLOCK(snmp_lock);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index bb50d0e6673..4ed16d254b9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -102,7 +102,7 @@ ip6_packet_match(const struct sk_buff *skb,
 	unsigned long ret;
 	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
 
-#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
 
 	if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
 				       &ip6info->src), IP6T_INV_SRCIP)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index a869403b229..ff66fba514f 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -100,10 +100,10 @@ typedef struct {
 } bitstr_t;
 
 /* Tool Functions */
-#define INC_BIT(bs) if((++bs->bit)>7){bs->cur++;bs->bit=0;}
-#define INC_BITS(bs,b) if((bs->bit+=b)>7){bs->cur+=bs->bit>>3;bs->bit&=7;}
-#define BYTE_ALIGN(bs) if(bs->bit){bs->cur++;bs->bit=0;}
-#define CHECK_BOUND(bs,n) if(bs->cur+(n)>bs->end)return(H323_ERROR_BOUND)
+#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
+#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
+#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
+#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
 static unsigned get_len(bitstr_t * bs);
 static unsigned get_bit(bitstr_t * bs);
 static unsigned get_bits(bitstr_t * bs, unsigned b);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3a065f43ddd..d93d58d688b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -534,7 +534,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
 	return 0;
 }
 
-#define L3PROTO(ct) ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
+#define L3PROTO(ct) (ct)->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
 
 static int
 ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 224612370f5..9296fd28189 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -186,10 +186,10 @@ static int sctp_print_conntrack(struct seq_file *s,
 }
 
 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
-for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0;		\
-	offset < skb->len &&						\
-	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
-	offset += (ntohs(sch->length) + 3) & ~3, count++)
+for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
+	(offset) < (skb)->len &&					\
+	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch)));	\
+	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
 
 /* Some validity checks to make sure the chunks are fine */
 static int do_basic_checks(struct nf_conn *conntrack,
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 8c1d448c973..3f8bfbaa9b1 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -32,7 +32,7 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
 
 	ct = nf_ct_get(skb, &ctinfo);
 
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & (invflg)))
 
 	if (ct == &nf_conntrack_untracked)
 		statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -108,6 +108,7 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
 			return false;
 	}
 	return true;
+#undef FWINV
 }
 
 static bool
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 5a017b8b72d..46ee7e81a7a 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -38,7 +38,7 @@ match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
 		 unsigned short family)
 {
 #define MATCH_ADDR(x,y,z)	(!e->match.x ||			       \
-				 (xt_addr_cmp(&e->x, &e->y, z, family) \
+				 (xt_addr_cmp(&e->x, &e->y, (z), family) \
 				  ^ e->invert.x))
 #define MATCH(x,y)		(!e->match.x || ((e->x == (y)) ^ e->invert.x))
 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 90287844489..aff7a116055 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -37,7 +37,7 @@ string_mt(const struct sk_buff *skb, const struct net_device *in,
 			     != UINT_MAX) ^ conf->invert;
 }
 
-#define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
+#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
 
 static bool
 string_mt_check(const char *tablename, const void *ip,
-- 
cgit v1.2.3


From 22c2d8bca212a655c120fd6617328ffa3480afad Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 17 Dec 2007 22:44:47 -0800
Subject: [NETFILTER]: xt_connlimit: use the new union nf_inet_addr

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_connlimit.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b7a684607c7..6a9e2a35718 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -185,7 +185,7 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
              bool *hotdrop)
 {
 	const struct xt_connlimit_info *info = matchinfo;
-	union nf_inet_addr addr, mask;
+	union nf_inet_addr addr;
 	struct nf_conntrack_tuple tuple;
 	const struct nf_conntrack_tuple *tuple_ptr = &tuple;
 	enum ip_conntrack_info ctinfo;
@@ -202,15 +202,14 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 	if (match->family == AF_INET6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
-		memcpy(&mask.ip6, info->v6_mask, sizeof(info->v6_mask));
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
 		addr.ip = iph->saddr;
-		mask.ip = info->v4_mask;
 	}
 
 	spin_lock_bh(&info->data->lock);
-	connections = count_them(info->data, tuple_ptr, &addr, &mask, match);
+	connections = count_them(info->data, tuple_ptr, &addr,
+	                         &info->mask, match);
 	spin_unlock_bh(&info->data->lock);
 
 	if (connections < 0) {
-- 
cgit v1.2.3


From e2f82ac3fcffffca59751b65124544d11ed8be4a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 17 Dec 2007 22:45:13 -0800
Subject: [NETFILTER]: xt_hashlimit: speedup hash_dst()

1) Using jhash2() instead of jhash() is a litle bit faster if applicable.

2) Thanks to jhash, hash value uses full 32 bits.
   Instead of returning hash % size (implying a divide)
   we return the high 32 bits of the (hash * size) that will
   give results between [0 and size-1] and same hash distribution.

  On most cpus, a multiply is less expensive than a divide, by an order
  of magnitude.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_hashlimit.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 951d4c82967..651c1d26564 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -105,7 +105,16 @@ static inline bool dst_cmp(const struct dsthash_ent *ent,
 static u_int32_t
 hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
 {
-	return jhash(dst, sizeof(*dst), ht->rnd) % ht->cfg.size;
+	u_int32_t hash = jhash2((const u32 *)dst,
+				sizeof(*dst)/sizeof(u32),
+				ht->rnd);
+	/*
+	 * Instead of returning hash % ht->cfg.size (implying a divide)
+	 * we return the high 32 bits of the (hash * ht->cfg.size) that will
+	 * give results between [0 and cfg.size-1] and same hash distribution,
+	 * but using a multiply, less expensive than a divide
+	 */
+	return ((u64)hash * ht->cfg.size) >> 32;
 }
 
 static struct dsthash_ent *
-- 
cgit v1.2.3


From 7b21e09d1c17ef0296ec5a6df231a6c5c87b2fd7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 17 Dec 2007 22:45:28 -0800
Subject: [NETFILTER]: xt_hashlimit: reduce overhead without IPv6

This patch generalizes the (CONFIG_IP6_NF_IPTABLES || CONFIG_IP6_NF_IPTABLES_MODULE)
test done in hashlimit_init_dst() to all the xt_hashlimit module.

This permits a size reduction of "struct dsthash_dst". This saves memory and
cpu for IPV4 only hosts.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_hashlimit.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 651c1d26564..c35d220a7ae 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -20,8 +20,11 @@
 #include <linux/mm.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 #include <linux/ipv6.h>
 #include <net/ipv6.h>
+#endif
+
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -48,10 +51,12 @@ struct dsthash_dst {
 			__be32 src;
 			__be32 dst;
 		} ip;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 		struct {
 			__be32 src[4];
 			__be32 dst[4];
 		} ip6;
+#endif
 	} addr;
 	__be16 src_port;
 	__be16 dst_port;
@@ -599,6 +604,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.destroy	= hashlimit_mt_destroy,
 		.me		= THIS_MODULE
 	},
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	{
 		.name		= "hashlimit",
 		.family		= AF_INET6,
@@ -613,6 +619,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.destroy	= hashlimit_mt_destroy,
 		.me		= THIS_MODULE
 	},
+#endif
 };
 
 /* PROC stuff */
@@ -675,6 +682,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family,
 				 ntohs(ent->dst.dst_port),
 				 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 				 ent->rateinfo.cost);
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	case AF_INET6:
 		return seq_printf(s, "%ld " NIP6_FMT ":%u->"
 				     NIP6_FMT ":%u %u %u %u\n",
@@ -685,6 +693,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family,
 				 ntohs(ent->dst.dst_port),
 				 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 				 ent->rateinfo.cost);
+#endif
 	default:
 		BUG();
 		return 0;
@@ -756,14 +765,17 @@ static int __init hashlimit_mt_init(void)
 				"entry\n");
 		goto err3;
 	}
+	err = 0;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
 	if (!hashlimit_procdir6) {
 		printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
 				"entry\n");
-		goto err4;
+		err = -ENOMEM;
 	}
-	return 0;
-err4:
+#endif
+	if (!err)
+		return 0;
 	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
 err3:
 	kmem_cache_destroy(hashlimit_cachep);
@@ -777,7 +789,9 @@ err1:
 static void __exit hashlimit_mt_exit(void)
 {
 	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
+#endif
 	kmem_cache_destroy(hashlimit_cachep);
 	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 }
-- 
cgit v1.2.3


From 34498825cb9062192b77fa02dae672a4fe6eec70 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:45:52 -0800
Subject: [NETFILTER]: non-power-of-two jhash optimizations

Apply Eric Dumazet's jhash optimizations where applicable. Quoting Eric:

Thanks to jhash, hash value uses full 32 bits. Instead of returning
hash % size (implying a divide) we return the high 32 bits of the
(hash * size) that will give results between [0 and size-1] and same
hash distribution.

On most cpus, a multiply is less expensive than a divide, by an order
of magnitude.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c  |  2 +-
 net/ipv4/netfilter/nf_nat_core.c    | 10 +++++++---
 net/netfilter/nf_conntrack_core.c   |  2 +-
 net/netfilter/nf_conntrack_expect.c |  8 +++++---
 4 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a48e26449fd..df39ca07fb1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -273,7 +273,7 @@ clusterip_hashfn(const struct sk_buff *skb,
 	}
 
 	/* node numbers are 1..n, not 0..n */
-	return (hashval % config->num_total_nodes) + 1;
+	return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
 }
 
 static inline int
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index aec157d0ad9..e53ae1ef8f5 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -77,10 +77,13 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 static inline unsigned int
 hash_by_src(const struct nf_conntrack_tuple *tuple)
 {
+	unsigned int hash;
+
 	/* Original src, to ensure we map it consistently if poss. */
-	return jhash_3words((__force u32)tuple->src.u3.ip,
+	hash = jhash_3words((__force u32)tuple->src.u3.ip,
 			    (__force u32)tuple->src.u.all,
-			    tuple->dst.protonum, 0) % nf_nat_htable_size;
+			    tuple->dst.protonum, 0);
+	return ((u64)hash * nf_nat_htable_size) >> 32;
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -211,7 +214,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words((__force u32)tuple->src.u3.ip,
 			 (__force u32)tuple->dst.u3.ip, 0);
-	*var_ipp = htonl(minip + j % (maxip - minip + 1));
+	j = ((u64)j * (maxip - minip + 1)) >> 32;
+	*var_ipp = htonl(minip + j);
 }
 
 /* Manipulate the tuple into the range given.  For NF_INET_POST_ROUTING,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5920f953f78..25564073ee2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -81,7 +81,7 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 		   ((__force __u16)tuple->src.u.all << 16) |
 		    (__force __u16)tuple->dst.u.all);
 
-	return jhash_2words(a, b, rnd) % size;
+	return ((u64)jhash_2words(a, b, rnd) * size) >> 32;
 }
 
 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 0efbf343eac..e0cd9d00aa6 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -73,15 +73,17 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
 
 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
 {
+	unsigned int hash;
+
 	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
 		get_random_bytes(&nf_ct_expect_hash_rnd, 4);
 		nf_ct_expect_hash_rnd_initted = 1;
 	}
 
-	return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
 		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
-		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
-	       nf_ct_expect_hsize;
+		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
+	return ((u64)hash * nf_ct_expect_hsize) >> 32;
 }
 
 struct nf_conntrack_expect *
-- 
cgit v1.2.3


From 33b8e776056202aceaf4c90f465d0f4ee53432ac Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 17 Dec 2007 22:47:05 -0800
Subject: [NETFILTER]: Add CONFIG_NETFILTER_ADVANCED option

The NETFILTER_ADVANCED option hides lots of the rather obscure netfilter
options when disabled and provides defaults (M) that should allow to
run a distribution firewall without further thinking.

Defaults to 'y' to avoid breaking current configurations.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig                  | 12 ++++++++
 net/bridge/netfilter/Kconfig |  2 +-
 net/decnet/netfilter/Kconfig |  1 +
 net/ipv4/netfilter/Kconfig   | 26 +++++++++++++++-
 net/ipv6/netfilter/Kconfig   | 23 ++++++++++++--
 net/netfilter/Kconfig        | 71 ++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 124 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index 58ed2f4199d..b6a5d454f2f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -144,9 +144,21 @@ config NETFILTER_DEBUG
 	  You can say Y here if you want to get additional messages useful in
 	  debugging the netfilter code.
 
+config NETFILTER_ADVANCED
+	bool "Advanced netfilter configuration"
+	depends on NETFILTER
+	default y
+	help
+	  If you say Y here you can select between all the netfilter modules.
+	  If you say N the more ununsual ones will not be shown and the
+	  basic ones needed by most people will default to 'M'.
+
+	  If unsure, say Y.
+
 config BRIDGE_NETFILTER
 	bool "Bridged IP/ARP packets filtering"
 	depends on BRIDGE && NETFILTER && INET
+	depends on NETFILTER_ADVANCED
 	default y
 	---help---
 	  Enabling this option will let arptables resp. iptables see bridged
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index b84fc6075fe..4a3e2bf892c 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -3,7 +3,7 @@
 #
 
 menu "Bridge: Netfilter Configuration"
-	depends on BRIDGE && NETFILTER
+	depends on BRIDGE && BRIDGE_NETFILTER
 
 config BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
index ecdb3f9f14c..2f81de5e752 100644
--- a/net/decnet/netfilter/Kconfig
+++ b/net/decnet/netfilter/Kconfig
@@ -4,6 +4,7 @@
 
 menu "DECnet: Netfilter Configuration"
 	depends on DECNET && NETFILTER && EXPERIMENTAL
+	depends on NETFILTER_ADVANCED
 
 config DECNET_NF_GRABULATOR
 	tristate "Routing message grabulator (for userland routing daemon)"
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ad26f66b53e..cface714edf 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration"
 config NF_CONNTRACK_IPV4
 	tristate "IPv4 connection tracking support (required for NAT)"
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	---help---
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
@@ -32,6 +33,7 @@ config NF_CONNTRACK_PROC_COMPAT
 
 config IP_NF_QUEUE
 	tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
+	depends on NETFILTER_ADVANCED
 	help
 	  Netfilter has the ability to queue packets to user space: the
 	  netlink device can be used to access them using this driver.
@@ -44,6 +46,7 @@ config IP_NF_QUEUE
 
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
+	default m if NETFILTER_ADVANCED=n
 	select NETFILTER_XTABLES
 	help
 	  iptables is a general, extensible packet identification framework.
@@ -57,6 +60,7 @@ config IP_NF_IPTABLES
 config IP_NF_MATCH_IPRANGE
 	tristate '"iprange" match support'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option makes possible to match IP addresses against IP address
 	  ranges.
@@ -66,6 +70,7 @@ config IP_NF_MATCH_IPRANGE
 config IP_NF_MATCH_RECENT
 	tristate '"recent" match support'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This match is used for creating one or many lists of recently
 	  used addresses and then matching against that/those list(s).
@@ -78,6 +83,7 @@ config IP_NF_MATCH_RECENT
 config IP_NF_MATCH_ECN
 	tristate '"ecn" match support'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `ECN' match, which allows you to match against
 	  the IPv4 and TCP header ECN fields.
@@ -87,6 +93,7 @@ config IP_NF_MATCH_ECN
 config IP_NF_MATCH_AH
 	tristate '"ah" match support'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This match extension allows you to match a range of SPIs
 	  inside AH header of IPSec packets.
@@ -96,6 +103,7 @@ config IP_NF_MATCH_AH
 config IP_NF_MATCH_TTL
 	tristate '"ttl" match support'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
 	  to match packets by their TTL value.
@@ -105,10 +113,11 @@ config IP_NF_MATCH_TTL
 config IP_NF_MATCH_ADDRTYPE
 	tristate '"addrtype" address type match support'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option allows you to match what routing thinks of an address,
 	  eg. UNICAST, LOCAL, BROADCAST, ...
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
@@ -116,6 +125,7 @@ config IP_NF_MATCH_ADDRTYPE
 config IP_NF_FILTER
 	tristate "Packet filtering"
 	depends on IP_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Packet filtering defines a table `filter', which has a series of
 	  rules for simple packet filtering at local input, forwarding and
@@ -126,6 +136,7 @@ config IP_NF_FILTER
 config IP_NF_TARGET_REJECT
 	tristate "REJECT target support"
 	depends on IP_NF_FILTER
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The REJECT target allows a filtering rule to specify that an ICMP
 	  error should be issued in response to an incoming packet, rather
@@ -136,6 +147,7 @@ config IP_NF_TARGET_REJECT
 config IP_NF_TARGET_LOG
 	tristate "LOG target support"
 	depends on IP_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `LOG' target, which allows you to create rules in
 	  any iptables table which records the packet header to the syslog.
@@ -145,6 +157,7 @@ config IP_NF_TARGET_LOG
 config IP_NF_TARGET_ULOG
 	tristate "ULOG target support"
 	depends on IP_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	---help---
 
 	  This option enables the old IPv4-only "ipt_ULOG" implementation
@@ -165,6 +178,7 @@ config IP_NF_TARGET_ULOG
 config NF_NAT
 	tristate "Full NAT"
 	depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The Full NAT option allows masquerading, port forwarding and other
 	  forms of full Network Address Port Translation.  It is controlled by
@@ -180,6 +194,7 @@ config NF_NAT_NEEDED
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
 	depends on NF_NAT
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Masquerading is a special case of NAT: all outgoing connections are
 	  changed to seem to come from a particular interface's address, and
@@ -192,6 +207,7 @@ config IP_NF_TARGET_MASQUERADE
 config IP_NF_TARGET_REDIRECT
 	tristate "REDIRECT target support"
 	depends on NF_NAT
+	depends on NETFILTER_ADVANCED
 	help
 	  REDIRECT is a special case of NAT: all incoming connections are
 	  mapped onto the incoming interface's address, causing the packets to
@@ -203,6 +219,7 @@ config IP_NF_TARGET_REDIRECT
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
 	depends on NF_NAT
+	depends on NETFILTER_ADVANCED
 	help
 	  NETMAP is an implementation of static 1:1 NAT mapping of network
 	  addresses. It maps the network address part, while keeping the host
@@ -214,6 +231,7 @@ config IP_NF_TARGET_NETMAP
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_NAT
+	depends on NETFILTER_ADVANCED
 	---help---
 
 	  This module implements an Application Layer Gateway (ALG) for
@@ -277,6 +295,7 @@ config NF_NAT_SIP
 config IP_NF_MANGLE
 	tristate "Packet mangling"
 	depends on IP_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `mangle' table to iptables: see the man page for
 	  iptables(8).  This table is used for various packet alterations
@@ -287,6 +306,7 @@ config IP_NF_MANGLE
 config IP_NF_TARGET_ECN
 	tristate "ECN target support"
 	depends on IP_NF_MANGLE
+	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a `ECN' target, which can be used in the iptables mangle
 	  table.  
@@ -301,6 +321,7 @@ config IP_NF_TARGET_ECN
 config IP_NF_TARGET_TTL
 	tristate  'TTL target support'
 	depends on IP_NF_MANGLE
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `TTL' target, which enables the user to modify
 	  the TTL value of the IP header.
@@ -316,6 +337,7 @@ config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
 	depends on IP_NF_MANGLE && EXPERIMENTAL
 	depends on NF_CONNTRACK_IPV4
+	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
 	help
 	  The CLUSTERIP target allows you to build load-balancing clusters of
@@ -328,6 +350,7 @@ config IP_NF_TARGET_CLUSTERIP
 config IP_NF_RAW
 	tristate  'raw table support (required for NOTRACK/TRACE)'
 	depends on IP_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `raw' table to iptables. This table is the very
 	  first in the netfilter framework and hooks in at the PREROUTING
@@ -340,6 +363,7 @@ config IP_NF_RAW
 config IP_NF_ARPTABLES
 	tristate "ARP tables support"
 	select NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  arptables is a general, extensible packet identification framework.
 	  The ARP packet filtering and mangling (manipulation)subsystems
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 5374c665f8d..a6b4a9a1053 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -8,6 +8,7 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
 config NF_CONNTRACK_IPV6
 	tristate "IPv6 connection tracking support (EXPERIMENTAL)"
 	depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	---help---
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
@@ -22,6 +23,7 @@ config NF_CONNTRACK_IPV6
 config IP6_NF_QUEUE
 	tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
 	depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
+	depends on NETFILTER_ADVANCED
 	---help---
 
 	  This option adds a queue handler to the kernel for IPv6
@@ -44,6 +46,7 @@ config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6 && EXPERIMENTAL
 	select NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  ip6tables is a general, extensible packet identification framework.
 	  Currently only the packet filtering and packet mangling subsystem
@@ -56,6 +59,7 @@ config IP6_NF_IPTABLES
 config IP6_NF_MATCH_RT
 	tristate '"rt" Routing header match support'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  rt matching allows you to match packets based on the routing
 	  header of the packet.
@@ -65,6 +69,7 @@ config IP6_NF_MATCH_RT
 config IP6_NF_MATCH_OPTS
 	tristate '"hopbyhop" and "dst" opts header match support'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This allows one to match packets based on the hop-by-hop
 	  and destination options headers of a packet.
@@ -74,6 +79,7 @@ config IP6_NF_MATCH_OPTS
 config IP6_NF_MATCH_FRAG
 	tristate '"frag" Fragmentation header match support'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  frag matching allows you to match packets based on the fragmentation
 	  header of the packet.
@@ -83,6 +89,7 @@ config IP6_NF_MATCH_FRAG
 config IP6_NF_MATCH_HL
 	tristate '"hl" match support'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  HL matching allows you to match packets based on the hop
 	  limit of the packet.
@@ -92,6 +99,7 @@ config IP6_NF_MATCH_HL
 config IP6_NF_MATCH_IPV6HEADER
 	tristate '"ipv6header" IPv6 Extension Headers Match'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This module allows one to match packets based upon
 	  the ipv6 extension headers.
@@ -101,6 +109,7 @@ config IP6_NF_MATCH_IPV6HEADER
 config IP6_NF_MATCH_AH
 	tristate '"ah" match support'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This module allows one to match AH packets.
 
@@ -109,6 +118,7 @@ config IP6_NF_MATCH_AH
 config IP6_NF_MATCH_MH
 	tristate '"mh" match support'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This module allows one to match MH packets.
 
@@ -117,6 +127,7 @@ config IP6_NF_MATCH_MH
 config IP6_NF_MATCH_EUI64
 	tristate '"eui64" address check'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This module performs checking on the IPv6 source address
 	  Compares the last 64 bits with the EUI64 (delivered
@@ -128,6 +139,7 @@ config IP6_NF_MATCH_EUI64
 config IP6_NF_FILTER
 	tristate "Packet filtering"
 	depends on IP6_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Packet filtering defines a table `filter', which has a series of
 	  rules for simple packet filtering at local input, forwarding and
@@ -138,6 +150,7 @@ config IP6_NF_FILTER
 config IP6_NF_TARGET_LOG
 	tristate "LOG target support"
 	depends on IP6_NF_FILTER
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `LOG' target, which allows you to create rules in
 	  any iptables table which records the packet header to the syslog.
@@ -147,6 +160,7 @@ config IP6_NF_TARGET_LOG
 config IP6_NF_TARGET_REJECT
 	tristate "REJECT target support"
 	depends on IP6_NF_FILTER
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The REJECT target allows a filtering rule to specify that an ICMPv6
 	  error should be issued in response to an incoming packet, rather
@@ -157,6 +171,7 @@ config IP6_NF_TARGET_REJECT
 config IP6_NF_MANGLE
 	tristate "Packet mangling"
 	depends on IP6_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `mangle' table to iptables: see the man page for
 	  iptables(8).  This table is used for various packet alterations
@@ -167,27 +182,29 @@ config IP6_NF_MANGLE
 config IP6_NF_TARGET_HL
 	tristate  'HL (hoplimit) target support'
 	depends on IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `HL' target, which enables the user to decrement
 	  the hoplimit value of the IPv6 header or set it to a given (lower)
 	  value.
-	
+
 	  While it is safe to decrement the hoplimit value, this option also
 	  enables functionality to increment and set the hoplimit value of the
 	  IPv6 header to arbitrary values.  This is EXTREMELY DANGEROUS since
 	  you can easily create immortal packets that loop forever on the
-	  network.  
+	  network.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_RAW
 	tristate  'raw table support (required for TRACE)'
 	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `raw' table to ip6tables. This table is the very
 	  first in the netfilter framework and hooks in at the PREROUTING
 	  and OUTPUT chains.
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index bb61f83c7a7..96dbe9f56bc 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -6,6 +6,7 @@ config NETFILTER_NETLINK
 
 config NETFILTER_NETLINK_QUEUE
 	tristate "Netfilter NFQUEUE over NFNETLINK interface"
+	depends on NETFILTER_ADVANCED
 	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support
@@ -13,6 +14,7 @@ config NETFILTER_NETLINK_QUEUE
 	  
 config NETFILTER_NETLINK_LOG
 	tristate "Netfilter LOG over NFNETLINK interface"
+	default m if NETFILTER_ADVANCED=n
 	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support
@@ -24,6 +26,7 @@ config NETFILTER_NETLINK_LOG
 
 config NF_CONNTRACK
 	tristate "Netfilter connection tracking support"
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
@@ -38,6 +41,7 @@ config NF_CONNTRACK
 
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
+	depends on NETFILTER_ADVANCED
 	depends on NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
@@ -50,6 +54,7 @@ config NF_CT_ACCT
 
 config NF_CONNTRACK_MARK
 	bool  'Connection mark tracking support'
+	depends on NETFILTER_ADVANCED
 	depends on NF_CONNTRACK
 	help
 	  This option enables support for connection marks, used by the
@@ -60,6 +65,7 @@ config NF_CONNTRACK_MARK
 config NF_CONNTRACK_SECMARK
 	bool  'Connection tracking security mark support'
 	depends on NF_CONNTRACK && NETWORK_SECMARK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables security markings to be applied to
 	  connections.  Typically they are copied to connections from
@@ -72,6 +78,7 @@ config NF_CONNTRACK_SECMARK
 config NF_CONNTRACK_EVENTS
 	bool "Connection tracking events (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -86,7 +93,7 @@ config NF_CT_PROTO_GRE
 config NF_CT_PROTO_SCTP
 	tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL && NF_CONNTRACK
-	default n
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on SCTP connections.
@@ -97,6 +104,7 @@ config NF_CT_PROTO_SCTP
 config NF_CT_PROTO_UDPLITE
 	tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on UDP-Lite
@@ -107,6 +115,7 @@ config NF_CT_PROTO_UDPLITE
 config NF_CONNTRACK_AMANDA
 	tristate "Amanda backup protocol support"
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
 	help
@@ -122,6 +131,7 @@ config NF_CONNTRACK_AMANDA
 config NF_CONNTRACK_FTP
 	tristate "FTP protocol support"
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Tracking FTP connections is problematic: special helpers are
 	  required for tracking them, and doing masquerading and other forms
@@ -136,6 +146,7 @@ config NF_CONNTRACK_FTP
 config NF_CONNTRACK_H323
 	tristate "H.323 protocol support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK && (IPV6 || IPV6=n)
+	depends on NETFILTER_ADVANCED
 	help
 	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
 	  important VoIP protocols, it is widely used by voice hardware and
@@ -155,6 +166,7 @@ config NF_CONNTRACK_H323
 config NF_CONNTRACK_IRC
 	tristate "IRC protocol support"
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  There is a commonly-used extension to IRC called
 	  Direct Client-to-Client Protocol (DCC).  This enables users to send
@@ -170,6 +182,7 @@ config NF_CONNTRACK_IRC
 config NF_CONNTRACK_NETBIOS_NS
 	tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
 	  unprivileged port and responded to with unicast messages to the
@@ -189,6 +202,7 @@ config NF_CONNTRACK_NETBIOS_NS
 config NF_CONNTRACK_PPTP
 	tristate "PPtP protocol support"
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CT_PROTO_GRE
 	help
 	  This module adds support for PPTP (Point to Point Tunnelling
@@ -208,6 +222,7 @@ config NF_CONNTRACK_PPTP
 config NF_CONNTRACK_SANE
 	tristate "SANE protocol support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  SANE is a protocol for remote access to scanners as implemented
 	  by the 'saned' daemon. Like FTP, it uses separate control and
@@ -221,6 +236,7 @@ config NF_CONNTRACK_SANE
 config NF_CONNTRACK_SIP
 	tristate "SIP protocol support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  SIP is an application-layer control protocol that can establish,
 	  modify, and terminate multimedia sessions (conferences) such as
@@ -233,6 +249,7 @@ config NF_CONNTRACK_SIP
 config NF_CONNTRACK_TFTP
 	tristate "TFTP protocol support"
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  TFTP connection tracking helper, this is required depending
 	  on how restrictive your ruleset is.
@@ -246,11 +263,13 @@ config NF_CT_NETLINK
 	depends on EXPERIMENTAL && NF_CONNTRACK
 	select NETFILTER_NETLINK
 	depends on NF_NAT=n || NF_NAT
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
 
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This is required if you intend to use any of ip_tables,
 	  ip6_tables or arp_tables.
@@ -260,6 +279,7 @@ config NETFILTER_XTABLES
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `CLASSIFY' target, which enables the user to set
 	  the priority of a packet. Some qdiscs can use this value for
@@ -274,12 +294,13 @@ config NETFILTER_XT_TARGET_CONNMARK
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `CONNMARK' target, which allows one to manipulate
 	  the connection mark value.  Similar to the MARK target, but
 	  affects the connection mark value rather than the packet mark value.
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  The module will be called
 	  ipt_CONNMARK.ko.  If unsure, say `N'.
@@ -288,6 +309,7 @@ config NETFILTER_XT_TARGET_DSCP
 	tristate '"DSCP" and "TOS" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `DSCP' target, which allows you to manipulate
 	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
@@ -303,6 +325,7 @@ config NETFILTER_XT_TARGET_DSCP
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	depends on NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `MARK' target, which allows you to create rules
 	  in the `mangle' table which alter the netfilter mark (nfmark) field
@@ -316,6 +339,7 @@ config NETFILTER_XT_TARGET_MARK
 config NETFILTER_XT_TARGET_NFQUEUE
 	tristate '"NFQUEUE" target Support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This target replaced the old obsolete QUEUE target.
 
@@ -327,6 +351,7 @@ config NETFILTER_XT_TARGET_NFQUEUE
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
 	depends on NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables the NFLOG target, which allows to LOG
 	  messages through the netfilter logging API, which can use
@@ -340,12 +365,13 @@ config NETFILTER_XT_TARGET_NOTRACK
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  The NOTRACK target allows a select rule to specify
 	  which packets *not* to enter the conntrack/NAT
 	  subsystem with all the consequences (no ICMP error tracking,
 	  no protocol helpers for the selected packets).
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
@@ -363,6 +389,7 @@ config NETFILTER_XT_TARGET_TRACE
 	tristate  '"TRACE" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
+	depends on NETFILTER_ADVANCED
 	help
 	  The TRACE target allows you to mark packets so that the kernel
 	  will log every rule which match the packets as those traverse
@@ -374,6 +401,7 @@ config NETFILTER_XT_TARGET_TRACE
 config NETFILTER_XT_TARGET_SECMARK
 	tristate '"SECMARK" target support'
 	depends on NETFILTER_XTABLES && NETWORK_SECMARK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The SECMARK target allows security marking of network
 	  packets, for use with security subsystems.
@@ -383,6 +411,7 @@ config NETFILTER_XT_TARGET_SECMARK
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
 	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The CONNSECMARK target copies security markings from packets
 	  to connections, and restores security markings from connections
@@ -394,6 +423,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
 	depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
+	default m if NETFILTER_ADVANCED=n
 	---help---
 	  This option adds a `TCPMSS' target, which allows you to alter the
 	  MSS value of TCP SYN packets, to control the maximum size for that
@@ -421,6 +451,7 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 	tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL && NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
 	  TCP options from TCP packets.
@@ -428,6 +459,7 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `comment' dummy-match, which allows you to put
 	  comments in your iptables ruleset.
@@ -439,6 +471,7 @@ config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CT_ACCT
 	help
 	  This option adds a `connbytes' match, which allows you to match the
@@ -451,6 +484,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
 	tristate '"connlimit" match support"'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	---help---
 	  This match allows you to match against the number of parallel
 	  connections to a server per client IP address (or address block).
@@ -459,11 +493,12 @@ config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `connmark' match, which allows you to match the
 	  connection mark value previously set for the session by `CONNMARK'. 
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  The module will be called
 	  ipt_connmark.ko.  If unsure, say `N'.
@@ -472,6 +507,7 @@ config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This is a general conntrack match module, a superset of the state match.
 
@@ -484,6 +520,7 @@ config NETFILTER_XT_MATCH_CONNTRACK
 config NETFILTER_XT_MATCH_DCCP
 	tristate '"dccp" protocol match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, you will be able to use the iptables
 	  `dccp' match in order to match on DCCP source/destination ports
@@ -495,6 +532,7 @@ config NETFILTER_XT_MATCH_DCCP
 config NETFILTER_XT_MATCH_DSCP
 	tristate '"dscp" and "tos" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `DSCP' match, which allows you to match against
 	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
@@ -510,6 +548,7 @@ config NETFILTER_XT_MATCH_DSCP
 config NETFILTER_XT_MATCH_ESP
 	tristate '"esp" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This match extension allows you to match a range of SPIs
 	  inside ESP header of IPSec packets.
@@ -520,6 +559,7 @@ config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  Helper matching allows you to match packets in dynamic connections
 	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -529,6 +569,7 @@ config NETFILTER_XT_MATCH_HELPER
 config NETFILTER_XT_MATCH_LENGTH
 	tristate '"length" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option allows you to match the length of a packet against a
 	  specific value or range of values.
@@ -538,6 +579,7 @@ config NETFILTER_XT_MATCH_LENGTH
 config NETFILTER_XT_MATCH_LIMIT
 	tristate '"limit" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  limit matching allows you to control the rate at which a rule can be
 	  matched: mainly useful in combination with the LOG target ("LOG
@@ -548,6 +590,7 @@ config NETFILTER_XT_MATCH_LIMIT
 config NETFILTER_XT_MATCH_MAC
 	tristate '"mac" address match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  MAC matching allows you to match packets based on the source
 	  Ethernet address of the packet.
@@ -557,6 +600,7 @@ config NETFILTER_XT_MATCH_MAC
 config NETFILTER_XT_MATCH_MARK
 	tristate '"mark" match support'
 	depends on NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Netfilter mark matching allows you to match packets based on the
 	  `nfmark' value in the packet.  This can be set by the MARK target
@@ -567,6 +611,7 @@ config NETFILTER_XT_MATCH_MARK
 config NETFILTER_XT_MATCH_OWNER
 	tristate '"owner" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	---help---
 	Socket owner matching allows you to match locally-generated packets
 	based on who created the socket: the user or group. It is also
@@ -575,6 +620,7 @@ config NETFILTER_XT_MATCH_OWNER
 config NETFILTER_XT_MATCH_POLICY
 	tristate 'IPsec "policy" match support'
 	depends on NETFILTER_XTABLES && XFRM
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Policy matching allows you to match packets based on the
 	  IPsec policy that was used during decapsulation/will
@@ -585,6 +631,7 @@ config NETFILTER_XT_MATCH_POLICY
 config NETFILTER_XT_MATCH_MULTIPORT
 	tristate '"multiport" Multiple port match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  Multiport matching allows you to match TCP or UDP packets based on
 	  a series of source or destination ports: normally a rule can only
@@ -595,6 +642,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
 config NETFILTER_XT_MATCH_PHYSDEV
 	tristate '"physdev" match support'
 	depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
+	depends on NETFILTER_ADVANCED
 	help
 	  Physdev packet matching matches against the physical bridge ports
 	  the IP packet arrived on or will leave by.
@@ -604,6 +652,7 @@ config NETFILTER_XT_MATCH_PHYSDEV
 config NETFILTER_XT_MATCH_PKTTYPE
 	tristate '"pkttype" packet type match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  Packet type matching allows you to match a packet by
 	  its "class", eg. BROADCAST, MULTICAST, ...
@@ -616,6 +665,7 @@ config NETFILTER_XT_MATCH_PKTTYPE
 config NETFILTER_XT_MATCH_QUOTA
 	tristate '"quota" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `quota' match, which allows to match on a
 	  byte counter.
@@ -636,20 +686,22 @@ config NETFILTER_XT_MATCH_RATEEST
 config NETFILTER_XT_MATCH_REALM
 	tristate  '"realm" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	select NET_CLS_ROUTE
 	help
 	  This option adds a `realm' match, which allows you to use the realm
 	  key from the routing subsystem inside iptables.
-	
+
 	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
 	  in tc world.
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_SCTP
 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
 	depends on NETFILTER_XTABLES && EXPERIMENTAL
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, you will be able to use the 
 	  `sctp' match in order to match on SCTP source/destination ports
@@ -662,6 +714,7 @@ config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Connection state matching allows you to match packets based on their
 	  relationship to a tracked connection (ie. previous packets).  This
@@ -672,6 +725,7 @@ config NETFILTER_XT_MATCH_STATE
 config NETFILTER_XT_MATCH_STATISTIC
 	tristate '"statistic" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `statistic' match, which allows you to match
 	  on packets periodically or randomly with a given percentage.
@@ -681,6 +735,7 @@ config NETFILTER_XT_MATCH_STATISTIC
 config NETFILTER_XT_MATCH_STRING
 	tristate  '"string" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
 	select TEXTSEARCH_BM
@@ -694,6 +749,7 @@ config NETFILTER_XT_MATCH_STRING
 config NETFILTER_XT_MATCH_TCPMSS
 	tristate '"tcpmss" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `tcpmss' match, which allows you to examine the
 	  MSS value of TCP SYN packets, which control the maximum packet size
@@ -704,6 +760,7 @@ config NETFILTER_XT_MATCH_TCPMSS
 config NETFILTER_XT_MATCH_TIME
 	tristate '"time" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a "time" match, which allows you to match based on
 	  the packet arrival time (at the machine which netfilter is running)
@@ -718,6 +775,7 @@ config NETFILTER_XT_MATCH_TIME
 config NETFILTER_XT_MATCH_U32
 	tristate '"u32" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	---help---
 	  u32 allows you to extract quantities of up to 4 bytes from a packet,
 	  AND them with specified masks, shift them by specified amounts and
@@ -731,6 +789,7 @@ config NETFILTER_XT_MATCH_U32
 config NETFILTER_XT_MATCH_HASHLIMIT
 	tristate '"hashlimit" match support'
 	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `hashlimit' match.
 
-- 
cgit v1.2.3


From 910ef70aa301eb018255683499b8e51426c213a0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 Dec 2007 22:14:25 -0800
Subject: [IPSEC]: Do xfrm_state_check_space before encapsulation

While merging the IPsec output path I moved the encapsulation output
operation to the top of the loop so that it sits outside of the locked
section.  Unfortunately in doing so it now sits in front of the space
check as well which could be a fatal error.

This patch rearranges the calls so that the space check happens as
the thing on the output path.

This patch also fixes an incorrect goto should the encapsulation output
fail.

Thanks to Kazunori MIYAZAWA for finding this bug.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3c277a4d0e7..26fa0cb78c9 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -33,16 +33,6 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
-static int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err = xfrm_state_check_expire(x);
-	if (err < 0)
-		goto err;
-	err = xfrm_state_check_space(x, skb);
-err:
-	return err;
-}
-
 static int xfrm_output_one(struct sk_buff *skb, int err)
 {
 	struct dst_entry *dst = skb->dst;
@@ -52,12 +42,16 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		goto resume;
 
 	do {
+		err = xfrm_state_check_space(x, skb);
+		if (err)
+			goto error_nolock;
+
 		err = x->outer_mode->output(x, skb);
 		if (err)
-			goto error;
+			goto error_nolock;
 
 		spin_lock_bh(&x->lock);
-		err = xfrm_state_check(x, skb);
+		err = xfrm_state_check_expire(x);
 		if (err)
 			goto error;
 
-- 
cgit v1.2.3


From 98f0b0a3a412eade153c7cf00c6b863600980d89 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 18 Dec 2007 17:23:53 +0200
Subject: mac80211: pass in PS_POLL frames

This patch fixes should_drop_frame function to pass in ps poll control
frames required for power save functioanlity. Interface types that do not
have interest for PS POLL frames now drop it in handler.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c   | 11 +++++++++--
 net/mac80211/util.c |  7 ++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9cd59ecbcd6..e65da5780cd 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -61,8 +61,10 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
 		return 1;
 	if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len))
 		return 1;
-	if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
-			cpu_to_le16(IEEE80211_FTYPE_CTL))
+	if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
+			cpu_to_le16(IEEE80211_FTYPE_CTL)) &&
+	    ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
+			cpu_to_le16(IEEE80211_STYPE_PSPOLL)))
 		return 1;
 	return 0;
 }
@@ -896,6 +898,7 @@ ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx)
 static ieee80211_txrx_result
 ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
 {
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
 	struct sk_buff *skb;
 	int no_pending_pkts;
 	DECLARE_MAC_BUF(mac);
@@ -906,6 +909,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
 		   !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)))
 		return TXRX_CONTINUE;
 
+	if ((sdata->type != IEEE80211_IF_TYPE_AP) &&
+	    (sdata->type != IEEE80211_IF_TYPE_VLAN))
+		return TXRX_DROP;
+
 	skb = skb_dequeue(&rx->sta->tx_filtered);
 	if (!skb) {
 		skb = skb_dequeue(&rx->sta->ps_tx_buf);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 7b278e9aa1a..fb7fd896cd0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -135,13 +135,16 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
 {
 	u16 fc;
 
-	if (len < 24)
+	 /* drop ACK/CTS frames and incorrect hdr len (ctrl) */
+	if (len < 16)
 		return NULL;
 
 	fc = le16_to_cpu(hdr->frame_control);
 
 	switch (fc & IEEE80211_FCTL_FTYPE) {
 	case IEEE80211_FTYPE_DATA:
+		if (len < 24) /* drop incorrect hdr len (data) */
+			return NULL;
 		switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
 		case IEEE80211_FCTL_TODS:
 			return hdr->addr1;
@@ -154,6 +157,8 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
 		}
 		break;
 	case IEEE80211_FTYPE_MGMT:
+		if (len < 24) /* drop incorrect hdr len (mgmt) */
+			return NULL;
 		return hdr->addr3;
 	case IEEE80211_FTYPE_CTL:
 		if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)
-- 
cgit v1.2.3


From 1abbe498e4b5e4f2000dfc30a0fa25be9553530e Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mattias.nissler@gmx.de>
Date: Thu, 20 Dec 2007 13:50:07 +0100
Subject: mac80211: clean up rate selection

Move some code out of rc80211_simple since it's probably needed for all rate
selection algorithms, and fix iwlwifi accordingly. While at it, clean up the
rate_control_get_rate() interface.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211.c      |  6 ++--
 net/mac80211/ieee80211_rate.c | 47 ++++++++++++++++++++++++++++
 net/mac80211/ieee80211_rate.h | 73 +++++++++++++++++++++++++++----------------
 net/mac80211/ieee80211_sta.c  | 13 +++-----
 net/mac80211/rc80211_simple.c | 64 +++++++------------------------------
 net/mac80211/tx.c             | 42 +++++++++++--------------
 6 files changed, 129 insertions(+), 116 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index d6a97a68a62..5bf7a5bebfc 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -859,10 +859,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 			sta_info_put(sta);
 			return;
 		}
-	} else {
-		/* FIXME: STUPID to call this with both local and local->mdev */
-		rate_control_tx_status(local, local->mdev, skb, status);
-	}
+	} else
+		rate_control_tx_status(local->mdev, skb, status);
 
 	ieee80211_led_tx(local, 0);
 
diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
index c3f27839374..e495b0998b4 100644
--- a/net/mac80211/ieee80211_rate.c
+++ b/net/mac80211/ieee80211_rate.c
@@ -147,6 +147,53 @@ static void rate_control_release(struct kref *kref)
 	kfree(ctrl_ref);
 }
 
+void rate_control_get_rate(struct net_device *dev,
+			   struct ieee80211_hw_mode *mode, struct sk_buff *skb,
+			   struct rate_selection *sel)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct rate_control_ref *ref = local->rate_ctrl;
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct sta_info *sta = sta_info_get(local, hdr->addr1);
+	int i;
+	u16 fc;
+
+	memset(sel, 0, sizeof(struct rate_selection));
+
+	/* Send management frames and broadcast/multicast data using lowest
+	 * rate. */
+	fc = le16_to_cpu(hdr->frame_control);
+	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    is_multicast_ether_addr(hdr->addr1))
+		sel->rate = rate_lowest(local, mode, sta);
+
+	/* If a forced rate is in effect, select it. */
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
+		sel->rate = &mode->rates[sdata->bss->force_unicast_rateidx];
+
+	/* If we haven't found the rate yet, ask the rate control algo. */
+	if (!sel->rate)
+		ref->ops->get_rate(ref->priv, dev, mode, skb, sel);
+
+	/* Select a non-ERP backup rate. */
+	if (!sel->nonerp) {
+		for (i = 0; i < mode->num_rates - 1; i++) {
+			struct ieee80211_rate *rate = &mode->rates[i];
+			if (sel->rate->rate < rate->rate)
+				break;
+
+			if (rate_supported(sta, mode, i) &&
+			    !(rate->flags & IEEE80211_RATE_ERP))
+				sel->nonerp = rate;
+		}
+	}
+
+	if (sta)
+		sta_info_put(sta);
+}
+
 struct rate_control_ref *rate_control_get(struct rate_control_ref *ref)
 {
 	kref_get(&ref->kref);
diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
index 23688139ffb..787134bd6ab 100644
--- a/net/mac80211/ieee80211_rate.h
+++ b/net/mac80211/ieee80211_rate.h
@@ -18,31 +18,24 @@
 #include "ieee80211_i.h"
 #include "sta_info.h"
 
-#define RATE_CONTROL_NUM_DOWN 20
-#define RATE_CONTROL_NUM_UP   15
-
-
-struct rate_control_extra {
-	/* values from rate_control_get_rate() to the caller: */
-	struct ieee80211_rate *probe; /* probe with this rate, or NULL for no
-				       * probing */
+struct rate_selection {
+	/* Selected transmission rate */
+	struct ieee80211_rate *rate;
+	/* Non-ERP rate to use if mac80211 decides it cannot use an ERP rate */
 	struct ieee80211_rate *nonerp;
-
-	/* parameters from the caller to rate_control_get_rate(): */
-	struct ieee80211_hw_mode *mode;
-	u16 ethertype;
+	/* probe with this rate, or NULL for no probing */
+	struct ieee80211_rate *probe;
 };
 
-
 struct rate_control_ops {
 	struct module *module;
 	const char *name;
 	void (*tx_status)(void *priv, struct net_device *dev,
 			  struct sk_buff *skb,
 			  struct ieee80211_tx_status *status);
-	struct ieee80211_rate *(*get_rate)(void *priv, struct net_device *dev,
-					   struct sk_buff *skb,
-					   struct rate_control_extra *extra);
+	void (*get_rate)(void *priv, struct net_device *dev,
+			 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
+			 struct rate_selection *sel);
 	void (*rate_init)(void *priv, void *priv_sta,
 			  struct ieee80211_local *local, struct sta_info *sta);
 	void (*clear)(void *priv);
@@ -75,25 +68,20 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
  * first available algorithm. */
 struct rate_control_ref *rate_control_alloc(const char *name,
 					    struct ieee80211_local *local);
+void rate_control_get_rate(struct net_device *dev,
+			   struct ieee80211_hw_mode *mode, struct sk_buff *skb,
+			   struct rate_selection *sel);
 struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
 void rate_control_put(struct rate_control_ref *ref);
 
-static inline void rate_control_tx_status(struct ieee80211_local *local,
-					  struct net_device *dev,
+static inline void rate_control_tx_status(struct net_device *dev,
 					  struct sk_buff *skb,
 					  struct ieee80211_tx_status *status)
 {
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct rate_control_ref *ref = local->rate_ctrl;
-	ref->ops->tx_status(ref->priv, dev, skb, status);
-}
 
-
-static inline struct ieee80211_rate *
-rate_control_get_rate(struct ieee80211_local *local, struct net_device *dev,
-		      struct sk_buff *skb, struct rate_control_extra *extra)
-{
-	struct rate_control_ref *ref = local->rate_ctrl;
-	return ref->ops->get_rate(ref->priv, dev, skb, extra);
+	ref->ops->tx_status(ref->priv, dev, skb, status);
 }
 
 
@@ -142,6 +130,37 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
 #endif
 }
 
+static inline int
+rate_supported(struct sta_info *sta, struct ieee80211_hw_mode *mode, int index)
+{
+	return (sta == NULL || sta->supp_rates & BIT(index)) &&
+	       (mode->rates[index].flags & IEEE80211_RATE_SUPPORTED);
+}
+
+static inline int
+rate_lowest_index(struct ieee80211_local *local, struct ieee80211_hw_mode *mode,
+		  struct sta_info *sta)
+{
+	int i;
+
+	for (i = 0; i < mode->num_rates; i++) {
+		if (rate_supported(sta, mode, i))
+			return i;
+	}
+
+	/* warn when we cannot find a rate. */
+	WARN_ON(1);
+
+	return 0;
+}
+
+static inline struct ieee80211_rate *
+rate_lowest(struct ieee80211_local *local, struct ieee80211_hw_mode *mode,
+	    struct sta_info *sta)
+{
+	return &mode->rates[rate_lowest_index(local, mode, sta)];
+}
+
 
 /* functions for rate control related to a device */
 int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 3c552fed2af..3978ad606b5 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -2463,9 +2463,8 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	struct ieee80211_tx_control control;
-	struct ieee80211_rate *rate;
 	struct ieee80211_hw_mode *mode;
-	struct rate_control_extra extra;
+	struct rate_selection ratesel;
 	u8 *pos;
 	struct ieee80211_sub_if_data *sdata;
 
@@ -2550,18 +2549,16 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 		}
 
 		memset(&control, 0, sizeof(control));
-		memset(&extra, 0, sizeof(extra));
-		extra.mode = local->oper_hw_mode;
-		rate = rate_control_get_rate(local, dev, skb, &extra);
-		if (!rate) {
+		rate_control_get_rate(dev, local->oper_hw_mode, skb, &ratesel);
+		if (!ratesel.rate) {
 			printk(KERN_DEBUG "%s: Failed to determine TX rate "
 			       "for IBSS beacon\n", dev->name);
 			break;
 		}
 		control.tx_rate =
 			((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) &&
-			(rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
-			rate->val2 : rate->val;
+			(ratesel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
+			ratesel.rate->val2 : ratesel.rate->val;
 		control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 		control.power_level = local->hw.conf.power_level;
 		control.flags |= IEEE80211_TXCTL_NO_ACK;
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index da72737364e..c1c8b76a56a 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -23,6 +23,8 @@
 /* This is a minimal implementation of TX rate controlling that can be used
  * as the default when no improved mechanisms are available. */
 
+#define RATE_CONTROL_NUM_DOWN 20
+#define RATE_CONTROL_NUM_UP   15
 
 #define RATE_CONTROL_EMERG_DEC 2
 #define RATE_CONTROL_INTERVAL (HZ / 20)
@@ -87,26 +89,6 @@ static void rate_control_rate_dec(struct ieee80211_local *local,
 	}
 }
 
-
-static struct ieee80211_rate *
-rate_control_lowest_rate(struct ieee80211_local *local,
-			 struct ieee80211_hw_mode *mode)
-{
-	int i;
-
-	for (i = 0; i < mode->num_rates; i++) {
-		struct ieee80211_rate *rate = &mode->rates[i];
-
-		if (rate->flags & IEEE80211_RATE_SUPPORTED)
-			return rate;
-	}
-
-	printk(KERN_DEBUG "rate_control_lowest_rate - no supported rates "
-	       "found\n");
-	return &mode->rates[0];
-}
-
-
 struct global_rate_control {
 	int dummy;
 };
@@ -216,56 +198,32 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
 }
 
 
-static struct ieee80211_rate *
+static void
 rate_control_simple_get_rate(void *priv, struct net_device *dev,
+			     struct ieee80211_hw_mode *mode,
 			     struct sk_buff *skb,
-			     struct rate_control_extra *extra)
+			     struct rate_selection *sel)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct ieee80211_hw_mode *mode = extra->mode;
 	struct sta_info *sta;
-	int rateidx, nonerp_idx;
-	u16 fc;
-
-	memset(extra, 0, sizeof(*extra));
-
-	fc = le16_to_cpu(hdr->frame_control);
-	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
-	    (hdr->addr1[0] & 0x01)) {
-		/* Send management frames and broadcast/multicast data using
-		 * lowest rate. */
-		/* TODO: this could probably be improved.. */
-		return rate_control_lowest_rate(local, mode);
-	}
+	int rateidx;
 
 	sta = sta_info_get(local, hdr->addr1);
 
-	if (!sta)
-		return rate_control_lowest_rate(local, mode);
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
-		sta->txrate = sdata->bss->force_unicast_rateidx;
+	if (!sta) {
+		sel->rate = rate_lowest(local, mode, NULL);
+		return;
+	}
 
 	rateidx = sta->txrate;
 
 	if (rateidx >= mode->num_rates)
 		rateidx = mode->num_rates - 1;
 
-	sta->last_txrate = rateidx;
-	nonerp_idx = rateidx;
-	while (nonerp_idx > 0 &&
-	       ((mode->rates[nonerp_idx].flags & IEEE80211_RATE_ERP) ||
-		!(mode->rates[nonerp_idx].flags & IEEE80211_RATE_SUPPORTED) ||
-		!(sta->supp_rates & BIT(nonerp_idx))))
-		nonerp_idx--;
-	extra->nonerp = &mode->rates[nonerp_idx];
-
 	sta_info_put(sta);
 
-	return &mode->rates[rateidx];
+	sel->rate = &mode->rates[rateidx];
 }
 
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 12c15588af6..4655e303865 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -569,21 +569,17 @@ ieee80211_tx_h_encrypt(struct ieee80211_txrx_data *tx)
 static ieee80211_txrx_result
 ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
 {
-	struct rate_control_extra extra;
+	struct rate_selection rsel;
 
 	if (likely(!tx->u.tx.rate)) {
-		memset(&extra, 0, sizeof(extra));
-		extra.mode = tx->u.tx.mode;
-		extra.ethertype = tx->ethertype;
-
-		tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev,
-						      tx->skb, &extra);
-		if (unlikely(extra.probe != NULL)) {
+		rate_control_get_rate(tx->dev, tx->u.tx.mode, tx->skb, &rsel);
+		tx->u.tx.rate = rsel.rate;
+		if (unlikely(rsel.probe != NULL)) {
 			tx->u.tx.control->flags |=
 				IEEE80211_TXCTL_RATE_CTRL_PROBE;
 			tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
 			tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val;
-			tx->u.tx.rate = extra.probe;
+			tx->u.tx.rate = rsel.probe;
 		} else
 			tx->u.tx.control->alt_retry_rate = -1;
 
@@ -594,14 +590,14 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
 
 	if (tx->u.tx.mode->mode == MODE_IEEE80211G &&
 	    (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) &&
-	    (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && extra.nonerp) {
+	    (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && rsel.nonerp) {
 		tx->u.tx.last_frag_rate = tx->u.tx.rate;
-		if (extra.probe)
+		if (rsel.probe)
 			tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
 		else
 			tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
-		tx->u.tx.rate = extra.nonerp;
-		tx->u.tx.control->rate = extra.nonerp;
+		tx->u.tx.rate = rsel.nonerp;
+		tx->u.tx.control->rate = rsel.nonerp;
 		tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE;
 	} else {
 		tx->u.tx.last_frag_rate = tx->u.tx.rate;
@@ -1667,8 +1663,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
 	struct net_device *bdev;
 	struct ieee80211_sub_if_data *sdata = NULL;
 	struct ieee80211_if_ap *ap = NULL;
-	struct ieee80211_rate *rate;
-	struct rate_control_extra extra;
+	struct rate_selection rsel;
 	u8 *b_head, *b_tail;
 	int bh_len, bt_len;
 
@@ -1712,14 +1707,13 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
 	}
 
 	if (control) {
-		memset(&extra, 0, sizeof(extra));
-		extra.mode = local->oper_hw_mode;
-
-		rate = rate_control_get_rate(local, local->mdev, skb, &extra);
-		if (!rate) {
+		rate_control_get_rate(local->mdev, local->oper_hw_mode, skb,
+				      &rsel);
+		if (!rsel.rate) {
 			if (net_ratelimit()) {
-				printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate "
-				       "found\n", wiphy_name(local->hw.wiphy));
+				printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
+				       "no rate found\n",
+				       wiphy_name(local->hw.wiphy));
 			}
 			dev_kfree_skb(skb);
 			return NULL;
@@ -1727,8 +1721,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
 
 		control->tx_rate =
 			((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) &&
-			(rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
-			rate->val2 : rate->val;
+			(rsel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
+			rsel.rate->val2 : rsel.rate->val;
 		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 		control->power_level = local->hw.conf.power_level;
 		control->flags |= IEEE80211_TXCTL_NO_ACK;
-- 
cgit v1.2.3


From ad01837593338f13508463fa11c8dbf8109a1e5d Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mattias.nissler@gmx.de>
Date: Wed, 19 Dec 2007 01:25:57 +0100
Subject: mac80211: add PID controller based rate control algorithm

Add a new rate control algorithm based on a PID controller. It samples the
percentage of failed frames over time, feeds the result into the controller and
uses its output to control the TX rate.

Signed-off-by: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Kconfig          |  12 ++
 net/mac80211/Makefile         |   1 +
 net/mac80211/ieee80211.c      |  27 +++-
 net/mac80211/ieee80211_rate.h |   3 +
 net/mac80211/rc80211_pid.c    | 365 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 403 insertions(+), 5 deletions(-)
 create mode 100644 net/mac80211/rc80211_pid.c

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 09711b06280..9f8663b412d 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -25,6 +25,18 @@ config MAC80211_RCSIMPLE
 	  Say Y unless you know you will have another algorithm
 	  available.
 
+config MAC80211_RCPID
+	bool "'PID' rate control algorithm" if EMBEDDED
+	default y
+	depends on MAC80211
+	help
+	  This option enables a TX rate control algorithm for
+	  mac80211 that uses a PID controller to select the TX
+	  rate.
+
+	  Say Y unless you're sure you want to use a different
+	  rate control algorithm.
+
 config MAC80211_LEDS
 	bool "Enable LED triggers"
 	depends on MAC80211 && LEDS_TRIGGERS
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 1e6237b3484..62c01ca099e 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -4,6 +4,7 @@ mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
 mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o
 mac80211-objs-$(CONFIG_NET_SCHED) += wme.o
 mac80211-objs-$(CONFIG_MAC80211_RCSIMPLE) += rc80211_simple.o
+mac80211-objs-$(CONFIG_MAC80211_RCPID) += rc80211_pid.o
 
 mac80211-objs := \
 	ieee80211.o \
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 5bf7a5bebfc..3d7b4341914 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -1315,23 +1315,37 @@ static int __init ieee80211_init(void)
 #ifdef CONFIG_MAC80211_RCSIMPLE
 	ret = ieee80211_rate_control_register(&mac80211_rcsimple);
 	if (ret)
-		return ret;
+		goto fail;
+#endif
+
+#ifdef CONFIG_MAC80211_RCPID
+	ret = ieee80211_rate_control_register(&mac80211_rcpid);
+	if (ret)
+		goto fail;
 #endif
 
 	ret = ieee80211_wme_register();
 	if (ret) {
-#ifdef CONFIG_MAC80211_RCSIMPLE
-		ieee80211_rate_control_unregister(&mac80211_rcsimple);
-#endif
 		printk(KERN_DEBUG "ieee80211_init: failed to "
 		       "initialize WME (err=%d)\n", ret);
-		return ret;
+		goto fail;
 	}
 
 	ieee80211_debugfs_netdev_init();
 	ieee80211_regdomain_init();
 
 	return 0;
+
+fail:
+
+#ifdef CONFIG_MAC80211_RCSIMPLE
+	ieee80211_rate_control_unregister(&mac80211_rcsimple);
+#endif
+#ifdef CONFIG_MAC80211_RCPID
+	ieee80211_rate_control_unregister(&mac80211_rcpid);
+#endif
+
+	return ret;
 }
 
 static void __exit ieee80211_exit(void)
@@ -1339,6 +1353,9 @@ static void __exit ieee80211_exit(void)
 #ifdef CONFIG_MAC80211_RCSIMPLE
 	ieee80211_rate_control_unregister(&mac80211_rcsimple);
 #endif
+#ifdef CONFIG_MAC80211_RCPID
+	ieee80211_rate_control_unregister(&mac80211_rcpid);
+#endif
 
 	ieee80211_wme_unregister();
 	ieee80211_debugfs_netdev_exit();
diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
index 787134bd6ab..3eb0696f375 100644
--- a/net/mac80211/ieee80211_rate.h
+++ b/net/mac80211/ieee80211_rate.h
@@ -61,6 +61,9 @@ struct rate_control_ref {
 /* default 'simple' algorithm */
 extern struct rate_control_ops mac80211_rcsimple;
 
+/* 'PID' algorithm */
+extern struct rate_control_ops mac80211_rcpid;
+
 int ieee80211_rate_control_register(struct rate_control_ops *ops);
 void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
 
diff --git a/net/mac80211/rc80211_pid.c b/net/mac80211/rc80211_pid.c
new file mode 100644
index 00000000000..b358824b5ac
--- /dev/null
+++ b/net/mac80211/rc80211_pid.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_rate.h"
+
+
+/* This is an implementation of a TX rate control algorithm that uses a PID
+ * controller. Given a target failed frames rate, the controller decides about
+ * TX rate changes to meet the target failed frames rate.
+ *
+ * The controller basically computes the following:
+ *
+ * adj = CP * err + CI * err_avg + CD * (err - last_err)
+ *
+ * where
+ * 	adj	adjustment value that is used to switch TX rate (see below)
+ * 	err	current error: target vs. current failed frames percentage
+ * 	last_err	last error
+ * 	err_avg	average (i.e. poor man's integral) of recent errors
+ * 	CP	Proportional coefficient
+ * 	CI	Integral coefficient
+ * 	CD	Derivative coefficient
+ *
+ * CP, CI, CD are subject to careful tuning.
+ *
+ * The integral component uses a exponential moving average approach instead of
+ * an actual sliding window. The advantage is that we don't need to keep an
+ * array of the last N error values and computation is easier.
+ *
+ * Once we have the adj value, we need to map it to a TX rate to be selected.
+ * For now, we depend on the rates to be ordered in a way such that more robust
+ * rates (i.e. such that exhibit a lower framed failed percentage) come first.
+ * E.g. for the 802.11b/g case, we first have the b rates in ascending order,
+ * then the g rates. The adj simply decides the index of the TX rate in the list
+ * to switch to (relative to the current TX rate entry).
+ *
+ * Note that for the computations we use a fixed-point representation to avoid
+ * floating point arithmetic. Hence, all values are shifted left by
+ * RC_PID_ARITH_SHIFT.
+ */
+
+/* Sampling period for measuring percentage of failed frames. */
+#define RC_PID_INTERVAL (HZ / 8)
+
+/* Exponential averaging smoothness (used for I part of PID controller) */
+#define RC_PID_SMOOTHING_SHIFT 3
+#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
+
+/* Fixed point arithmetic shifting amount. */
+#define RC_PID_ARITH_SHIFT 8
+
+/* Fixed point arithmetic factor. */
+#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
+
+/* Proportional PID component coefficient. */
+#define RC_PID_COEFF_P 15
+/* Integral PID component coefficient. */
+#define RC_PID_COEFF_I 9
+/* Derivative PID component coefficient. */
+#define RC_PID_COEFF_D 15
+
+/* Target failed frames rate for the PID controller. NB: This effectively gives
+ * maximum failed frames percentage we're willing to accept. If the wireless
+ * link quality is good, the controller will fail to adjust failed frames
+ * percentage to the target. This is intentional.
+ */
+#define RC_PID_TARGET_PF (11 << RC_PID_ARITH_SHIFT)
+
+struct rc_pid_sta_info {
+	unsigned long last_change;
+	unsigned long last_sample;
+
+	u32 tx_num_failed;
+	u32 tx_num_xmit;
+
+	/* Average failed frames percentage error (i.e. actual vs. target
+	 * percentage), scaled by RC_PID_SMOOTHING. This value is computed
+	 * using using an exponential weighted average technique:
+	 *
+	 *           (RC_PID_SMOOTHING - 1) * err_avg_old + err
+	 * err_avg = ------------------------------------------
+	 *                       RC_PID_SMOOTHING
+	 *
+	 * where err_avg is the new approximation, err_avg_old the previous one
+	 * and err is the error w.r.t. to the current failed frames percentage
+	 * sample. Note that the bigger RC_PID_SMOOTHING the more weight is
+	 * given to the previous estimate, resulting in smoother behavior (i.e.
+	 * corresponding to a longer integration window).
+	 *
+	 * For computation, we actually don't use the above formula, but this
+	 * one:
+	 *
+	 * err_avg_scaled = err_avg_old_scaled - err_avg_old + err
+	 *
+	 * where:
+	 * 	err_avg_scaled = err * RC_PID_SMOOTHING
+	 * 	err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING
+	 *
+	 * This avoids floating point numbers and the per_failed_old value can
+	 * easily be obtained by shifting per_failed_old_scaled right by
+	 * RC_PID_SMOOTHING_SHIFT.
+	 */
+	s32 err_avg_sc;
+
+	/* Last framed failes percentage sample */
+	u32 last_pf;
+};
+
+/* Algorithm parameters. We keep them on a per-algorithm approach, so they can
+ * be tuned individually for each interface.
+ */
+struct rc_pid_info {
+
+	/* The failed frames percentage target. */
+	u32 target;
+
+	/* P, I and D coefficients. */
+	s32 coeff_p;
+	s32 coeff_i;
+	s32 coeff_d;
+};
+
+
+static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
+					 struct sta_info *sta, int adj)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_hw_mode *mode;
+	int newidx = sta->txrate + adj;
+	int maxrate;
+	int back = (adj > 0) ? 1 : -1;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
+		/* forced unicast rate - do not change STA rate */
+		return;
+	}
+
+	mode = local->oper_hw_mode;
+	maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
+
+	if (newidx < 0)
+		newidx = 0;
+	else if (newidx >= mode->num_rates)
+		newidx = mode->num_rates - 1;
+
+	while (newidx != sta->txrate) {
+		if (rate_supported(sta, mode, newidx) &&
+		    (maxrate < 0 || newidx <= maxrate)) {
+			sta->txrate = newidx;
+			break;
+		}
+
+		newidx += back;
+	}
+}
+
+static void rate_control_pid_sample(struct rc_pid_info *pinfo,
+				    struct ieee80211_local *local,
+				    struct sta_info *sta)
+{
+	struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
+	u32 pf;
+	s32 err_avg;
+	s32 err_prop;
+	s32 err_int;
+	s32 err_der;
+	int adj;
+
+	spinfo = sta->rate_ctrl_priv;
+	spinfo->last_sample = jiffies;
+
+	/* If no frames were transmitted, we assume the old sample is
+	 * still a good measurement and copy it. */
+	if (spinfo->tx_num_xmit == 0)
+		pf = spinfo->last_pf;
+	else {
+		pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
+		pf <<= RC_PID_ARITH_SHIFT;
+
+		spinfo->tx_num_xmit = 0;
+		spinfo->tx_num_failed = 0;
+	}
+
+	/* Compute the proportional, integral and derivative errors. */
+	err_prop = RC_PID_TARGET_PF - pf;
+
+	err_avg = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
+	spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
+	err_int = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
+
+	err_der = pf - spinfo->last_pf;
+	spinfo->last_pf = pf;
+
+	/* Compute the controller output. */
+	adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
+	      + err_der * pinfo->coeff_d);
+
+	/* We need to do an arithmetic right shift. ISO C says this is
+	 * implementation defined for negative left operands. Hence, be
+	 * careful to get it right, also for negative values. */
+	adj = (adj < 0) ? -((-adj) >> (2 * RC_PID_ARITH_SHIFT)) :
+			  adj >> (2 * RC_PID_ARITH_SHIFT);
+
+	/* Change rate. */
+	if (adj)
+		rate_control_pid_adjust_rate(local, sta, adj);
+}
+
+static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
+				       struct sk_buff *skb,
+				       struct ieee80211_tx_status *status)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct rc_pid_info *pinfo = priv;
+	struct sta_info *sta;
+	struct rc_pid_sta_info *spinfo;
+
+	sta = sta_info_get(local, hdr->addr1);
+
+	if (!sta)
+		return;
+
+	/* Ignore all frames that were sent with a different rate than the rate
+	 * we currently advise mac80211 to use. */
+	if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
+		return;
+
+	spinfo = sta->rate_ctrl_priv;
+	spinfo->tx_num_xmit++;
+
+	/* We count frames that totally failed to be transmitted as two bad
+	 * frames, those that made it out but had some retries as one good and
+	 * one bad frame. */
+	if (status->excessive_retries) {
+		spinfo->tx_num_failed += 2;
+		spinfo->tx_num_xmit++;
+	} else if (status->retry_count) {
+		spinfo->tx_num_failed++;
+		spinfo->tx_num_xmit++;
+	}
+
+	if (status->excessive_retries) {
+		sta->tx_retry_failed++;
+		sta->tx_num_consecutive_failures++;
+		sta->tx_num_mpdu_fail++;
+	} else {
+		sta->last_ack_rssi[0] = sta->last_ack_rssi[1];
+		sta->last_ack_rssi[1] = sta->last_ack_rssi[2];
+		sta->last_ack_rssi[2] = status->ack_signal;
+		sta->tx_num_consecutive_failures = 0;
+		sta->tx_num_mpdu_ok++;
+	}
+	sta->tx_retry_count += status->retry_count;
+	sta->tx_num_mpdu_fail += status->retry_count;
+
+	/* Update PID controller state. */
+	if (time_after(jiffies, spinfo->last_sample + RC_PID_INTERVAL))
+		rate_control_pid_sample(pinfo, local, sta);
+
+	sta_info_put(sta);
+}
+
+static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
+				      struct ieee80211_hw_mode *mode,
+				      struct sk_buff *skb,
+				      struct rate_selection *sel)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct sta_info *sta;
+	int rateidx;
+
+	sta = sta_info_get(local, hdr->addr1);
+
+	if (!sta) {
+		sel->rate = rate_lowest(local, mode, NULL);
+		sta_info_put(sta);
+		return;
+	}
+
+	rateidx = sta->txrate;
+
+	if (rateidx >= mode->num_rates)
+		rateidx = mode->num_rates - 1;
+
+	sta_info_put(sta);
+
+	sel->rate = &mode->rates[rateidx];
+}
+
+static void rate_control_pid_rate_init(void *priv, void *priv_sta,
+					  struct ieee80211_local *local,
+					  struct sta_info *sta)
+{
+	/* TODO: This routine should consider using RSSI from previous packets
+	 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
+	 * Until that method is implemented, we will use the lowest supported
+	 * rate as a workaround. */
+	sta->txrate = rate_lowest_index(local, local->oper_hw_mode, sta);
+}
+
+static void *rate_control_pid_alloc(struct ieee80211_local *local)
+{
+	struct rc_pid_info *pinfo;
+
+	pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
+
+	pinfo->target = RC_PID_TARGET_PF;
+	pinfo->coeff_p = RC_PID_COEFF_P;
+	pinfo->coeff_i = RC_PID_COEFF_I;
+	pinfo->coeff_d = RC_PID_COEFF_D;
+
+	return pinfo;
+}
+
+static void rate_control_pid_free(void *priv)
+{
+	struct rc_pid_info *pinfo = priv;
+	kfree(pinfo);
+}
+
+static void rate_control_pid_clear(void *priv)
+{
+}
+
+static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
+{
+	struct rc_pid_sta_info *spinfo;
+
+	spinfo = kzalloc(sizeof(*spinfo), gfp);
+
+	return spinfo;
+}
+
+static void rate_control_pid_free_sta(void *priv, void *priv_sta)
+{
+	struct rc_pid_sta_info *spinfo = priv_sta;
+	kfree(spinfo);
+}
+
+struct rate_control_ops mac80211_rcpid = {
+	.name = "pid",
+	.tx_status = rate_control_pid_tx_status,
+	.get_rate = rate_control_pid_get_rate,
+	.rate_init = rate_control_pid_rate_init,
+	.clear = rate_control_pid_clear,
+	.alloc = rate_control_pid_alloc,
+	.free = rate_control_pid_free,
+	.alloc_sta = rate_control_pid_alloc_sta,
+	.free_sta = rate_control_pid_free_sta,
+};
-- 
cgit v1.2.3


From 195ad6a3ac8b5c4eef4916efcb673e96e6f09d89 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 20 Dec 2007 13:53:40 -0800
Subject: [IPSEC]: Rename tunnel-mode functions to avoid collisions with
 tunnels

It appears that I've managed to create two different functions both
called xfrm6_tunnel_output.  This is because we have the plain tunnel
encapsulation named xfrmX_tunnel as well as the tunnel-mode encapsulation
which lives in the files xfrmX_mode_tunnel.c.

This patch renames functions from the latter to use the xfrmX_mode_tunnel
prefix to avoid name-space conflicts.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_tunnel.c | 16 ++++++++--------
 net/ipv6/xfrm6_mode_tunnel.c | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index aa335dba8ff..8dee617ee90 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
  *
  * The top IP header will be constructed per RFC 2401.
  */
-static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct iphdr *top_iph;
@@ -63,7 +63,7 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
-static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	const unsigned char *old_mac;
 	int err = -EINVAL;
@@ -94,21 +94,21 @@ out:
 }
 
 static struct xfrm_mode xfrm4_tunnel_mode = {
-	.input2 = xfrm4_tunnel_input,
+	.input2 = xfrm4_mode_tunnel_input,
 	.input = xfrm_prepare_input,
-	.output2 = xfrm4_tunnel_output,
+	.output2 = xfrm4_mode_tunnel_output,
 	.output = xfrm4_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
-static int __init xfrm4_tunnel_init(void)
+static int __init xfrm4_mode_tunnel_init(void)
 {
 	return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
 }
 
-static void __exit xfrm4_tunnel_exit(void)
+static void __exit xfrm4_mode_tunnel_exit(void)
 {
 	int err;
 
@@ -116,7 +116,7 @@ static void __exit xfrm4_tunnel_exit(void)
 	BUG_ON(err);
 }
 
-module_init(xfrm4_tunnel_init);
-module_exit(xfrm4_tunnel_exit);
+module_init(xfrm4_mode_tunnel_init);
+module_exit(xfrm4_mode_tunnel_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index f7d0d661265..0c742faaa30 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -29,7 +29,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
  *
  * The top IP header will be constructed per RFC 2401.
  */
-static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct ipv6hdr *top_iph;
@@ -58,7 +58,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
-static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
 	const unsigned char *old_mac;
@@ -89,21 +89,21 @@ out:
 }
 
 static struct xfrm_mode xfrm6_tunnel_mode = {
-	.input2 = xfrm6_tunnel_input,
+	.input2 = xfrm6_mode_tunnel_input,
 	.input = xfrm_prepare_input,
-	.output2 = xfrm6_tunnel_output,
+	.output2 = xfrm6_mode_tunnel_output,
 	.output = xfrm6_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
-static int __init xfrm6_tunnel_init(void)
+static int __init xfrm6_mode_tunnel_init(void)
 {
 	return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
 }
 
-static void __exit xfrm6_tunnel_exit(void)
+static void __exit xfrm6_mode_tunnel_exit(void)
 {
 	int err;
 
@@ -111,7 +111,7 @@ static void __exit xfrm6_tunnel_exit(void)
 	BUG_ON(err);
 }
 
-module_init(xfrm6_tunnel_init);
-module_exit(xfrm6_tunnel_exit);
+module_init(xfrm6_mode_tunnel_init);
+module_exit(xfrm6_mode_tunnel_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
-- 
cgit v1.2.3


From 6afd2e83cd86b17b074e1854d063b8ec590d7f5b Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:08:04 -0800
Subject: [SCTP]: Discard unauthenticated ASCONF and ASCONF ACK chunks

Now that we support AUTH, discard unauthenticated ASCONF and ASCONF ACK
chunks as mandated in the ADD-IP spec.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d247ed4ee42..b6aaa7e97d8 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3377,6 +3377,15 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 	}
 
+	/* ADD-IP: Section 4.1.1
+	 * This chunk MUST be sent in an authenticated way by using
+	 * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
+	 * is received unauthenticated it MUST be silently discarded as
+	 * described in [I-D.ietf-tsvwg-sctp-auth].
+	 */
+	if (!sctp_addip_noauth && !chunk->auth)
+		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+
 	/* Make sure that the ASCONF ADDIP chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
 		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
@@ -3463,6 +3472,15 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 	}
 
+	/* ADD-IP, Section 4.1.2:
+	 * This chunk MUST be sent in an authenticated way by using
+	 * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
+	 * is received unauthenticated it MUST be silently discarded as
+	 * described in [I-D.ietf-tsvwg-sctp-auth].
+	 */
+	if (!sctp_addip_noauth && !asconf_ack->auth)
+		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+
 	/* Make sure that the ADDIP chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
 		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
-- 
cgit v1.2.3


From 42e30bf3463cd37d73839376662cb79b4d5c416c Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:08:56 -0800
Subject: [SCTP]: Handle the wildcard ADD-IP Address parameter

The Address Parameter in the parameter list of the ASCONF chunk
may be a wildcard address.  In this case special processing
is required.  For the 'add' case, the source IP of the packet is
added.  In the 'del' case, all addresses except the source IP
of packet are removed. In the "mark primary" case, the source
address is marked as primary.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c     | 17 +++++++++++++++++
 net/sctp/sm_make_chunk.c | 40 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 53 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 33ae9b01131..61bebb9b96e 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -730,6 +730,23 @@ struct sctp_transport *sctp_assoc_lookup_paddr(
 	return NULL;
 }
 
+/* Remove all transports except a give one */
+void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
+				     struct sctp_transport *primary)
+{
+	struct sctp_transport	*temp;
+	struct sctp_transport	*t;
+
+	list_for_each_entry_safe(t, temp, &asoc->peer.transport_addr_list,
+				 transports) {
+		/* if the current transport is not the primary one, delete it */
+		if (t != primary)
+			sctp_assoc_rm_peer(asoc, t);
+	}
+
+	return;
+}
+
 /* Engage in transport control operations.
  * Mark the transport up or down and send a notification to the user.
  * Select and update the new active and retran paths.
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 3cc629d3c9f..64790b53323 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2727,7 +2727,6 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 	struct sctp_transport *peer;
 	struct sctp_af *af;
 	union sctp_addr	addr;
-	struct list_head *pos;
 	union sctp_addr_param *addr_param;
 
 	addr_param = (union sctp_addr_param *)
@@ -2738,8 +2737,24 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 		return SCTP_ERROR_INV_PARAM;
 
 	af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
+
+	/* ADDIP 4.2.1  This parameter MUST NOT contain a broadcast
+	 * or multicast address.
+	 * (note: wildcard is permitted and requires special handling so
+	 *  make sure we check for that)
+	 */
+	if (!af->is_any(&addr) && !af->addr_valid(&addr, NULL, asconf->skb))
+		return SCTP_ERROR_INV_PARAM;
+
 	switch (asconf_param->param_hdr.type) {
 	case SCTP_PARAM_ADD_IP:
+		/* Section 4.2.1:
+		 * If the address 0.0.0.0 or ::0 is provided, the source
+		 * address of the packet MUST be added.
+		 */
+		if (af->is_any(&addr))
+			memcpy(&addr, &asconf->source, sizeof(addr));
+
 		/* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
 		 * request and does not have the local resources to add this
 		 * new address to the association, it MUST return an Error
@@ -2761,8 +2776,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 		 * MUST send an Error Cause TLV with the error cause set to the
 		 * new error code 'Request to Delete Last Remaining IP Address'.
 		 */
-		pos = asoc->peer.transport_addr_list.next;
-		if (pos->next == &asoc->peer.transport_addr_list)
+		if (asoc->peer.transport_count == 1)
 			return SCTP_ERROR_DEL_LAST_IP;
 
 		/* ADDIP 4.3 D8) If a request is received to delete an IP
@@ -2775,9 +2789,27 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 		if (sctp_cmp_addr_exact(sctp_source(asconf), &addr))
 			return SCTP_ERROR_DEL_SRC_IP;
 
-		sctp_assoc_del_peer(asoc, &addr);
+		/* Section 4.2.2
+		 * If the address 0.0.0.0 or ::0 is provided, all
+		 * addresses of the peer except	the source address of the
+		 * packet MUST be deleted.
+		 */
+		if (af->is_any(&addr)) {
+			sctp_assoc_set_primary(asoc, asconf->transport);
+			sctp_assoc_del_nonprimary_peers(asoc,
+							asconf->transport);
+		} else
+			sctp_assoc_del_peer(asoc, &addr);
 		break;
 	case SCTP_PARAM_SET_PRIMARY:
+		/* ADDIP Section 4.2.4
+		 * If the address 0.0.0.0 or ::0 is provided, the receiver
+		 * MAY mark the source address of the packet as its
+		 * primary.
+		 */
+		if (af->is_any(&addr))
+			memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
+
 		peer = sctp_assoc_lookup_paddr(asoc, &addr);
 		if (!peer)
 			return SCTP_ERROR_INV_PARAM;
-- 
cgit v1.2.3


From d6de3097592b7ae7f8e233a4dafb088e2aa8170f Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:10:00 -0800
Subject: [SCTP]: Add the handling of "Set Primary IP Address" parameter to
 INIT

The ADD-IP "Set Primary IP Address" parameter is allowed in the
INIT/INIT-ACK exchange.  Allow processing of this parameter during
the INIT/INIT-ACK.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 64790b53323..8138bbd9321 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1969,6 +1969,11 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
 	case SCTP_PARAM_SUPPORTED_EXT:
 		break;
 
+	case SCTP_PARAM_SET_PRIMARY:
+		if (sctp_addip_enable)
+			break;
+		goto fallthrough;
+
 	case SCTP_PARAM_HOST_NAME_ADDRESS:
 		/* Tell the peer, we won't support this param.  */
 		sctp_process_hn_param(asoc, param, chunk, err_chunk);
@@ -2286,6 +2291,8 @@ static int sctp_process_param(struct sctp_association *asoc,
 	sctp_scope_t scope;
 	time_t stale;
 	struct sctp_af *af;
+	union sctp_addr_param *addr_param;
+	struct sctp_transport *t;
 
 	/* We maintain all INIT parameters in network byte order all the
 	 * time.  This allows us to not worry about whether the parameters
@@ -2376,6 +2383,26 @@ static int sctp_process_param(struct sctp_association *asoc,
 		asoc->peer.adaptation_ind = param.aind->adaptation_ind;
 		break;
 
+	case SCTP_PARAM_SET_PRIMARY:
+		addr_param = param.v + sizeof(sctp_addip_param_t);
+
+		af = sctp_get_af_specific(param_type2af(param.p->type));
+		af->from_addr_param(&addr, addr_param,
+				    htons(asoc->peer.port), 0);
+
+		/* if the address is invalid, we can't process it.
+		 * XXX: see spec for what to do.
+		 */
+		if (!af->addr_valid(&addr, NULL, NULL))
+			break;
+
+		t = sctp_assoc_lookup_paddr(asoc, &addr);
+		if (!t)
+			break;
+
+		sctp_assoc_set_primary(asoc, t);
+		break;
+
 	case SCTP_PARAM_SUPPORTED_EXT:
 		sctp_process_ext_param(asoc, param);
 		break;
-- 
cgit v1.2.3


From df21857714398acb8b24a8bb5a6d2286dd9c59ef Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:10:38 -0800
Subject: [SCTP]: Update association lookup to look at ASCONF chunks as well

ADD-IP draft section 5.2 specifies that if an association can not
be found using the source and destination of the IP packet,
then, if the packet contains ASCONF chunks, the Address Parameter
TLV should be used to lookup an association.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 124 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 103 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/sctp/input.c b/net/sctp/input.c
index b08c7cb5c9d..d695f710fc7 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -891,14 +891,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 
 	ch = (sctp_chunkhdr_t *) skb->data;
 
-	/* The code below will attempt to walk the chunk and extract
-	 * parameter information.  Before we do that, we need to verify
-	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
-	 * walk off the end.
-	 */
-	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
-		return NULL;
-
 	/*
 	 * This code will NOT touch anything inside the chunk--it is
 	 * strictly READ-ONLY.
@@ -935,6 +927,44 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 	return NULL;
 }
 
+/* ADD-IP, Section 5.2
+ * When an endpoint receives an ASCONF Chunk from the remote peer
+ * special procedures may be needed to identify the association the
+ * ASCONF Chunk is associated with. To properly find the association
+ * the following procedures SHOULD be followed:
+ *
+ * D2) If the association is not found, use the address found in the
+ * Address Parameter TLV combined with the port number found in the
+ * SCTP common header. If found proceed to rule D4.
+ *
+ * D2-ext) If more than one ASCONF Chunks are packed together, use the
+ * address found in the ASCONF Address Parameter TLV of each of the
+ * subsequent ASCONF Chunks. If found, proceed to rule D4.
+ */
+static struct sctp_association *__sctp_rcv_asconf_lookup(
+					sctp_chunkhdr_t *ch,
+					const union sctp_addr *laddr,
+					__be32 peer_port,
+					struct sctp_transport **transportp)
+{
+	sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
+	struct sctp_af *af;
+	union sctp_addr_param *param;
+	union sctp_addr paddr;
+
+	/* Skip over the ADDIP header and find the Address parameter */
+	param = (union sctp_addr_param *)(asconf + 1);
+
+	af = sctp_get_af_specific(param_type2af(param->v4.param_hdr.type));
+	if (unlikely(!af))
+		return NULL;
+
+	af->from_addr_param(&paddr, param, peer_port, 0);
+
+	return __sctp_lookup_association(laddr, &paddr, transportp);
+}
+
+
 /* SCTP-AUTH, Section 6.3:
 *    If the receiver does not find a STCB for a packet containing an AUTH
 *    chunk as the first chunk and not a COOKIE-ECHO chunk as the second
@@ -943,20 +973,64 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 *
 * This means that any chunks that can help us identify the association need
 * to be looked at to find this assocation.
-*
-* TODO: The only chunk currently defined that can do that is ASCONF, but we
-* don't support that functionality yet.
 */
-static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb,
-				      const union sctp_addr *paddr,
+static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
-	/* XXX - walk through the chunks looking for something that can
-	 * help us find the association.  INIT, and INIT-ACK are not permitted.
-	 * That leaves ASCONF, but we don't support that yet.
+	struct sctp_association *asoc = NULL;
+	sctp_chunkhdr_t *ch;
+	int have_auth = 0;
+	unsigned int chunk_num = 1;
+	__u8 *ch_end;
+
+	/* Walk through the chunks looking for AUTH or ASCONF chunks
+	 * to help us find the association.
 	 */
-	return NULL;
+	ch = (sctp_chunkhdr_t *) skb->data;
+	do {
+		/* Break out if chunk length is less then minimal. */
+		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+			break;
+
+		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		if (ch_end > skb_tail_pointer(skb))
+			break;
+
+		switch(ch->type) {
+		    case SCTP_CID_AUTH:
+			    have_auth = chunk_num;
+			    break;
+
+		    case SCTP_CID_COOKIE_ECHO:
+			    /* If a packet arrives containing an AUTH chunk as
+			     * a first chunk, a COOKIE-ECHO chunk as the second
+			     * chunk, and possibly more chunks after them, and
+			     * the receiver does not have an STCB for that
+			     * packet, then authentication is based on
+			     * the contents of the COOKIE- ECHO chunk.
+			     */
+			    if (have_auth == 1 && chunk_num == 2)
+				    return NULL;
+			    break;
+
+		    case SCTP_CID_ASCONF:
+			    if (have_auth || sctp_addip_noauth)
+				    asoc = __sctp_rcv_asconf_lookup(ch, laddr,
+							sctp_hdr(skb)->source,
+							transportp);
+		    default:
+			    break;
+		}
+
+		if (asoc)
+			break;
+
+		ch = (sctp_chunkhdr_t *) ch_end;
+		chunk_num++;
+	} while (ch_end < skb_tail_pointer(skb));
+
+	return asoc;
 }
 
 /*
@@ -966,7 +1040,6 @@ static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb,
  * chunks.
  */
 static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
-				      const union sctp_addr *paddr,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
@@ -974,6 +1047,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
 
 	ch = (sctp_chunkhdr_t *) skb->data;
 
+	/* The code below will attempt to walk the chunk and extract
+	 * parameter information.  Before we do that, we need to verify
+	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
+	 * walk off the end.
+	 */
+	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+		return NULL;
+
 	/* If this is INIT/INIT-ACK look inside the chunk too. */
 	switch (ch->type) {
 	case SCTP_CID_INIT:
@@ -981,11 +1062,12 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
 		return __sctp_rcv_init_lookup(skb, laddr, transportp);
 		break;
 
-	case SCTP_CID_AUTH:
-		return __sctp_rcv_auth_lookup(skb, paddr, laddr, transportp);
+	default:
+		return __sctp_rcv_walk_lookup(skb, laddr, transportp);
 		break;
 	}
 
+
 	return NULL;
 }
 
@@ -1004,7 +1086,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
 	 * parameters within the INIT or INIT-ACK.
 	 */
 	if (!asoc)
-		asoc = __sctp_rcv_lookup_harder(skb, paddr, laddr, transportp);
+		asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp);
 
 	return asoc;
 }
-- 
cgit v1.2.3


From ba8a06daed7d7c8785c92c343da9e202e6988fda Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:11:11 -0800
Subject: [SCTP]: ADD-IP updates the states where ASCONFs can be sent

   C4)  Both ASCONF and ASCONF-ACK Chunks MUST NOT be sent in any SCTP
        state except ESTABLISHED, SHUTDOWN-PENDING, SHUTDOWN-RECEIVED,
        and SHUTDOWN-SENT.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statetable.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index a93a4bc8f68..e6016e41ffa 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -457,11 +457,11 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
 } /* TYPE_SCTP_ASCONF */
@@ -478,11 +478,11 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
 } /* TYPE_SCTP_ASCONF_ACK */
@@ -691,11 +691,11 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
+	TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
-	TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
+	TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
-	TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
+	TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
 } /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
-- 
cgit v1.2.3


From a08de64d074b36a56ee3bb985cd171281db78e96 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:11:47 -0800
Subject: [SCTP]: Update ASCONF processing to conform to spec.

The processing of the ASCONF chunks has changed a lot in the
spec.  New items are:
    1. A list of ASCONF-ACK chunks is now cached
    2. The source of the packet is used in response.
    3. New handling for unexpect ASCONF chunks.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c     | 58 +++++++++++++++++++++++++++++++++++++++++--
 net/sctp/outqueue.c      | 29 ++++++++++++++++++++--
 net/sctp/sm_make_chunk.c | 12 ++++-----
 net/sctp/sm_statefuns.c  | 64 +++++++++++++++++++++++++++++++-----------------
 4 files changed, 130 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 61bebb9b96e..a016e78061f 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -61,6 +61,7 @@
 
 /* Forward declarations for internal functions. */
 static void sctp_assoc_bh_rcv(struct work_struct *work);
+static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
 
 
 /* 1st Level Abstractions. */
@@ -242,6 +243,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	asoc->addip_serial = asoc->c.initial_tsn;
 
 	INIT_LIST_HEAD(&asoc->addip_chunk_list);
+	INIT_LIST_HEAD(&asoc->asconf_ack_list);
 
 	/* Make an empty list of remote transport addresses.  */
 	INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
@@ -431,8 +433,7 @@ void sctp_association_free(struct sctp_association *asoc)
 	asoc->peer.transport_count = 0;
 
 	/* Free any cached ASCONF_ACK chunk. */
-	if (asoc->addip_last_asconf_ack)
-		sctp_chunk_free(asoc->addip_last_asconf_ack);
+	sctp_assoc_free_asconf_acks(asoc);
 
 	/* Free any cached ASCONF chunk. */
 	if (asoc->addip_last_asconf)
@@ -1485,3 +1486,56 @@ retry:
 	asoc->assoc_id = (sctp_assoc_t) assoc_id;
 	return error;
 }
+
+/* Free asconf_ack cache */
+static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc)
+{
+	struct sctp_chunk *ack;
+	struct sctp_chunk *tmp;
+
+	list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
+				transmitted_list) {
+		list_del_init(&ack->transmitted_list);
+		sctp_chunk_free(ack);
+	}
+}
+
+/* Clean up the ASCONF_ACK queue */
+void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc)
+{
+	struct sctp_chunk *ack;
+	struct sctp_chunk *tmp;
+
+	/* We can remove all the entries from the queue upto
+	 * the "Peer-Sequence-Number".
+	 */
+	list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
+				transmitted_list) {
+		if (ack->subh.addip_hdr->serial ==
+				htonl(asoc->peer.addip_serial))
+			break;
+
+		list_del_init(&ack->transmitted_list);
+		sctp_chunk_free(ack);
+	}
+}
+
+/* Find the ASCONF_ACK whose serial number matches ASCONF */
+struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
+					const struct sctp_association *asoc,
+					__be32 serial)
+{
+	struct sctp_chunk *ack = NULL;
+
+	/* Walk through the list of cached ASCONF-ACKs and find the
+	 * ack chunk whose serial number matches that of the request.
+	 */
+	list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) {
+		if (ack->subh.addip_hdr->serial == serial) {
+			sctp_chunk_hold(ack);
+			break;
+		}
+	}
+
+	return ack;
+}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index fa76f235169..a42af865c2e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -716,7 +716,29 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 		new_transport = chunk->transport;
 
 		if (!new_transport) {
-			new_transport = asoc->peer.active_path;
+			/*
+			 * If we have a prior transport pointer, see if
+			 * the destination address of the chunk
+			 * matches the destination address of the
+			 * current transport.  If not a match, then
+			 * try to look up the transport with a given
+			 * destination address.  We do this because
+			 * after processing ASCONFs, we may have new
+			 * transports created.
+			 */
+			if (transport &&
+			    sctp_cmp_addr_exact(&chunk->dest,
+						&transport->ipaddr))
+					new_transport = transport;
+			else
+				new_transport = sctp_assoc_lookup_paddr(asoc,
+								&chunk->dest);
+
+			/* if we still don't have a new transport, then
+			 * use the current active path.
+			 */
+			if (!new_transport)
+				new_transport = asoc->peer.active_path;
 		} else if ((new_transport->state == SCTP_INACTIVE) ||
 			   (new_transport->state == SCTP_UNCONFIRMED)) {
 			/* If the chunk is Heartbeat or Heartbeat Ack,
@@ -729,9 +751,12 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			 * address of the IP datagram containing the
 			 * HEARTBEAT chunk to which this ack is responding.
 			 * ...
+			 *
+			 * ASCONF_ACKs also must be sent to the source.
 			 */
 			if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT &&
-			    chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK)
+			    chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK &&
+			    chunk->chunk_hdr->type != SCTP_CID_ASCONF_ACK)
 				new_transport = asoc->peer.active_path;
 		}
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8138bbd9321..7fd6a6b6861 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1275,6 +1275,9 @@ nodata:
 /* Release the memory occupied by a chunk.  */
 static void sctp_chunk_destroy(struct sctp_chunk *chunk)
 {
+	BUG_ON(!list_empty(&chunk->list));
+	list_del_init(&chunk->transmitted_list);
+
 	/* Free the chunk skb data and the SCTP_chunk stub itself. */
 	dev_kfree_skb(chunk->skb);
 
@@ -1285,9 +1288,6 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
 /* Possibly, free the chunk.  */
 void sctp_chunk_free(struct sctp_chunk *chunk)
 {
-	BUG_ON(!list_empty(&chunk->list));
-	list_del_init(&chunk->transmitted_list);
-
 	/* Release our reference on the message tracker. */
 	if (chunk->msg)
 		sctp_datamsg_put(chunk->msg);
@@ -2980,11 +2980,9 @@ done:
 	 * after freeing the reference to old asconf ack if any.
 	 */
 	if (asconf_ack) {
-		if (asoc->addip_last_asconf_ack)
-			sctp_chunk_free(asoc->addip_last_asconf_ack);
-
 		sctp_chunk_hold(asconf_ack);
-		asoc->addip_last_asconf_ack = asconf_ack;
+		list_add_tail(&asconf_ack->transmitted_list,
+			      &asoc->asconf_ack_list);
 	}
 
 	return asconf_ack;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b6aaa7e97d8..a1be9d93f1a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3402,48 +3402,68 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 
 	/* Verify the ASCONF chunk before processing it. */
 	if (!sctp_verify_asconf(asoc,
-	    (sctp_paramhdr_t *)((void *)addr_param + length),
-	    (void *)chunk->chunk_end,
-	    &err_param))
+			    (sctp_paramhdr_t *)((void *)addr_param + length),
+			    (void *)chunk->chunk_end,
+			    &err_param))
 		return sctp_sf_violation_paramlen(ep, asoc, type,
-			   (void *)&err_param, commands);
+						  (void *)&err_param, commands);
 
-	/* ADDIP 4.2 C1) Compare the value of the serial number to the value
+	/* ADDIP 5.2 E1) Compare the value of the serial number to the value
 	 * the endpoint stored in a new association variable
 	 * 'Peer-Serial-Number'.
 	 */
 	if (serial == asoc->peer.addip_serial + 1) {
-		/* ADDIP 4.2 C2) If the value found in the serial number is
-		 * equal to the ('Peer-Serial-Number' + 1), the endpoint MUST
-		 * do V1-V5.
+		/* If this is the first instance of ASCONF in the packet,
+		 * we can clean our old ASCONF-ACKs.
+		 */
+		if (!chunk->has_asconf)
+			sctp_assoc_clean_asconf_ack_cache(asoc);
+
+		/* ADDIP 5.2 E4) When the Sequence Number matches the next one
+		 * expected, process the ASCONF as described below and after
+		 * processing the ASCONF Chunk, append an ASCONF-ACK Chunk to
+		 * the response packet and cache a copy of it (in the event it
+		 * later needs to be retransmitted).
+		 *
+		 * Essentially, do V1-V5.
 		 */
 		asconf_ack = sctp_process_asconf((struct sctp_association *)
 						 asoc, chunk);
 		if (!asconf_ack)
 			return SCTP_DISPOSITION_NOMEM;
-	} else if (serial == asoc->peer.addip_serial) {
-		/* ADDIP 4.2 C3) If the value found in the serial number is
-		 * equal to the value stored in the 'Peer-Serial-Number'
-		 * IMPLEMENTATION NOTE: As an optimization a receiver may wish
-		 * to save the last ASCONF-ACK for some predetermined period of
-		 * time and instead of re-processing the ASCONF (with the same
-		 * serial number) it may just re-transmit the ASCONF-ACK.
+	} else if (serial < asoc->peer.addip_serial + 1) {
+		/* ADDIP 5.2 E2)
+		 * If the value found in the Sequence Number is less than the
+		 * ('Peer- Sequence-Number' + 1), simply skip to the next
+		 * ASCONF, and include in the outbound response packet
+		 * any previously cached ASCONF-ACK response that was
+		 * sent and saved that matches the Sequence Number of the
+		 * ASCONF.  Note: It is possible that no cached ASCONF-ACK
+		 * Chunk exists.  This will occur when an older ASCONF
+		 * arrives out of order.  In such a case, the receiver
+		 * should skip the ASCONF Chunk and not include ASCONF-ACK
+		 * Chunk for that chunk.
 		 */
-		if (asoc->addip_last_asconf_ack)
-			asconf_ack = asoc->addip_last_asconf_ack;
-		else
+		asconf_ack = sctp_assoc_lookup_asconf_ack(asoc, hdr->serial);
+		if (!asconf_ack)
 			return SCTP_DISPOSITION_DISCARD;
 	} else {
-		/* ADDIP 4.2 C4) Otherwise, the ASCONF Chunk is discarded since
+		/* ADDIP 5.2 E5) Otherwise, the ASCONF Chunk is discarded since
 		 * it must be either a stale packet or from an attacker.
 		 */
 		return SCTP_DISPOSITION_DISCARD;
 	}
 
-	/* ADDIP 4.2 C5) In both cases C2 and C3 the ASCONF-ACK MUST be sent
-	 * back to the source address contained in the IP header of the ASCONF
-	 * being responded to.
+	/* ADDIP 5.2 E6)  The destination address of the SCTP packet
+	 * containing the ASCONF-ACK Chunks MUST be the source address of
+	 * the SCTP packet that held the ASCONF Chunks.
+	 *
+	 * To do this properly, we'll set the destination address of the chunk
+	 * and at the transmit time, will try look up the transport to use.
+	 * Since ASCONFs may be bundled, the correct transport may not be
+	 * created untill we process the entire packet, thus this workaround.
 	 */
+	asconf_ack->dest = chunk->source;
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack));
 
 	return SCTP_DISPOSITION_CONSUME;
-- 
cgit v1.2.3


From f57d96b2e92d209ab3991bba9a44e0d6ef7614a8 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:12:24 -0800
Subject: [SCTP]: Change use_as_src into a full address state

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/bind_addr.c     | 9 +++++----
 net/sctp/ipv6.c          | 2 +-
 net/sctp/protocol.c      | 8 ++++----
 net/sctp/sm_make_chunk.c | 6 +++---
 net/sctp/socket.c        | 8 ++++----
 5 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 6a7d01091f0..43266117478 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -171,7 +171,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
 
 /* Add an address to the bind address list in the SCTP_bind_addr structure. */
 int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
-		       __u8 use_as_src, gfp_t gfp)
+		       __u8 addr_state, gfp_t gfp)
 {
 	struct sctp_sockaddr_entry *addr;
 
@@ -188,7 +188,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
 	if (!addr->a.v4.sin_port)
 		addr->a.v4.sin_port = htons(bp->port);
 
-	addr->use_as_src = use_as_src;
+	addr->state = addr_state;
 	addr->valid = 1;
 
 	INIT_LIST_HEAD(&addr->list);
@@ -312,7 +312,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
 		}
 
 		af->from_addr_param(&addr, rawaddr, htons(port), 0);
-		retval = sctp_add_bind_addr(bp, &addr, 1, gfp);
+		retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp);
 		if (retval) {
 			/* Can't finish building the list, clean up. */
 			sctp_bind_addr_clean(bp);
@@ -411,7 +411,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
 		    (((AF_INET6 == addr->sa.sa_family) &&
 		      (flags & SCTP_ADDR6_ALLOWED) &&
 		      (flags & SCTP_ADDR6_PEERSUPP))))
-			error = sctp_add_bind_addr(dest, addr, 1, gfp);
+			error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC,
+						    gfp);
 	}
 
 	return error;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 7f31ff638bc..bd04aed673c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -330,7 +330,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
 		if (!laddr->valid)
 			continue;
-		if ((laddr->use_as_src) &&
+		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (laddr->a.sa.sa_family == AF_INET6) &&
 		    (scope <= sctp_scope(&laddr->a))) {
 			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index dc22d710849..e466e00b9a9 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -229,8 +229,8 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
 			    (((AF_INET6 == addr->a.sa.sa_family) &&
 			      (copy_flags & SCTP_ADDR6_ALLOWED) &&
 			      (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
-				error = sctp_add_bind_addr(bp, &addr->a, 1,
-						    GFP_ATOMIC);
+				error = sctp_add_bind_addr(bp, &addr->a,
+						    SCTP_ADDR_SRC, GFP_ATOMIC);
 				if (error)
 					goto end_copy;
 			}
@@ -472,7 +472,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 		 */
 		rcu_read_lock();
 		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-			if (!laddr->valid || !laddr->use_as_src)
+			if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
 				continue;
 			sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
 			if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
@@ -494,7 +494,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
 		if (!laddr->valid)
 			continue;
-		if ((laddr->use_as_src) &&
+		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
 			if (!ip_route_output_key(&rt, &fl)) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 7fd6a6b6861..46f54188f00 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1692,8 +1692,8 @@ no_hmac:
 
 	/* Also, add the destination address. */
 	if (list_empty(&retval->base.bind_addr.address_list)) {
-		sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
-				GFP_ATOMIC);
+		sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
+				SCTP_ADDR_SRC, GFP_ATOMIC);
 	}
 
 	retval->next_tsn = retval->c.initial_tsn;
@@ -3016,7 +3016,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
 		local_bh_disable();
 		list_for_each_entry(saddr, &bp->address_list, list) {
 			if (sctp_cmp_addr_exact(&saddr->a, &addr))
-				saddr->use_as_src = 1;
+				saddr->state = SCTP_ADDR_SRC;
 		}
 		local_bh_enable();
 		break;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dc2f9221f09..7a8650f01d0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -390,7 +390,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
 	/* Add the address to the bind address list.
 	 * Use GFP_ATOMIC since BHs will be disabled.
 	 */
-	ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
+	ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC);
 
 	/* Copy back into socket for getsockname() use. */
 	if (!ret) {
@@ -585,8 +585,8 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 			addr = (union sctp_addr *)addr_buf;
 			af = sctp_get_af_specific(addr->v4.sin_family);
 			memcpy(&saveaddr, addr, af->sockaddr_len);
-			retval = sctp_add_bind_addr(bp, &saveaddr, 0,
-						    GFP_ATOMIC);
+			retval = sctp_add_bind_addr(bp, &saveaddr,
+						    SCTP_ADDR_NEW, GFP_ATOMIC);
 			addr_buf += af->sockaddr_len;
 		}
 	}
@@ -777,7 +777,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 			af = sctp_get_af_specific(laddr->v4.sin_family);
 			list_for_each_entry(saddr, &bp->address_list, list) {
 				if (sctp_cmp_addr_exact(&saddr->a, laddr))
-					saddr->use_as_src = 0;
+					saddr->state = SCTP_ADDR_DEL;
 			}
 			addr_buf += af->sockaddr_len;
 		}
-- 
cgit v1.2.3


From 75205f478331cc64ce729ea72d3c8c1837fb59cb Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:12:59 -0800
Subject: [SCTP]: Implement ADD-IP special case processing for ABORT chunk

ADD-IP spec has a special case for processing ABORTs:
    F4) ... One special consideration is that ABORT
        Chunks arriving destined to the IP address being deleted MUST be
        ignored (see Section 5.3.1 for further details).

Check if the address we received on is in the DEL state, and if
so, ignore the ABORT.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/bind_addr.c    | 26 +++++++++++++++++++++++++
 net/sctp/sm_statefuns.c | 52 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 43266117478..13fbfb449a5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -353,6 +353,32 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
 	return match;
 }
 
+/* Get the state of the entry in the bind_addr_list */
+int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
+			 const union sctp_addr *addr)
+{
+	struct sctp_sockaddr_entry *laddr;
+	struct sctp_af *af;
+	int state = -1;
+
+	af = sctp_get_af_specific(addr->sa.sa_family);
+	if (unlikely(!af))
+		return state;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		if (!laddr->valid)
+			continue;
+		if (af->cmp_addr(&laddr->a, addr)) {
+			state = laddr->state;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return state;
+}
+
 /* Find the first address in the bind address list that is not present in
  * the addrs packed array.
  */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a1be9d93f1a..0c9f37eb7d8 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -143,6 +143,12 @@ static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
 				    const sctp_subtype_t type,
 				    struct sctp_chunk *chunk);
 
+static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands);
+
 /* Small helper function that checks if the chunk length
  * is of the appropriate length.  The 'required_length' argument
  * is set to be the size of a specific chunk we are testing.
@@ -2073,11 +2079,20 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 
+	/* ADD-IP: Special case for ABORT chunks
+	 * F4)  One special consideration is that ABORT Chunks arriving
+	 * destined to the IP address being deleted MUST be
+	 * ignored (see Section 5.3.1 for further details).
+	 */
+	if (SCTP_ADDR_DEL ==
+		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
+		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+
 	/* Stop the T5-shutdown guard timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2109,6 +2124,15 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 
+	/* ADD-IP: Special case for ABORT chunks
+	 * F4)  One special consideration is that ABORT Chunks arriving
+	 * destined to the IP address being deleted MUST be
+	 * ignored (see Section 5.3.1 for further details).
+	 */
+	if (SCTP_ADDR_DEL ==
+		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
+		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+
 	/* Stop the T2-shutdown timer. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2117,7 +2141,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2344,8 +2368,6 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	unsigned len;
-	__be16 error = SCTP_ERROR_NO_ERROR;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2363,6 +2385,28 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 
+	/* ADD-IP: Special case for ABORT chunks
+	 * F4)  One special consideration is that ABORT Chunks arriving
+	 * destined to the IP address being deleted MUST be
+	 * ignored (see Section 5.3.1 for further details).
+	 */
+	if (SCTP_ADDR_DEL ==
+		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
+		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+
+	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+}
+
+static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	unsigned len;
+	__be16 error = SCTP_ERROR_NO_ERROR;
+
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
-- 
cgit v1.2.3


From d6701191329b51793bc56724548f0863d2149c29 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 20 Dec 2007 14:13:31 -0800
Subject: [SCTP]: Follow Add-IP security consideratiosn wrt INIT/INIT-ACK

The Security Considerations section of RFC 5061 has the following
text:

   If an SCTP endpoint that supports this extension receives an INIT
   that indicates that the peer supports the ASCONF extension but does
   NOT support the [RFC4895] extension, the receiver of such an INIT
   MUST send an ABORT in response.  Note that an implementation is
   allowed to silently discard such an INIT as an option as well, but
   under NO circumstance is an implementation allowed to proceed with
   the association setup by sending an INIT-ACK in response.

   An implementation that receives an INIT-ACK that indicates that the
   peer does not support the [RFC4895] extension MUST NOT send the
   COOKIE-ECHO to establish the association.  Instead, the
   implementation MUST discard the INIT-ACK and report to the upper-
   layer user that an association cannot be established destroying the
   Transmission Control Block (TCB).

Follow the recomendations.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
 net/sctp/sm_statefuns.c  |  7 +++----
 2 files changed, 46 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 46f54188f00..dd98763c8b0 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1836,6 +1836,39 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 	return 0;
 }
 
+static int sctp_verify_ext_param(union sctp_params param)
+{
+	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+	int have_auth = 0;
+	int have_asconf = 0;
+	int i;
+
+	for (i = 0; i < num_ext; i++) {
+		switch (param.ext->chunks[i]) {
+		    case SCTP_CID_AUTH:
+			    have_auth = 1;
+			    break;
+		    case SCTP_CID_ASCONF:
+		    case SCTP_CID_ASCONF_ACK:
+			    have_asconf = 1;
+			    break;
+		}
+	}
+
+	/* ADD-IP Security: The draft requires us to ABORT or ignore the
+	 * INIT/INIT-ACK if ADD-IP is listed, but AUTH is not.  Do this
+	 * only if ADD-IP is turned on and we are not backward-compatible
+	 * mode.
+	 */
+	if (sctp_addip_noauth)
+		return 1;
+
+	if (sctp_addip_enable && !have_auth && have_asconf)
+		return 0;
+
+	return 1;
+}
+
 static void sctp_process_ext_param(struct sctp_association *asoc,
 				    union sctp_params param)
 {
@@ -1966,7 +1999,11 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
 	case SCTP_PARAM_UNRECOGNIZED_PARAMETERS:
 	case SCTP_PARAM_ECN_CAPABLE:
 	case SCTP_PARAM_ADAPTATION_LAYER_IND:
+		break;
+
 	case SCTP_PARAM_SUPPORTED_EXT:
+		if (!sctp_verify_ext_param(param))
+			return SCTP_IERROR_ABORT;
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
@@ -2139,10 +2176,11 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
 					!asoc->peer.peer_hmacs))
 		asoc->peer.auth_capable = 0;
 
-
-	/* If the peer claims support for ADD-IP without support
-	 * for AUTH, disable support for ADD-IP.
-	 * Do this only if backward compatible mode is turned off.
+	/* In a non-backward compatible mode, if the peer claims
+	 * support for ADD-IP but not AUTH,  the ADD-IP spec states
+	 * that we MUST ABORT the association. Section 6.  The section
+	 * also give us an option to silently ignore the packet, which
+	 * is what we'll do here.
 	 */
 	if (!sctp_addip_noauth &&
 	     (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
@@ -2150,6 +2188,7 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
 						  SCTP_PARAM_DEL_IP |
 						  SCTP_PARAM_SET_PRIMARY);
 		asoc->peer.asconf_capable = 0;
+		goto clean_up;
 	}
 
 	/* Walk list of transports, removing transports in the UNKNOWN state. */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 0c9f37eb7d8..511d8c9a171 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -507,7 +507,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 			      &err_chunk)) {
 
 		/* This chunk contains fatal error. It is to be discarded.
-		 * Send an ABORT, with causes if there is any.
+		 * Send an ABORT, with causes.  If there are no causes,
+		 * then there wasn't enough memory.  Just terminate
+		 * the association.
 		 */
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(ep, asoc, arg,
@@ -526,9 +528,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 			} else {
 				error = SCTP_ERROR_NO_RESOURCE;
 			}
-		} else {
-			sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
-			error = SCTP_ERROR_INV_PARAM;
 		}
 
 		/* SCTP-AUTH, Section 6.3:
-- 
cgit v1.2.3


From 152da81deb9a4870aeac352336184b2b14d4b2ba Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 20 Dec 2007 15:31:33 -0800
Subject: [INET]: Uninline the __inet_hash function.

This one is used in quite many places in the networking code and
seems to big to be inline.

After the patch net/ipv4/build-in.o loses ~650 bytes:
add/remove: 2/0 grow/shrink: 0/5 up/down: 461/-1114 (-653)
function                                     old     new   delta
__inet_hash_nolisten                           -     282    +282
__inet_hash                                    -     179    +179
tcp_sacktag_write_queue                     2255    2254      -1
__inet_lookup_listener                       284     274     -10
tcp_v4_syn_recv_sock                         755     493    -262
tcp_v4_hash                                  389      35    -354
inet_hash_connect                           1086     599    -487

This version addresses the issue pointed by Eric, that
while being inline this function was optimized by gcc
in respect to the 'listen_possible' argument.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c            |  2 +-
 net/ipv4/inet_hashtables.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_ipv4.c        |  2 +-
 3 files changed, 46 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 02fc91ce250..f450df2fc86 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -408,7 +408,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
-	__inet_hash(&dccp_hashinfo, newsk, 0);
+	__inet_hash_nolisten(&dccp_hashinfo, newsk);
 	__inet_inherit_port(&dccp_hashinfo, sk, newsk);
 
 	return newsk;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 67704da04fc..8dfd5a691e5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -267,6 +267,48 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
 					  inet->dport);
 }
 
+void __inet_hash_nolisten(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+	struct inet_ehash_bucket *head;
+
+	BUG_TRAP(sk_unhashed(sk));
+
+	sk->sk_hash = inet_sk_ehashfn(sk);
+	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
+	list = &head->chain;
+	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+	write_lock(lock);
+	__sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(lock);
+}
+EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+
+void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+
+	if (sk->sk_state != TCP_LISTEN) {
+		__inet_hash_nolisten(hashinfo, sk);
+		return;
+	}
+
+	BUG_TRAP(sk_unhashed(sk));
+	list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+	lock = &hashinfo->lhash_lock;
+
+	inet_listen_wlock(hashinfo);
+	__sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(lock);
+	wake_up(&hashinfo->lhash_wait);
+}
+EXPORT_SYMBOL_GPL(__inet_hash);
+
 /*
  * Bind a port for a connect operation and hash it.
  */
@@ -334,7 +376,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->sport = htons(port);
-			__inet_hash(hinfo, sk, 0);
+			__inet_hash_nolisten(hinfo, sk);
 		}
 		spin_unlock(&head->lock);
 
@@ -351,7 +393,7 @@ ok:
 	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash(hinfo, sk, 0);
+		__inet_hash_nolisten(hinfo, sk);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 652c32368cc..fc9bdd8b7dc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1478,7 +1478,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 #endif
 
-	__inet_hash(&tcp_hashinfo, newsk, 0);
+	__inet_hash_nolisten(&tcp_hashinfo, newsk);
 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
 
 	return newsk;
-- 
cgit v1.2.3


From 77a5ba55dab7b4ece12f37c717022819e3f77b44 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 20 Dec 2007 15:32:17 -0800
Subject: [INET]: Uninline the __inet_lookup_established function.

This is -700 bytes from the net/ipv4/built-in.o

add/remove: 1/0 grow/shrink: 1/3 up/down: 340/-1040 (-700)
function                                     old     new   delta
__inet_lookup_established                      -     339    +339
tcp_sacktag_write_queue                     2254    2255      +1
tcp_v4_err                                  1304     973    -331
tcp_v4_rcv                                  2089    1744    -345
tcp_v4_do_rcv                                826     462    -364

Exporting is for dccp module (used via e.g. inet_lookup).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8dfd5a691e5..2e5814a8436 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -190,6 +190,44 @@ sherry_cache:
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
+struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo,
+				  const __be32 saddr, const __be16 sport,
+				  const __be32 daddr, const u16 hnum,
+				  const int dif)
+{
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+	struct sock *sk;
+	const struct hlist_node *node;
+	/* Optimize here for direct hit, only listening connections can
+	 * have wildcards anyways.
+	 */
+	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
+
+	prefetch(head->chain.first);
+	read_lock(lock);
+	sk_for_each(sk, node, &head->chain) {
+		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+			goto hit; /* You sunk my battleship! */
+	}
+
+	/* Must check for a TIME_WAIT'er before going to listener hash. */
+	sk_for_each(sk, node, &head->twchain) {
+		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+			goto hit;
+	}
+	sk = NULL;
+out:
+	read_unlock(lock);
+	return sk;
+hit:
+	sock_hold(sk);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(__inet_lookup_established);
+
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
-- 
cgit v1.2.3


From 7054fb9376e111d0edc06efcedbac6930a6caf76 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 20 Dec 2007 15:32:54 -0800
Subject: [INET]: Uninline the inet_twsk_put function.

This one is not that big, but is widely used: saves 1200 bytes
from net/ipv4/built-in.o

add/remove: 1/0 grow/shrink: 1/12 up/down: 97/-1300 (-1203)
function                                     old     new   delta
inet_twsk_put                                  -      87     +87
__inet_lookup_listener                       274     284     +10
tcp_sacktag_write_queue                     2255    2254      -1
tcp_time_wait                                482     411     -71
__inet_check_established                     796     722     -74
tcp_v4_err                                   973     898     -75
__inet_twsk_kill                             230     154     -76
inet_twsk_deschedule                         180     103     -77
tcp_v4_do_rcv                                462     384     -78
inet_hash_connect                            686     607     -79
inet_twdr_do_twkill_work                     236     150     -86
inet_twdr_twcal_tick                         395     307     -88
tcp_v4_rcv                                  1744    1480    -264
tcp_timewait_state_process                   975     644    -331

Export it for ipv6 module.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_timewait_sock.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index d43e787031a..1b7db420896 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -48,6 +48,21 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	inet_twsk_put(tw);
 }
 
+void inet_twsk_put(struct inet_timewait_sock *tw)
+{
+	if (atomic_dec_and_test(&tw->tw_refcnt)) {
+		struct module *owner = tw->tw_prot->owner;
+		twsk_destructor((struct sock *)tw);
+#ifdef SOCK_REFCNT_DEBUG
+		printk(KERN_DEBUG "%s timewait_sock %p released\n",
+		       tw->tw_prot->name, tw);
+#endif
+		kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
+		module_put(owner);
+	}
+}
+EXPORT_SYMBOL_GPL(inet_twsk_put);
+
 /*
  * Enter the time wait state. This is called with locally disabled BH.
  * Essentially we whip up a timewait bucket, copy the relevant info into it
-- 
cgit v1.2.3


From a43d8994b959a6daeeadcd1be6d4a9045b7029ac Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 20 Dec 2007 15:49:05 -0800
Subject: [NEIGH]: Make neigh_add_timer symmetrical to neigh_del_timer.

The neigh_del_timer() looks sane - it removes the timer and
(conditionally) puts the neighbor. I expected, that the
neigh_add_timer() is symmetrical to the del one - i.e. it
holds the neighbor and arms the timer - but it turned out
that it was not so.

I think, that making them look symmetrical makes the code
more readable.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4b6dd1e66f1..9a283fcde9a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -165,6 +165,16 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 	return shrunk;
 }
 
+static void neigh_add_timer(struct neighbour *n, unsigned long when)
+{
+	neigh_hold(n);
+	if (unlikely(mod_timer(&n->timer, when))) {
+		printk("NEIGH: BUG, double timer add, state is %x\n",
+		       n->nud_state);
+		dump_stack();
+	}
+}
+
 static int neigh_del_timer(struct neighbour *n)
 {
 	if ((n->nud_state & NUD_IN_TIMER) &&
@@ -716,15 +726,6 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
 		p->ucast_probes + p->app_probes + p->mcast_probes);
 }
 
-static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
-{
-	if (unlikely(mod_timer(&n->timer, when))) {
-		printk("NEIGH: BUG, double timer add, state is %x\n",
-		       n->nud_state);
-		dump_stack();
-	}
-}
-
 /* Called when a timer expires for a neighbour entry. */
 
 static void neigh_timer_handler(unsigned long arg)
@@ -856,7 +857,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
 			neigh->nud_state     = NUD_INCOMPLETE;
 			neigh->updated = jiffies;
-			neigh_hold(neigh);
 			neigh_add_timer(neigh, now + 1);
 		} else {
 			neigh->nud_state = NUD_FAILED;
@@ -869,7 +869,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 		}
 	} else if (neigh->nud_state & NUD_STALE) {
 		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
-		neigh_hold(neigh);
 		neigh->nud_state = NUD_DELAY;
 		neigh->updated = jiffies;
 		neigh_add_timer(neigh,
@@ -1013,13 +1012,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 
 	if (new != old) {
 		neigh_del_timer(neigh);
-		if (new & NUD_IN_TIMER) {
-			neigh_hold(neigh);
+		if (new & NUD_IN_TIMER)
 			neigh_add_timer(neigh, (jiffies +
 						((new & NUD_REACHABLE) ?
 						 neigh->parms->reachable_time :
 						 0)));
-		}
 		neigh->nud_state = new;
 	}
 
-- 
cgit v1.2.3


From bd515c3e48ececd774eb3128e81b669dbbd32637 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 20 Dec 2007 20:36:03 -0800
Subject: [TCP]: Fix TSO deferring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I'd say that most of what tcp_tso_should_defer had in between
there was dead code because of this.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a985b55e7d..7c50271ddc3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1230,7 +1230,8 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 		goto send_now;
 
 	/* Defer for less than two clock ticks. */
-	if (!tp->tso_deferred && ((jiffies<<1)>>1) - (tp->tso_deferred>>1) > 1)
+	if (tp->tso_deferred &&
+	    ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
-- 
cgit v1.2.3


From a1b051405bc16222d92c73b0c26d65b333a154ee Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 20 Dec 2007 20:41:12 -0800
Subject: [XFRM] IPv6: Fix dst/routing check at transformation.

IPv6 specific thing is wrongly removed from transformation at net-2.6.25.
This patch recovers it with current design.

o Update "path" of xfrm_dst since IPv6 transformation should
  care about routing changes. It is required by MIPv6 and
  off-link destined IPsec.
o Rename nfheader_len which is for non-fragment transformation used by
  MIPv6 to rt6i_nfheader_len as IPv6 name space.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c |  7 +++++++
 net/ipv6/ip6_output.c   |  4 ++--
 net/ipv6/xfrm6_policy.c | 17 +++++++++++++++++
 net/xfrm/xfrm_policy.c  | 21 +++++++++++++++++++++
 4 files changed, 47 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5ccae3a463c..656345f75e0 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -84,6 +84,12 @@ static int xfrm4_get_tos(struct flowi *fl)
 	return fl->fl4_tos;
 }
 
+static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+			   int nfheader_len)
+{
+	return 0;
+}
+
 static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 {
 	struct rtable *rt = (struct rtable *)xdst->route;
@@ -251,6 +257,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.find_bundle = 		__xfrm4_find_bundle,
 	.decode_session =	_decode_session4,
 	.get_tos =		xfrm4_get_tos,
+	.init_path =		xfrm4_init_path,
 	.fill_dst =		xfrm4_fill_dst,
 };
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d54da616e3a..4686646058d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1126,7 +1126,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
 		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
-			    rt->nfheader_len;
+			    rt->rt6i_nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
 	} else {
@@ -1141,7 +1141,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
-	fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len +
+	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
 			(opt ? opt->opt_nflen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d26b7dc3f33..cf373b46a1b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -98,6 +98,20 @@ static int xfrm6_get_tos(struct flowi *fl)
 	return 0;
 }
 
+static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+			   int nfheader_len)
+{
+	if (dst->ops->family == AF_INET6) {
+		struct rt6_info *rt = (struct rt6_info*)dst;
+		if (rt->rt6i_node)
+			path->path_cookie = rt->rt6i_node->fn_sernum;
+	}
+
+	path->u.rt6.rt6i_nfheader_len = nfheader_len;
+
+	return 0;
+}
+
 static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 {
 	struct rt6_info *rt = (struct rt6_info*)xdst->route;
@@ -115,6 +129,8 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 						   RTF_LOCAL);
 	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
 	xdst->u.rt6.rt6i_node = rt->rt6i_node;
+	if (rt->rt6i_node)
+		xdst->route_cookie = rt->rt6i_node->fn_sernum;
 	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
 	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
 	xdst->u.rt6.rt6i_src = rt->rt6i_src;
@@ -266,6 +282,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.find_bundle =		__xfrm6_find_bundle,
 	.decode_session =	_decode_session6,
 	.get_tos =		xfrm6_get_tos,
+	.init_path =		xfrm6_init_path,
 	.fill_dst =		xfrm6_fill_dst,
 };
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8023a3c0dad..521cb6e1256 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1266,6 +1266,23 @@ static inline struct xfrm_dst *xfrm_alloc_dst(int family)
 	return xdst;
 }
 
+static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+				 int nfheader_len)
+{
+	struct xfrm_policy_afinfo *afinfo =
+		xfrm_policy_get_afinfo(dst->ops->family);
+	int err;
+
+	if (!afinfo)
+		return -EINVAL;
+
+	err = afinfo->init_path(path, dst, nfheader_len);
+
+	xfrm_policy_put_afinfo(afinfo);
+
+	return err;
+}
+
 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 {
 	struct xfrm_policy_afinfo *afinfo =
@@ -1298,6 +1315,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	int i = 0;
 	int err;
 	int header_len = 0;
+	int nfheader_len = 0;
 	int trailer_len = 0;
 	int tos;
 	int family = policy->selector.family;
@@ -1352,6 +1370,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		dst_prev = dst1;
 
 		header_len += xfrm[i]->props.header_len;
+		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
+			nfheader_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 	}
 
@@ -1366,6 +1386,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	/* Copy neighbout for reachability confirmation */
 	dst0->neighbour = neigh_clone(dst->neighbour);
 
+	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
 	xfrm_init_pmtu(dst_prev);
 
 	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
-- 
cgit v1.2.3


From 9473e1f631de339c50bde1e3bd09e1045fe90fd5 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 20 Dec 2007 20:41:57 -0800
Subject: [XFRM] MIPv6: Fix to input RO state correctly.

Disable spin_lock during xfrm_type.input() function.
Follow design as IPsec inbound does.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_input.c | 54 +++++++++++++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 74f3aacebb5..f835ab458f5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -63,10 +63,26 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 	struct xfrm_state *x = NULL;
 	int wildcard = 0;
 	xfrm_address_t *xany;
-	struct xfrm_state *xfrm_vec_one = NULL;
 	int nh = 0;
 	int i = 0;
 
+	/* Allocate new secpath or COW existing one. */
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+
+		sp = secpath_dup(skb->sp);
+		if (!sp) {
+			goto drop;
+		}
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+
+	if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
+		goto drop;
+	}
+
 	xany = (xfrm_address_t *)&in6addr_any;
 
 	for (i = 0; i < 3; i++) {
@@ -119,47 +135,35 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 			continue;
 		}
 
+		spin_unlock(&x->lock);
+
 		nh = x->type->input(x, skb);
 		if (nh <= 0) {
-			spin_unlock(&x->lock);
 			xfrm_state_put(x);
 			x = NULL;
 			continue;
 		}
 
-		x->curlft.bytes += skb->len;
-		x->curlft.packets++;
-
-		spin_unlock(&x->lock);
-
-		xfrm_vec_one = x;
+		/* Found a state */
 		break;
 	}
 
-	if (!xfrm_vec_one)
+	if (!x) {
 		goto drop;
-
-	/* Allocate new secpath or COW existing one. */
-	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
-		struct sec_path *sp;
-		sp = secpath_dup(skb->sp);
-		if (!sp)
-			goto drop;
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
 	}
 
-	if (1 + skb->sp->len > XFRM_MAX_DEPTH)
-		goto drop;
+	skb->sp->xvec[skb->sp->len++] = x;
+
+	spin_lock(&x->lock);
 
-	skb->sp->xvec[skb->sp->len] = xfrm_vec_one;
-	skb->sp->len ++;
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+
+	spin_unlock(&x->lock);
 
 	return 1;
+
 drop:
-	if (xfrm_vec_one)
-		xfrm_state_put(xfrm_vec_one);
 	return -1;
 }
 
-- 
cgit v1.2.3


From 558f82ef6e0d25e87f7468c07b6db1fbbf95a855 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 20 Dec 2007 20:42:57 -0800
Subject: [XFRM]: Define packet dropping statistics.

This statistics is shown factor dropped by transformation
at /proc/net/xfrm_stat for developer.
It is a counter designed from current transformation source code
and defined as linux private MIB.

See Documentation/networking/xfrm_proc.txt for the detail.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Makefile      |  1 +
 net/xfrm/xfrm_policy.c | 24 +++++++++++++
 net/xfrm/xfrm_proc.c   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+)
 create mode 100644 net/xfrm/xfrm_proc.c

(limited to 'net')

diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 45744a3d3a5..332cfb0ff56 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,5 +4,6 @@
 
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
 		      xfrm_input.o xfrm_output.o xfrm_algo.o
+obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 521cb6e1256..32ddb7b12e7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -27,11 +27,19 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
+#ifdef CONFIG_XFRM_STATISTICS
+#include <net/snmp.h>
+#endif
 
 #include "xfrm_hash.h"
 
 int sysctl_xfrm_larval_drop __read_mostly;
 
+#ifdef CONFIG_XFRM_STATISTICS
+DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
+EXPORT_SYMBOL(xfrm_statistics);
+#endif
+
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
@@ -2258,6 +2266,16 @@ static struct notifier_block xfrm_dev_notifier = {
 	0
 };
 
+#ifdef CONFIG_XFRM_STATISTICS
+static int __init xfrm_statistics_init(void)
+{
+	if (snmp_mib_init((void **)xfrm_statistics,
+			  sizeof(struct linux_xfrm_mib)) < 0)
+		return -ENOMEM;
+	return 0;
+}
+#endif
+
 static void __init xfrm_policy_init(void)
 {
 	unsigned int hmask, sz;
@@ -2294,9 +2312,15 @@ static void __init xfrm_policy_init(void)
 
 void __init xfrm_init(void)
 {
+#ifdef CONFIG_XFRM_STATISTICS
+	xfrm_statistics_init();
+#endif
 	xfrm_state_init();
 	xfrm_policy_init();
 	xfrm_input_init();
+#ifdef CONFIG_XFRM_STATISTICS
+	xfrm_proc_init();
+#endif
 }
 
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
new file mode 100644
index 00000000000..31d035415ec
--- /dev/null
+++ b/net/xfrm/xfrm_proc.c
@@ -0,0 +1,96 @@
+/*
+ * xfrm_proc.c
+ *
+ * Copyright (C)2006-2007 USAGI/WIDE Project
+ *
+ * Authors:	Masahide NAKAMURA <nakam@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/snmp.h>
+#include <net/xfrm.h>
+
+static struct snmp_mib xfrm_mib_list[] = {
+	SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR),
+	SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR),
+	SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR),
+	SNMP_MIB_ITEM("XfrmInNoStates", LINUX_MIB_XFRMINNOSTATES),
+	SNMP_MIB_ITEM("XfrmInStateProtoError", LINUX_MIB_XFRMINSTATEPROTOERROR),
+	SNMP_MIB_ITEM("XfrmInStateModeError", LINUX_MIB_XFRMINSTATEMODEERROR),
+	SNMP_MIB_ITEM("XfrmInSeqOutOfWindow", LINUX_MIB_XFRMINSEQOUTOFWINDOW),
+	SNMP_MIB_ITEM("XfrmInStateExpired", LINUX_MIB_XFRMINSTATEEXPIRED),
+	SNMP_MIB_ITEM("XfrmInStateMismatch", LINUX_MIB_XFRMINSTATEMISMATCH),
+	SNMP_MIB_ITEM("XfrmInStateInvalid", LINUX_MIB_XFRMINSTATEINVALID),
+	SNMP_MIB_ITEM("XfrmInTmplMismatch", LINUX_MIB_XFRMINTMPLMISMATCH),
+	SNMP_MIB_ITEM("XfrmInNoPols", LINUX_MIB_XFRMINNOPOLS),
+	SNMP_MIB_ITEM("XfrmInPolBlock", LINUX_MIB_XFRMINPOLBLOCK),
+	SNMP_MIB_ITEM("XfrmInPolError", LINUX_MIB_XFRMINPOLERROR),
+	SNMP_MIB_ITEM("XfrmOutError", LINUX_MIB_XFRMOUTERROR),
+	SNMP_MIB_ITEM("XfrmOutBundleGenError", LINUX_MIB_XFRMOUTBUNDLEGENERROR),
+	SNMP_MIB_ITEM("XfrmOutBundleCheckError", LINUX_MIB_XFRMOUTBUNDLECHECKERROR),
+	SNMP_MIB_ITEM("XfrmOutNoStates", LINUX_MIB_XFRMOUTNOSTATES),
+	SNMP_MIB_ITEM("XfrmOutStateProtoError", LINUX_MIB_XFRMOUTSTATEPROTOERROR),
+	SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR),
+	SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED),
+	SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK),
+	SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
+	SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
+	SNMP_MIB_SENTINEL
+};
+
+static unsigned long
+fold_field(void *mib[], int offt)
+{
+        unsigned long res = 0;
+        int i;
+
+        for_each_possible_cpu(i) {
+                res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
+                res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
+        }
+        return res;
+}
+
+static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
+{
+	int i;
+	for (i=0; xfrm_mib_list[i].name; i++)
+		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
+			   fold_field((void **)xfrm_statistics,
+				      xfrm_mib_list[i].entry));
+	return 0;
+}
+
+static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xfrm_statistics_seq_show, NULL);
+}
+
+static struct file_operations xfrm_statistics_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = xfrm_statistics_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+int __init xfrm_proc_init(void)
+{
+	int rc = 0;
+
+	if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO,
+				  &xfrm_statistics_seq_fops))
+		goto stat_fail;
+
+ out:
+	return rc;
+
+ stat_fail:
+	rc = -ENOMEM;
+	goto out;
+}
-- 
cgit v1.2.3


From 0aa647746e5602e608220c10e51f49709a030f5d Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 20 Dec 2007 20:43:36 -0800
Subject: [XFRM]: Support to increment packet dropping statistics.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_input.c |  3 +++
 net/xfrm/xfrm_input.c  | 41 ++++++++++++++++++++++++--------
 net/xfrm/xfrm_output.c |  6 ++++-
 net/xfrm/xfrm_policy.c | 63 ++++++++++++++++++++++++++++++++++++++++----------
 4 files changed, 90 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index f835ab458f5..6644fc6d542 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -72,6 +72,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 
 		sp = secpath_dup(skb->sp);
 		if (!sp) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
 			goto drop;
 		}
 		if (skb->sp)
@@ -80,6 +81,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 	}
 
 	if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
+		XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
 		goto drop;
 	}
 
@@ -149,6 +151,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 	}
 
 	if (!x) {
+		XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
 		goto drop;
 	}
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 8624cbdb2a1..493243fc5fe 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -119,8 +119,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		struct sec_path *sp;
 
 		sp = secpath_dup(skb->sp);
-		if (!sp)
+		if (!sp) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
 			goto drop;
+		}
 		if (skb->sp)
 			secpath_put(skb->sp);
 		skb->sp = sp;
@@ -131,31 +133,45 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	family = XFRM_SPI_SKB_CB(skb)->family;
 
 	seq = 0;
-	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
+	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
 		goto drop;
+	}
 
 	do {
-		if (skb->sp->len == XFRM_MAX_DEPTH)
+		if (skb->sp->len == XFRM_MAX_DEPTH) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
 			goto drop;
+		}
 
 		x = xfrm_state_lookup(daddr, spi, nexthdr, family);
-		if (x == NULL)
+		if (x == NULL) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
 			goto drop;
+		}
 
 		skb->sp->xvec[skb->sp->len++] = x;
 
 		spin_lock(&x->lock);
-		if (unlikely(x->km.state != XFRM_STATE_VALID))
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID);
 			goto drop_unlock;
+		}
 
-		if ((x->encap ? x->encap->encap_type : 0) != encap_type)
+		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID);
 			goto drop_unlock;
+		}
 
-		if (x->props.replay_window && xfrm_replay_check(x, seq))
+		if (x->props.replay_window && xfrm_replay_check(x, seq)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINSEQOUTOFWINDOW);
 			goto drop_unlock;
+		}
 
-		if (xfrm_state_check_expire(x))
+		if (xfrm_state_check_expire(x)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED);
 			goto drop_unlock;
+		}
 
 		spin_unlock(&x->lock);
 
@@ -171,6 +187,7 @@ resume:
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG)
 				x->stats.integrity_failed++;
+			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR);
 			goto drop_unlock;
 		}
 
@@ -187,8 +204,10 @@ resume:
 
 		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
 
-		if (x->inner_mode->input(x, skb))
+		if (x->inner_mode->input(x, skb)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR);
 			goto drop;
+		}
 
 		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
@@ -203,8 +222,10 @@ resume:
 		family = x->outer_mode->afinfo->family;
 
 		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
-		if (err < 0)
+		if (err < 0) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
 			goto drop;
+		}
 	} while (!err);
 
 	nf_reset(skb);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 26fa0cb78c9..867484a046a 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -69,10 +69,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		err = x->type->output(x, skb);
 
 resume:
-		if (err)
+		if (err) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR);
 			goto error_nolock;
+		}
 
 		if (!(skb->dst = dst_pop(dst))) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
@@ -167,6 +170,7 @@ int xfrm_output(struct sk_buff *skb)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		err = skb_checksum_help(skb);
 		if (err) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
 			kfree_skb(skb);
 			return err;
 		}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 32ddb7b12e7..74807a7d3d6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1494,8 +1494,10 @@ restart:
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
 		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 		err = PTR_ERR(policy);
-		if (IS_ERR(policy))
+		if (IS_ERR(policy)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
 			goto dropdst;
+		}
 	}
 
 	if (!policy) {
@@ -1529,6 +1531,7 @@ restart:
 	default:
 	case XFRM_POLICY_BLOCK:
 		/* Prohibit the flow */
+		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
 		err = -EPERM;
 		goto error;
 
@@ -1548,6 +1551,7 @@ restart:
 		 */
 		dst = xfrm_find_bundle(fl, policy, family);
 		if (IS_ERR(dst)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			err = PTR_ERR(dst);
 			goto error;
 		}
@@ -1562,10 +1566,12 @@ restart:
 							    XFRM_POLICY_OUT);
 			if (pols[1]) {
 				if (IS_ERR(pols[1])) {
+					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
 					err = PTR_ERR(pols[1]);
 					goto error;
 				}
 				if (pols[1]->action == XFRM_POLICY_BLOCK) {
+					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
 					err = -EPERM;
 					goto error;
 				}
@@ -1611,6 +1617,7 @@ restart:
 				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 				if (nx == -EAGAIN && signal_pending(current)) {
+					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
 					err = -ERESTART;
 					goto error;
 				}
@@ -1621,8 +1628,10 @@ restart:
 				}
 				err = nx;
 			}
-			if (err < 0)
+			if (err < 0) {
+				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
 				goto error;
+			}
 		}
 		if (nx == 0) {
 			/* Flow passes not transformed. */
@@ -1632,8 +1641,10 @@ restart:
 
 		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
 		err = PTR_ERR(dst);
-		if (IS_ERR(dst))
+		if (IS_ERR(dst)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
 			goto error;
+		}
 
 		for (pi = 0; pi < npols; pi++) {
 			read_lock_bh(&pols[pi]->lock);
@@ -1652,6 +1663,10 @@ restart:
 			if (dst)
 				dst_free(dst);
 
+			if (pol_dead)
+				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
+			else
+				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			err = -EHOSTUNREACH;
 			goto error;
 		}
@@ -1664,6 +1679,7 @@ restart:
 			write_unlock_bh(&policy->lock);
 			if (dst)
 				dst_free(dst);
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			goto error;
 		}
 
@@ -1817,8 +1833,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	dir &= XFRM_POLICY_MASK;
 	fl_dir = policy_to_flow_dir(dir);
 
-	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0)
+	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
+		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
 		return 0;
+	}
+
 	nf_nat_decode_session(skb, &fl, family);
 
 	/* First, check used SA against their selectors. */
@@ -1827,28 +1846,35 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 		for (i=skb->sp->len-1; i>=0; i--) {
 			struct xfrm_state *x = skb->sp->xvec[i];
-			if (!xfrm_selector_match(&x->sel, &fl, family))
+			if (!xfrm_selector_match(&x->sel, &fl, family)) {
+				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
 				return 0;
+			}
 		}
 	}
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir]) {
 		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
-		if (IS_ERR(pol))
+		if (IS_ERR(pol)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
 			return 0;
+		}
 	}
 
 	if (!pol)
 		pol = flow_cache_lookup(&fl, family, fl_dir,
 					xfrm_policy_lookup);
 
-	if (IS_ERR(pol))
+	if (IS_ERR(pol)) {
+		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
 		return 0;
+	}
 
 	if (!pol) {
 		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
 			xfrm_secpath_reject(xerr_idx, skb, &fl);
+			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
 			return 0;
 		}
 		return 1;
@@ -1864,8 +1890,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 						    &fl, family,
 						    XFRM_POLICY_IN);
 		if (pols[1]) {
-			if (IS_ERR(pols[1]))
+			if (IS_ERR(pols[1])) {
+				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
 				return 0;
+			}
 			pols[1]->curlft.use_time = get_seconds();
 			npols ++;
 		}
@@ -1886,10 +1914,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 		for (pi = 0; pi < npols; pi++) {
 			if (pols[pi] != pol &&
-			    pols[pi]->action != XFRM_POLICY_ALLOW)
+			    pols[pi]->action != XFRM_POLICY_ALLOW) {
+				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
 				goto reject;
-			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
+			}
+			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
+				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
 				goto reject_error;
+			}
 			for (i = 0; i < pols[pi]->xfrm_nr; i++)
 				tpp[ti++] = &pols[pi]->xfrm_vec[i];
 		}
@@ -1911,16 +1943,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 				if (k < -1)
 					/* "-2 - errored_index" returned */
 					xerr_idx = -(2+k);
+				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
 				goto reject;
 			}
 		}
 
-		if (secpath_has_nontransport(sp, k, &xerr_idx))
+		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
 			goto reject;
+		}
 
 		xfrm_pols_put(pols, npols);
 		return 1;
 	}
+	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
 
 reject:
 	xfrm_secpath_reject(xerr_idx, skb, &fl);
@@ -1934,8 +1970,11 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
 	struct flowi fl;
 
-	if (xfrm_decode_session(skb, &fl, family) < 0)
+	if (xfrm_decode_session(skb, &fl, family) < 0) {
+		/* XXX: we should have something like FWDHDRERROR here. */
+		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
 		return 0;
+	}
 
 	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
 }
-- 
cgit v1.2.3


From 8ea843495df36036cb7f22f61994b34f8362b443 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 20 Dec 2007 20:44:02 -0800
Subject: [XFRM]: Add packet processing statistics option.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Kconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 577a4f821b9..6b5b50f427f 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -35,6 +35,16 @@ config XFRM_MIGRATE
 
 	  If unsure, say N.
 
+config XFRM_STATISTICS
+	bool "Transformation statistics (EXPERIMENTAL)"
+	depends on XFRM && PROC_FS && EXPERIMENTAL
+	---help---
+	  This statistics is not a SNMP/MIB specification but shows
+	  statistics about transformation error (or almost error) factor
+	  at packet processing for developer.
+
+	  If unsure, say N.
+
 config NET_KEY
 	tristate "PF_KEY sockets"
 	select XFRM
-- 
cgit v1.2.3


From 68277accb3a5f004344f4346498640601b8b7016 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 20 Dec 2007 20:49:33 -0800
Subject: [XFRM]: Assorted IPsec fixups

This patch fixes a number of small but potentially troublesome things in the
XFRM/IPsec code:

 * Use the 'audit_enabled' variable already in include/linux/audit.h
   Removed the need for extern declarations local to each XFRM audit fuction

 * Convert 'sid' to 'secid' everywhere we can
   The 'sid' name is specific to SELinux, 'secid' is the common naming
   convention used by the kernel when refering to tokenized LSM labels,
   unfortunately we have to leave 'ctx_sid' in 'struct xfrm_sec_ctx' otherwise
   we risk breaking userspace

 * Convert address display to use standard NIP* macros
   Similar to what was recently done with the SPD audit code, this also also
   includes the removal of some unnecessary memcpy() calls

 * Move common code to xfrm_audit_common_stateinfo()
   Code consolidation from the "less is more" book on software development

 * Proper spacing around commas in function arguments
   Minor style tweak since I was already touching the code

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 15 +++++++-------
 net/xfrm/xfrm_state.c  | 53 +++++++++++++++++++++-----------------------------
 2 files changed, 29 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 74807a7d3d6..abc3e39b115 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/audit.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
@@ -2401,15 +2402,14 @@ static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
 	}
 }
 
-void
-xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
+			   u32 auid, u32 secid)
 {
 	struct audit_buffer *audit_buf;
-	extern int audit_enabled;
 
 	if (audit_enabled == 0)
 		return;
-	audit_buf = xfrm_audit_start(auid, sid);
+	audit_buf = xfrm_audit_start(auid, secid);
 	if (audit_buf == NULL)
 		return;
 	audit_log_format(audit_buf, " op=SPD-add res=%u", result);
@@ -2418,15 +2418,14 @@ xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
 
-void
-xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
+void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
+			      u32 auid, u32 secid)
 {
 	struct audit_buffer *audit_buf;
-	extern int audit_enabled;
 
 	if (audit_enabled == 0)
 		return;
-	audit_buf = xfrm_audit_start(auid, sid);
+	audit_buf = xfrm_audit_start(auid, secid);
 	if (audit_buf == NULL)
 		return;
 	audit_log_format(audit_buf, " op=SPD-delete res=%u", result);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f7c0951c9fd..9e57378c51d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -19,6 +19,7 @@
 #include <linux/ipsec.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/audit.h>
 #include <asm/uaccess.h>
 
 #include "xfrm_hash.h"
@@ -1998,69 +1999,59 @@ void __init xfrm_state_init(void)
 static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x,
 					       struct audit_buffer *audit_buf)
 {
-	if (x->security)
+	struct xfrm_sec_ctx *ctx = x->security;
+	u32 spi = ntohl(x->id.spi);
+
+	if (ctx)
 		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
-				 x->security->ctx_alg, x->security->ctx_doi,
-				 x->security->ctx_str);
+				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
 
 	switch(x->props.family) {
 	case AF_INET:
-		audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
+		audit_log_format(audit_buf,
+				 " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
 				 NIPQUAD(x->props.saddr.a4),
 				 NIPQUAD(x->id.daddr.a4));
 		break;
 	case AF_INET6:
-		{
-			struct in6_addr saddr6, daddr6;
-
-			memcpy(&saddr6, x->props.saddr.a6,
-				sizeof(struct in6_addr));
-			memcpy(&daddr6, x->id.daddr.a6,
-				sizeof(struct in6_addr));
-			audit_log_format(audit_buf,
-					 " src=" NIP6_FMT " dst=" NIP6_FMT,
-					 NIP6(saddr6), NIP6(daddr6));
-		}
+		audit_log_format(audit_buf,
+				 " src=" NIP6_FMT " dst=" NIP6_FMT,
+				 NIP6(*(struct in6_addr *)x->props.saddr.a6),
+				 NIP6(*(struct in6_addr *)x->id.daddr.a6));
 		break;
 	}
+
+	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
 }
 
-void
-xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid)
+void xfrm_audit_state_add(struct xfrm_state *x, int result,
+			  u32 auid, u32 secid)
 {
 	struct audit_buffer *audit_buf;
-	u32 spi;
-	extern int audit_enabled;
 
 	if (audit_enabled == 0)
 		return;
-	audit_buf = xfrm_audit_start(auid, sid);
+	audit_buf = xfrm_audit_start(auid, secid);
 	if (audit_buf == NULL)
 		return;
-	audit_log_format(audit_buf, " op=SAD-add res=%u",result);
+	audit_log_format(audit_buf, " op=SAD-add res=%u", result);
 	xfrm_audit_common_stateinfo(x, audit_buf);
-	spi = ntohl(x->id.spi);
-	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
 	audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
 
-void
-xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid)
+void xfrm_audit_state_delete(struct xfrm_state *x, int result,
+			     u32 auid, u32 secid)
 {
 	struct audit_buffer *audit_buf;
-	u32 spi;
-	extern int audit_enabled;
 
 	if (audit_enabled == 0)
 		return;
-	audit_buf = xfrm_audit_start(auid, sid);
+	audit_buf = xfrm_audit_start(auid, secid);
 	if (audit_buf == NULL)
 		return;
-	audit_log_format(audit_buf, " op=SAD-delete res=%u",result);
+	audit_log_format(audit_buf, " op=SAD-delete res=%u", result);
 	xfrm_audit_common_stateinfo(x, audit_buf);
-	spi = ntohl(x->id.spi);
-	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
 	audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
-- 
cgit v1.2.3


From b92edbe0b8a36a833c16b0cbafb6e899b81ffc08 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 20 Dec 2007 21:48:32 -0800
Subject: [TCP] Avoid two divides in tcp_output.c

Because 'free_space' variable in __tcp_select_window() is signed,
expression (free_space / 2) forces compiler to emit an integer divide.

This can be changed to a plain right shift, less expensive.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c50271ddc3..9a9510acb14 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1627,7 +1627,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (mss > full_space)
 		mss = full_space;
 
-	if (free_space < full_space/2) {
+	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 
 		if (tcp_memory_pressure)
@@ -1666,7 +1666,7 @@ u32 __tcp_select_window(struct sock *sk)
 		if (window <= free_space - mss || window > free_space)
 			window = (free_space/mss)*mss;
 		else if (mss == full_space &&
-			 free_space > window + full_space/2)
+			 free_space > window + (full_space >> 1))
 			window = free_space;
 	}
 
-- 
cgit v1.2.3


From c21b39aca4f8f4975784e54cd3a1b80bab80dcc0 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Wed, 19 Dec 2007 01:26:16 +0100
Subject: mac80211: make PID rate control algorithm the default

This makes the new PID TX rate control algorithm the default instead of the
rc80211_simple rate control algorithm. The simple algorithm was flawed in
several ways: it wasn't responsive at all and didn't age the information it was
relying on properly. The PID algorithm allows us to tune characteristics such
as responsiveness by adjusting parameters and was found to generally behave
better.

The default algorithm can be overridden to select simple instead. Which
ever algorithm is the default is included as part of the mac80211
module automatically. The other algorithm (simple vs. pid) can
be selected for inclusion as well. If EMBEDDED is selected then
the choice is available to have no default specified and neither
algorithm included in mac80211. The default algorithm can be set
through a modparam.

While at it, mark rc80211-simple as deprecated, and schedule it
for removal.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Kconfig          | 72 +++++++++++++++++++++++++++++++++++--------
 net/mac80211/Makefile         |  4 +--
 net/mac80211/ieee80211.c      | 12 ++++----
 net/mac80211/ieee80211_rate.c | 24 +++++++++++----
 4 files changed, 85 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 9f8663b412d..297f4d967e2 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -13,29 +13,75 @@ config MAC80211
 	This option enables the hardware independent IEEE 802.11
 	networking stack.
 
-config MAC80211_RCSIMPLE
-	bool "'simple' rate control algorithm" if EMBEDDED
+config MAC80211_RC_DEFAULT_CHOICE
+	bool "Choose default rate control algorithm" if EMBEDDED
 	default y
 	depends on MAC80211
-	help
-	  This option allows you to turn off the 'simple' rate
-	  control algorithm in mac80211. If you do turn it off,
-	  you absolutely need another rate control algorithm.
+	---help---
+	  This options enables selection of a default rate control
+	  algorithm to be built into the mac80211 module.  Alternate
+	  rate control algorithms might be built into the mac80211
+	  module as well.
+
+choice
+	prompt "Default rate control algorithm"
+	default MAC80211_RC_DEFAULT_PID
+	depends on MAC80211 && MAC80211_RC_DEFAULT_CHOICE
+	---help---
+	  This option selects the default rate control algorithm
+	  mac80211 will use. Note that this default can still be
+	  overriden through the ieee80211_default_rc_algo module
+	  parameter.
+
+config MAC80211_RC_DEFAULT_PID
+	bool "PID controller based rate control algorithm"
+	select MAC80211_RC_PID
+	---help---
+	  Select the PID controller based rate control as the
+	  default rate control algorithm. You should choose
+	  this unless you know what you are doing.
+
+config MAC80211_RC_DEFAULT_SIMPLE
+	bool "Simple rate control algorithm"
+	select MAC80211_RC_SIMPLE
+	---help---
+	  Select the simple rate control as the default rate
+	  control algorithm. Note that this is a non-responsive,
+	  dumb algorithm. You should choose the PID rate control
+	  instead.
+
+endchoice
 
-	  Say Y unless you know you will have another algorithm
-	  available.
+config MAC80211_RC_DEFAULT
+	string
+	depends on MAC80211
+	default "pid" if MAC80211_RC_DEFAULT_PID
+	default "simple" if MAC80211_RC_DEFAULT_SIMPLE
+	default ""
 
-config MAC80211_RCPID
-	bool "'PID' rate control algorithm" if EMBEDDED
+config MAC80211_RC_PID
+	bool "PID controller based rate control algorithm"
 	default y
 	depends on MAC80211
-	help
+	---help---
 	  This option enables a TX rate control algorithm for
 	  mac80211 that uses a PID controller to select the TX
 	  rate.
 
-	  Say Y unless you're sure you want to use a different
-	  rate control algorithm.
+	  Say Y or M unless you're sure you want to use a
+	  different rate control algorithm.
+
+config MAC80211_RC_SIMPLE
+	bool "Simple rate control algorithm (DEPRECATED)"
+	default n
+	depends on MAC80211
+	---help---
+	  This option enables a very simple, non-responsive TX
+	  rate control algorithm. This algorithm is deprecated
+	  and will be removed from the kernel in near future.
+	  It has been replaced by the PID algorithm.
+
+	  Say N unless you know what you are doing.
 
 config MAC80211_LEDS
 	bool "Enable LED triggers"
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 62c01ca099e..a375f0477da 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -3,8 +3,8 @@ obj-$(CONFIG_MAC80211) += mac80211.o
 mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
 mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o
 mac80211-objs-$(CONFIG_NET_SCHED) += wme.o
-mac80211-objs-$(CONFIG_MAC80211_RCSIMPLE) += rc80211_simple.o
-mac80211-objs-$(CONFIG_MAC80211_RCPID) += rc80211_pid.o
+mac80211-objs-$(CONFIG_MAC80211_RC_SIMPLE) += rc80211_simple.o
+mac80211-objs-$(CONFIG_MAC80211_RC_PID) += rc80211_pid.o
 
 mac80211-objs := \
 	ieee80211.o \
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 3d7b4341914..c0dbf77547f 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -1312,13 +1312,13 @@ static int __init ieee80211_init(void)
 
 	BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb));
 
-#ifdef CONFIG_MAC80211_RCSIMPLE
+#ifdef CONFIG_MAC80211_RC_SIMPLE
 	ret = ieee80211_rate_control_register(&mac80211_rcsimple);
 	if (ret)
 		goto fail;
 #endif
 
-#ifdef CONFIG_MAC80211_RCPID
+#ifdef CONFIG_MAC80211_RC_PID
 	ret = ieee80211_rate_control_register(&mac80211_rcpid);
 	if (ret)
 		goto fail;
@@ -1338,10 +1338,10 @@ static int __init ieee80211_init(void)
 
 fail:
 
-#ifdef CONFIG_MAC80211_RCSIMPLE
+#ifdef CONFIG_MAC80211_RC_SIMPLE
 	ieee80211_rate_control_unregister(&mac80211_rcsimple);
 #endif
-#ifdef CONFIG_MAC80211_RCPID
+#ifdef CONFIG_MAC80211_RC_PID
 	ieee80211_rate_control_unregister(&mac80211_rcpid);
 #endif
 
@@ -1350,10 +1350,10 @@ fail:
 
 static void __exit ieee80211_exit(void)
 {
-#ifdef CONFIG_MAC80211_RCSIMPLE
+#ifdef CONFIG_MAC80211_RC_SIMPLE
 	ieee80211_rate_control_unregister(&mac80211_rcsimple);
 #endif
-#ifdef CONFIG_MAC80211_RCPID
+#ifdef CONFIG_MAC80211_RC_PID
 	ieee80211_rate_control_unregister(&mac80211_rcpid);
 #endif
 
diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
index e495b0998b4..65fc9ad615e 100644
--- a/net/mac80211/ieee80211_rate.c
+++ b/net/mac80211/ieee80211_rate.c
@@ -21,6 +21,11 @@ struct rate_control_alg {
 static LIST_HEAD(rate_ctrl_algs);
 static DEFINE_MUTEX(rate_ctrl_mutex);
 
+static char *ieee80211_default_rc_algo = CONFIG_MAC80211_RC_DEFAULT;
+module_param(ieee80211_default_rc_algo, charp, 0644);
+MODULE_PARM_DESC(ieee80211_default_rc_algo,
+		 "Default rate control algorithm for mac80211 to use");
+
 int ieee80211_rate_control_register(struct rate_control_ops *ops)
 {
 	struct rate_control_alg *alg;
@@ -89,21 +94,27 @@ ieee80211_try_rate_control_ops_get(const char *name)
 	return ops;
 }
 
-/* Get the rate control algorithm. If `name' is NULL, get the first
- * available algorithm. */
+/* Get the rate control algorithm. */
 static struct rate_control_ops *
 ieee80211_rate_control_ops_get(const char *name)
 {
 	struct rate_control_ops *ops;
+	const char *alg_name;
 
 	if (!name)
-		name = "simple";
+		alg_name = ieee80211_default_rc_algo;
+	else
+		alg_name = name;
 
-	ops = ieee80211_try_rate_control_ops_get(name);
+	ops = ieee80211_try_rate_control_ops_get(alg_name);
 	if (!ops) {
-		request_module("rc80211_%s", name);
-		ops = ieee80211_try_rate_control_ops_get(name);
+		request_module("rc80211_%s", alg_name);
+		ops = ieee80211_try_rate_control_ops_get(alg_name);
 	}
+	if (!ops && name)
+		/* try default if specific alg requested but not found */
+		ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
+
 	return ops;
 }
 
@@ -244,3 +255,4 @@ void rate_control_deinitialize(struct ieee80211_local *local)
 	local->rate_ctrl = NULL;
 	rate_control_put(ref);
 }
+
-- 
cgit v1.2.3


From 90d501d610c0065dce43120c26744a49d8e0490c Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Wed, 19 Dec 2007 01:26:34 +0100
Subject: rc80211-pid: add rate behaviour learning algorithm

This patch introduces a learning algorithm in order for the PID controller
to learn how to map adjustment values to rates. This is better described in
code comments.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.c | 181 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 161 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.c b/net/mac80211/rc80211_pid.c
index b358824b5ac..1116dc67ddd 100644
--- a/net/mac80211/rc80211_pid.c
+++ b/net/mac80211/rc80211_pid.c
@@ -2,6 +2,7 @@
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
+ * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -39,12 +40,18 @@
  * an actual sliding window. The advantage is that we don't need to keep an
  * array of the last N error values and computation is easier.
  *
- * Once we have the adj value, we need to map it to a TX rate to be selected.
- * For now, we depend on the rates to be ordered in a way such that more robust
- * rates (i.e. such that exhibit a lower framed failed percentage) come first.
- * E.g. for the 802.11b/g case, we first have the b rates in ascending order,
- * then the g rates. The adj simply decides the index of the TX rate in the list
- * to switch to (relative to the current TX rate entry).
+ * Once we have the adj value, we map it to a rate by means of a learning
+ * algorithm. This algorithm keeps the state of the percentual failed frames
+ * difference between rates. The behaviour of the lowest available rate is kept
+ * as a reference value, and every time we switch between two rates, we compute
+ * the difference between the failed frames each rate exhibited. By doing so,
+ * we compare behaviours which different rates exhibited in adjacent timeslices,
+ * thus the comparison is minimally affected by external conditions. This
+ * difference gets propagated to the whole set of measurements, so that the
+ * reference is always the same. Periodically, we normalize this set so that
+ * recent events weigh the most. By comparing the adj value with this set, we
+ * avoid pejorative switches to lower rates and allow for switches to higher
+ * rates if they behaved well.
  *
  * Note that for the computations we use a fixed-point representation to avoid
  * floating point arithmetic. Hence, all values are shifted left by
@@ -78,6 +85,16 @@
  */
 #define RC_PID_TARGET_PF (11 << RC_PID_ARITH_SHIFT)
 
+/* Rate behaviour normalization quantity over time. */
+#define RC_PID_NORM_OFFSET 3
+
+/* Push high rates right after loading. */
+#define RC_PID_FAST_START 0
+
+/* Arithmetic right shift for positive and negative values for ISO C. */
+#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
+	(x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)
+
 struct rc_pid_sta_info {
 	unsigned long last_change;
 	unsigned long last_sample;
@@ -121,6 +138,18 @@ struct rc_pid_sta_info {
 /* Algorithm parameters. We keep them on a per-algorithm approach, so they can
  * be tuned individually for each interface.
  */
+struct rc_pid_rateinfo {
+
+	/* Map sorted rates to rates in ieee80211_hw_mode. */
+	int index;
+
+	/* Map rates in ieee80211_hw_mode to sorted rates. */
+	int rev_index;
+
+	/* Comparison with the lowest rate. */
+	int diff;
+};
+
 struct rc_pid_info {
 
 	/* The failed frames percentage target. */
@@ -130,15 +159,56 @@ struct rc_pid_info {
 	s32 coeff_p;
 	s32 coeff_i;
 	s32 coeff_d;
+
+	/* Rates information. */
+	struct rc_pid_rateinfo *rinfo;
+
+	/* Index of the last used rate. */
+	int oldrate;
 };
 
+/* Shift the adjustment so that we won't switch to a lower rate if it exhibited
+ * a worse failed frames behaviour and we'll choose the highest rate whose
+ * failed frames behaviour is not worse than the one of the original rate
+ * target. While at it, check that the adjustment is within the ranges. Then,
+ * provide the new rate index. */
+static int rate_control_pid_shift_adjust(struct rc_pid_rateinfo *r,
+					 int adj, int cur, int l)
+{
+	int i, j, k, tmp;
+
+	if (cur + adj < 0)
+		return 0;
+	if (cur + adj >= l)
+		return l - 1;
+
+	i = r[cur + adj].rev_index;
+
+	j = r[cur].rev_index;
+
+	if (adj < 0) {
+			tmp = i;
+			for (k = j; k >= i; k--)
+				if (r[k].diff <= r[j].diff)
+					tmp = k;
+			return r[tmp].index;
+	} else if (adj > 0) {
+			tmp = i;
+			for (k = i + 1; k + i < l; k++)
+				if (r[k].diff <= r[i].diff)
+					tmp = k;
+			return r[tmp].index;
+	}
+	return cur + adj;
+}
 
 static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
-					 struct sta_info *sta, int adj)
+					 struct sta_info *sta, int adj,
+					 struct rc_pid_rateinfo *rinfo)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_hw_mode *mode;
-	int newidx = sta->txrate + adj;
+	int newidx;
 	int maxrate;
 	int back = (adj > 0) ? 1 : -1;
 
@@ -151,10 +221,8 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 	mode = local->oper_hw_mode;
 	maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
 
-	if (newidx < 0)
-		newidx = 0;
-	else if (newidx >= mode->num_rates)
-		newidx = mode->num_rates - 1;
+	newidx = rate_control_pid_shift_adjust(rinfo, adj, sta->txrate,
+					       mode->num_rates);
 
 	while (newidx != sta->txrate) {
 		if (rate_supported(sta, mode, newidx) &&
@@ -167,18 +235,37 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 	}
 }
 
+/* Normalize the failed frames per-rate differences. */
+static void rate_control_pid_normalize(struct rc_pid_rateinfo *r, int l)
+{
+	int i;
+
+	if (r[0].diff > RC_PID_NORM_OFFSET)
+		r[0].diff -= RC_PID_NORM_OFFSET;
+	else if (r[0].diff < -RC_PID_NORM_OFFSET)
+		r[0].diff += RC_PID_NORM_OFFSET;
+	for (i = 0; i < l - 1; i++)
+		if (r[i + 1].diff > r[i].diff + RC_PID_NORM_OFFSET)
+			r[i + 1].diff -= RC_PID_NORM_OFFSET;
+		else if (r[i + 1].diff <= r[i].diff)
+			r[i + 1].diff += RC_PID_NORM_OFFSET;
+}
+
 static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 				    struct ieee80211_local *local,
 				    struct sta_info *sta)
 {
 	struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
+	struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
+	struct ieee80211_hw_mode *mode;
 	u32 pf;
 	s32 err_avg;
 	s32 err_prop;
 	s32 err_int;
 	s32 err_der;
-	int adj;
+	int adj, i, j, tmp;
 
+	mode = local->oper_hw_mode;
 	spinfo = sta->rate_ctrl_priv;
 	spinfo->last_sample = jiffies;
 
@@ -194,6 +281,20 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 		spinfo->tx_num_failed = 0;
 	}
 
+	/* If we just switched rate, update the rate behaviour info. */
+	if (pinfo->oldrate != sta->txrate) {
+
+		i = rinfo[pinfo->oldrate].rev_index;
+		j = rinfo[sta->txrate].rev_index;
+
+		tmp = (pf - spinfo->last_pf);
+		tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
+
+		rinfo[j].diff = rinfo[i].diff + tmp;
+		pinfo->oldrate = sta->txrate;
+	}
+	rate_control_pid_normalize(rinfo, mode->num_rates);
+
 	/* Compute the proportional, integral and derivative errors. */
 	err_prop = RC_PID_TARGET_PF - pf;
 
@@ -207,16 +308,11 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	/* Compute the controller output. */
 	adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
 	      + err_der * pinfo->coeff_d);
-
-	/* We need to do an arithmetic right shift. ISO C says this is
-	 * implementation defined for negative left operands. Hence, be
-	 * careful to get it right, also for negative values. */
-	adj = (adj < 0) ? -((-adj) >> (2 * RC_PID_ARITH_SHIFT)) :
-			  adj >> (2 * RC_PID_ARITH_SHIFT);
+	adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT);
 
 	/* Change rate. */
 	if (adj)
-		rate_control_pid_adjust_rate(local, sta, adj);
+		rate_control_pid_adjust_rate(local, sta, adj, rinfo);
 }
 
 static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
@@ -316,13 +412,57 @@ static void rate_control_pid_rate_init(void *priv, void *priv_sta,
 static void *rate_control_pid_alloc(struct ieee80211_local *local)
 {
 	struct rc_pid_info *pinfo;
+	struct rc_pid_rateinfo *rinfo;
+	struct ieee80211_hw_mode *mode;
+	int i, j, tmp;
+	bool s;
 
 	pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
+	if (!pinfo)
+		return NULL;
+
+	/* We can safely assume that oper_hw_mode won't change unless we get
+	 * reinitialized. */
+	mode = local->oper_hw_mode;
+	rinfo = kmalloc(sizeof(*rinfo) * mode->num_rates, GFP_ATOMIC);
+	if (!rinfo) {
+		kfree(pinfo);
+		return NULL;
+	}
+
+	/* Sort the rates. This is optimized for the most common case (i.e.
+	 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
+	 * mapping too. */
+	for (i = 0; i < mode->num_rates; i++) {
+		rinfo[i].index = i;
+		rinfo[i].rev_index = i;
+		if (RC_PID_FAST_START)
+			rinfo[i].diff = 0;
+		else
+			rinfo[i].diff = i * RC_PID_NORM_OFFSET;
+	}
+	for (i = 1; i < mode->num_rates; i++) {
+		s = 0;
+		for (j = 0; j < mode->num_rates - i; j++)
+			if (unlikely(mode->rates[rinfo[j].index].rate >
+				     mode->rates[rinfo[j + 1].index].rate)) {
+				tmp = rinfo[j].index;
+				rinfo[j].index = rinfo[j + 1].index;
+				rinfo[j + 1].index = tmp;
+				rinfo[rinfo[j].index].rev_index = j;
+				rinfo[rinfo[j + 1].index].rev_index = j + 1;
+				s = 1;
+			}
+		if (!s)
+			break;
+	}
 
 	pinfo->target = RC_PID_TARGET_PF;
 	pinfo->coeff_p = RC_PID_COEFF_P;
 	pinfo->coeff_i = RC_PID_COEFF_I;
 	pinfo->coeff_d = RC_PID_COEFF_D;
+	pinfo->rinfo = rinfo;
+	pinfo->oldrate = 0;
 
 	return pinfo;
 }
@@ -330,6 +470,7 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 static void rate_control_pid_free(void *priv)
 {
 	struct rc_pid_info *pinfo = priv;
+	kfree(pinfo->rinfo);
 	kfree(pinfo);
 }
 
-- 
cgit v1.2.3


From 1dc4d1e6a1e81fee0d488cec4fcd39269ec51318 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Wed, 19 Dec 2007 01:26:52 +0100
Subject: rc80211-pid: add sharpening factor

This patch introduces a PID sharpening factor for faster response after
association and low activity events.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.c b/net/mac80211/rc80211_pid.c
index 1116dc67ddd..7f8cf27ad2f 100644
--- a/net/mac80211/rc80211_pid.c
+++ b/net/mac80211/rc80211_pid.c
@@ -23,13 +23,16 @@
  *
  * The controller basically computes the following:
  *
- * adj = CP * err + CI * err_avg + CD * (err - last_err)
+ * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening)
  *
  * where
  * 	adj	adjustment value that is used to switch TX rate (see below)
  * 	err	current error: target vs. current failed frames percentage
  * 	last_err	last error
  * 	err_avg	average (i.e. poor man's integral) of recent errors
+ *	sharpening	non-zero when fast response is needed (i.e. right after
+ *			association or no frames sent for a long time), heading
+ * 			to zero over time
  * 	CP	Proportional coefficient
  * 	CI	Integral coefficient
  * 	CD	Derivative coefficient
@@ -65,6 +68,10 @@
 #define RC_PID_SMOOTHING_SHIFT 3
 #define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
 
+/* Sharpening factor (used for D part of PID controller) */
+#define RC_PID_SHARPENING_FACTOR 0
+#define RC_PID_SHARPENING_DURATION 0
+
 /* Fixed point arithmetic shifting amount. */
 #define RC_PID_ARITH_SHIFT 8
 
@@ -131,8 +138,11 @@ struct rc_pid_sta_info {
 	 */
 	s32 err_avg_sc;
 
-	/* Last framed failes percentage sample */
+	/* Last framed failes percentage sample. */
 	u32 last_pf;
+
+	/* Sharpening needed. */
+	u8 sharp_cnt;
 };
 
 /* Algorithm parameters. We keep them on a per-algorithm approach, so they can
@@ -267,20 +277,26 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 
 	mode = local->oper_hw_mode;
 	spinfo = sta->rate_ctrl_priv;
+
+	/* In case nothing happened during the previous control interval, turn
+	 * the sharpening factor on. */
+	if (jiffies - spinfo->last_sample > 2 * RC_PID_INTERVAL)
+		spinfo->sharp_cnt = RC_PID_SHARPENING_DURATION;
+
 	spinfo->last_sample = jiffies;
 
-	/* If no frames were transmitted, we assume the old sample is
+	/* This should never happen, but in case, we assume the old sample is
 	 * still a good measurement and copy it. */
-	if (spinfo->tx_num_xmit == 0)
+	if (unlikely(spinfo->tx_num_xmit == 0))
 		pf = spinfo->last_pf;
 	else {
 		pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
 		pf <<= RC_PID_ARITH_SHIFT;
-
-		spinfo->tx_num_xmit = 0;
-		spinfo->tx_num_failed = 0;
 	}
 
+	spinfo->tx_num_xmit = 0;
+	spinfo->tx_num_failed = 0;
+
 	/* If we just switched rate, update the rate behaviour info. */
 	if (pinfo->oldrate != sta->txrate) {
 
@@ -302,8 +318,11 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
 	err_int = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
 
-	err_der = pf - spinfo->last_pf;
+	err_der = pf - spinfo->last_pf
+		  * (1 + RC_PID_SHARPENING_FACTOR * spinfo->sharp_cnt);
 	spinfo->last_pf = pf;
+	if (spinfo->sharp_cnt)
+			spinfo->sharp_cnt--;
 
 	/* Compute the controller output. */
 	adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
-- 
cgit v1.2.3


From 12446c67fea1e5bc74c58e43ef53eea308cdda61 Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mattias.nissler@gmx.de>
Date: Wed, 19 Dec 2007 01:27:18 +0100
Subject: rc80211-pid: add debugging

This adds a new debugfs file from which rate control relevant events can be
read one event per line. The output includes the current time, so graphs can be
created showing the rate control parameters. This helps in evaluating and
tuning rate control parameters. While at it, we split headers and code for
better readability.

Signed-off-by: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Makefile              |  11 +-
 net/mac80211/rc80211_pid.c         | 525 -------------------------------------
 net/mac80211/rc80211_pid.h         | 242 +++++++++++++++++
 net/mac80211/rc80211_pid_algo.c    | 444 +++++++++++++++++++++++++++++++
 net/mac80211/rc80211_pid_debugfs.c | 223 ++++++++++++++++
 5 files changed, 918 insertions(+), 527 deletions(-)
 delete mode 100644 net/mac80211/rc80211_pid.c
 create mode 100644 net/mac80211/rc80211_pid.h
 create mode 100644 net/mac80211/rc80211_pid_algo.c
 create mode 100644 net/mac80211/rc80211_pid_debugfs.c

(limited to 'net')

diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index a375f0477da..06aea8009cd 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,10 +1,17 @@
 obj-$(CONFIG_MAC80211) += mac80211.o
 
 mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
-mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o
 mac80211-objs-$(CONFIG_NET_SCHED) += wme.o
 mac80211-objs-$(CONFIG_MAC80211_RC_SIMPLE) += rc80211_simple.o
-mac80211-objs-$(CONFIG_MAC80211_RC_PID) += rc80211_pid.o
+mac80211-objs-$(CONFIG_MAC80211_RC_PID) += rc80211_pid_algo.o
+
+mac80211-debugfs-objs-$(CONFIG_MAC80211_RC_PID) += rc80211_pid_debugfs.o
+mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += \
+	debugfs.o \
+	debugfs_sta.o \
+	debugfs_netdev.o \
+	debugfs_key.o \
+	$(mac80211-debugfs-objs-y)
 
 mac80211-objs := \
 	ieee80211.o \
diff --git a/net/mac80211/rc80211_pid.c b/net/mac80211/rc80211_pid.c
deleted file mode 100644
index 7f8cf27ad2f..00000000000
--- a/net/mac80211/rc80211_pid.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Copyright 2002-2005, Instant802 Networks, Inc.
- * Copyright 2005, Devicescape Software, Inc.
- * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
- * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/netdevice.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-
-#include <net/mac80211.h>
-#include "ieee80211_rate.h"
-
-
-/* This is an implementation of a TX rate control algorithm that uses a PID
- * controller. Given a target failed frames rate, the controller decides about
- * TX rate changes to meet the target failed frames rate.
- *
- * The controller basically computes the following:
- *
- * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening)
- *
- * where
- * 	adj	adjustment value that is used to switch TX rate (see below)
- * 	err	current error: target vs. current failed frames percentage
- * 	last_err	last error
- * 	err_avg	average (i.e. poor man's integral) of recent errors
- *	sharpening	non-zero when fast response is needed (i.e. right after
- *			association or no frames sent for a long time), heading
- * 			to zero over time
- * 	CP	Proportional coefficient
- * 	CI	Integral coefficient
- * 	CD	Derivative coefficient
- *
- * CP, CI, CD are subject to careful tuning.
- *
- * The integral component uses a exponential moving average approach instead of
- * an actual sliding window. The advantage is that we don't need to keep an
- * array of the last N error values and computation is easier.
- *
- * Once we have the adj value, we map it to a rate by means of a learning
- * algorithm. This algorithm keeps the state of the percentual failed frames
- * difference between rates. The behaviour of the lowest available rate is kept
- * as a reference value, and every time we switch between two rates, we compute
- * the difference between the failed frames each rate exhibited. By doing so,
- * we compare behaviours which different rates exhibited in adjacent timeslices,
- * thus the comparison is minimally affected by external conditions. This
- * difference gets propagated to the whole set of measurements, so that the
- * reference is always the same. Periodically, we normalize this set so that
- * recent events weigh the most. By comparing the adj value with this set, we
- * avoid pejorative switches to lower rates and allow for switches to higher
- * rates if they behaved well.
- *
- * Note that for the computations we use a fixed-point representation to avoid
- * floating point arithmetic. Hence, all values are shifted left by
- * RC_PID_ARITH_SHIFT.
- */
-
-/* Sampling period for measuring percentage of failed frames. */
-#define RC_PID_INTERVAL (HZ / 8)
-
-/* Exponential averaging smoothness (used for I part of PID controller) */
-#define RC_PID_SMOOTHING_SHIFT 3
-#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
-
-/* Sharpening factor (used for D part of PID controller) */
-#define RC_PID_SHARPENING_FACTOR 0
-#define RC_PID_SHARPENING_DURATION 0
-
-/* Fixed point arithmetic shifting amount. */
-#define RC_PID_ARITH_SHIFT 8
-
-/* Fixed point arithmetic factor. */
-#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
-
-/* Proportional PID component coefficient. */
-#define RC_PID_COEFF_P 15
-/* Integral PID component coefficient. */
-#define RC_PID_COEFF_I 9
-/* Derivative PID component coefficient. */
-#define RC_PID_COEFF_D 15
-
-/* Target failed frames rate for the PID controller. NB: This effectively gives
- * maximum failed frames percentage we're willing to accept. If the wireless
- * link quality is good, the controller will fail to adjust failed frames
- * percentage to the target. This is intentional.
- */
-#define RC_PID_TARGET_PF (11 << RC_PID_ARITH_SHIFT)
-
-/* Rate behaviour normalization quantity over time. */
-#define RC_PID_NORM_OFFSET 3
-
-/* Push high rates right after loading. */
-#define RC_PID_FAST_START 0
-
-/* Arithmetic right shift for positive and negative values for ISO C. */
-#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
-	(x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)
-
-struct rc_pid_sta_info {
-	unsigned long last_change;
-	unsigned long last_sample;
-
-	u32 tx_num_failed;
-	u32 tx_num_xmit;
-
-	/* Average failed frames percentage error (i.e. actual vs. target
-	 * percentage), scaled by RC_PID_SMOOTHING. This value is computed
-	 * using using an exponential weighted average technique:
-	 *
-	 *           (RC_PID_SMOOTHING - 1) * err_avg_old + err
-	 * err_avg = ------------------------------------------
-	 *                       RC_PID_SMOOTHING
-	 *
-	 * where err_avg is the new approximation, err_avg_old the previous one
-	 * and err is the error w.r.t. to the current failed frames percentage
-	 * sample. Note that the bigger RC_PID_SMOOTHING the more weight is
-	 * given to the previous estimate, resulting in smoother behavior (i.e.
-	 * corresponding to a longer integration window).
-	 *
-	 * For computation, we actually don't use the above formula, but this
-	 * one:
-	 *
-	 * err_avg_scaled = err_avg_old_scaled - err_avg_old + err
-	 *
-	 * where:
-	 * 	err_avg_scaled = err * RC_PID_SMOOTHING
-	 * 	err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING
-	 *
-	 * This avoids floating point numbers and the per_failed_old value can
-	 * easily be obtained by shifting per_failed_old_scaled right by
-	 * RC_PID_SMOOTHING_SHIFT.
-	 */
-	s32 err_avg_sc;
-
-	/* Last framed failes percentage sample. */
-	u32 last_pf;
-
-	/* Sharpening needed. */
-	u8 sharp_cnt;
-};
-
-/* Algorithm parameters. We keep them on a per-algorithm approach, so they can
- * be tuned individually for each interface.
- */
-struct rc_pid_rateinfo {
-
-	/* Map sorted rates to rates in ieee80211_hw_mode. */
-	int index;
-
-	/* Map rates in ieee80211_hw_mode to sorted rates. */
-	int rev_index;
-
-	/* Comparison with the lowest rate. */
-	int diff;
-};
-
-struct rc_pid_info {
-
-	/* The failed frames percentage target. */
-	u32 target;
-
-	/* P, I and D coefficients. */
-	s32 coeff_p;
-	s32 coeff_i;
-	s32 coeff_d;
-
-	/* Rates information. */
-	struct rc_pid_rateinfo *rinfo;
-
-	/* Index of the last used rate. */
-	int oldrate;
-};
-
-/* Shift the adjustment so that we won't switch to a lower rate if it exhibited
- * a worse failed frames behaviour and we'll choose the highest rate whose
- * failed frames behaviour is not worse than the one of the original rate
- * target. While at it, check that the adjustment is within the ranges. Then,
- * provide the new rate index. */
-static int rate_control_pid_shift_adjust(struct rc_pid_rateinfo *r,
-					 int adj, int cur, int l)
-{
-	int i, j, k, tmp;
-
-	if (cur + adj < 0)
-		return 0;
-	if (cur + adj >= l)
-		return l - 1;
-
-	i = r[cur + adj].rev_index;
-
-	j = r[cur].rev_index;
-
-	if (adj < 0) {
-			tmp = i;
-			for (k = j; k >= i; k--)
-				if (r[k].diff <= r[j].diff)
-					tmp = k;
-			return r[tmp].index;
-	} else if (adj > 0) {
-			tmp = i;
-			for (k = i + 1; k + i < l; k++)
-				if (r[k].diff <= r[i].diff)
-					tmp = k;
-			return r[tmp].index;
-	}
-	return cur + adj;
-}
-
-static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
-					 struct sta_info *sta, int adj,
-					 struct rc_pid_rateinfo *rinfo)
-{
-	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_hw_mode *mode;
-	int newidx;
-	int maxrate;
-	int back = (adj > 0) ? 1 : -1;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
-	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
-		/* forced unicast rate - do not change STA rate */
-		return;
-	}
-
-	mode = local->oper_hw_mode;
-	maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
-
-	newidx = rate_control_pid_shift_adjust(rinfo, adj, sta->txrate,
-					       mode->num_rates);
-
-	while (newidx != sta->txrate) {
-		if (rate_supported(sta, mode, newidx) &&
-		    (maxrate < 0 || newidx <= maxrate)) {
-			sta->txrate = newidx;
-			break;
-		}
-
-		newidx += back;
-	}
-}
-
-/* Normalize the failed frames per-rate differences. */
-static void rate_control_pid_normalize(struct rc_pid_rateinfo *r, int l)
-{
-	int i;
-
-	if (r[0].diff > RC_PID_NORM_OFFSET)
-		r[0].diff -= RC_PID_NORM_OFFSET;
-	else if (r[0].diff < -RC_PID_NORM_OFFSET)
-		r[0].diff += RC_PID_NORM_OFFSET;
-	for (i = 0; i < l - 1; i++)
-		if (r[i + 1].diff > r[i].diff + RC_PID_NORM_OFFSET)
-			r[i + 1].diff -= RC_PID_NORM_OFFSET;
-		else if (r[i + 1].diff <= r[i].diff)
-			r[i + 1].diff += RC_PID_NORM_OFFSET;
-}
-
-static void rate_control_pid_sample(struct rc_pid_info *pinfo,
-				    struct ieee80211_local *local,
-				    struct sta_info *sta)
-{
-	struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
-	struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
-	struct ieee80211_hw_mode *mode;
-	u32 pf;
-	s32 err_avg;
-	s32 err_prop;
-	s32 err_int;
-	s32 err_der;
-	int adj, i, j, tmp;
-
-	mode = local->oper_hw_mode;
-	spinfo = sta->rate_ctrl_priv;
-
-	/* In case nothing happened during the previous control interval, turn
-	 * the sharpening factor on. */
-	if (jiffies - spinfo->last_sample > 2 * RC_PID_INTERVAL)
-		spinfo->sharp_cnt = RC_PID_SHARPENING_DURATION;
-
-	spinfo->last_sample = jiffies;
-
-	/* This should never happen, but in case, we assume the old sample is
-	 * still a good measurement and copy it. */
-	if (unlikely(spinfo->tx_num_xmit == 0))
-		pf = spinfo->last_pf;
-	else {
-		pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
-		pf <<= RC_PID_ARITH_SHIFT;
-	}
-
-	spinfo->tx_num_xmit = 0;
-	spinfo->tx_num_failed = 0;
-
-	/* If we just switched rate, update the rate behaviour info. */
-	if (pinfo->oldrate != sta->txrate) {
-
-		i = rinfo[pinfo->oldrate].rev_index;
-		j = rinfo[sta->txrate].rev_index;
-
-		tmp = (pf - spinfo->last_pf);
-		tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
-
-		rinfo[j].diff = rinfo[i].diff + tmp;
-		pinfo->oldrate = sta->txrate;
-	}
-	rate_control_pid_normalize(rinfo, mode->num_rates);
-
-	/* Compute the proportional, integral and derivative errors. */
-	err_prop = RC_PID_TARGET_PF - pf;
-
-	err_avg = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
-	spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
-	err_int = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
-
-	err_der = pf - spinfo->last_pf
-		  * (1 + RC_PID_SHARPENING_FACTOR * spinfo->sharp_cnt);
-	spinfo->last_pf = pf;
-	if (spinfo->sharp_cnt)
-			spinfo->sharp_cnt--;
-
-	/* Compute the controller output. */
-	adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
-	      + err_der * pinfo->coeff_d);
-	adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT);
-
-	/* Change rate. */
-	if (adj)
-		rate_control_pid_adjust_rate(local, sta, adj, rinfo);
-}
-
-static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
-				       struct sk_buff *skb,
-				       struct ieee80211_tx_status *status)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct rc_pid_info *pinfo = priv;
-	struct sta_info *sta;
-	struct rc_pid_sta_info *spinfo;
-
-	sta = sta_info_get(local, hdr->addr1);
-
-	if (!sta)
-		return;
-
-	/* Ignore all frames that were sent with a different rate than the rate
-	 * we currently advise mac80211 to use. */
-	if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
-		return;
-
-	spinfo = sta->rate_ctrl_priv;
-	spinfo->tx_num_xmit++;
-
-	/* We count frames that totally failed to be transmitted as two bad
-	 * frames, those that made it out but had some retries as one good and
-	 * one bad frame. */
-	if (status->excessive_retries) {
-		spinfo->tx_num_failed += 2;
-		spinfo->tx_num_xmit++;
-	} else if (status->retry_count) {
-		spinfo->tx_num_failed++;
-		spinfo->tx_num_xmit++;
-	}
-
-	if (status->excessive_retries) {
-		sta->tx_retry_failed++;
-		sta->tx_num_consecutive_failures++;
-		sta->tx_num_mpdu_fail++;
-	} else {
-		sta->last_ack_rssi[0] = sta->last_ack_rssi[1];
-		sta->last_ack_rssi[1] = sta->last_ack_rssi[2];
-		sta->last_ack_rssi[2] = status->ack_signal;
-		sta->tx_num_consecutive_failures = 0;
-		sta->tx_num_mpdu_ok++;
-	}
-	sta->tx_retry_count += status->retry_count;
-	sta->tx_num_mpdu_fail += status->retry_count;
-
-	/* Update PID controller state. */
-	if (time_after(jiffies, spinfo->last_sample + RC_PID_INTERVAL))
-		rate_control_pid_sample(pinfo, local, sta);
-
-	sta_info_put(sta);
-}
-
-static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
-				      struct ieee80211_hw_mode *mode,
-				      struct sk_buff *skb,
-				      struct rate_selection *sel)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct sta_info *sta;
-	int rateidx;
-
-	sta = sta_info_get(local, hdr->addr1);
-
-	if (!sta) {
-		sel->rate = rate_lowest(local, mode, NULL);
-		sta_info_put(sta);
-		return;
-	}
-
-	rateidx = sta->txrate;
-
-	if (rateidx >= mode->num_rates)
-		rateidx = mode->num_rates - 1;
-
-	sta_info_put(sta);
-
-	sel->rate = &mode->rates[rateidx];
-}
-
-static void rate_control_pid_rate_init(void *priv, void *priv_sta,
-					  struct ieee80211_local *local,
-					  struct sta_info *sta)
-{
-	/* TODO: This routine should consider using RSSI from previous packets
-	 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
-	 * Until that method is implemented, we will use the lowest supported
-	 * rate as a workaround. */
-	sta->txrate = rate_lowest_index(local, local->oper_hw_mode, sta);
-}
-
-static void *rate_control_pid_alloc(struct ieee80211_local *local)
-{
-	struct rc_pid_info *pinfo;
-	struct rc_pid_rateinfo *rinfo;
-	struct ieee80211_hw_mode *mode;
-	int i, j, tmp;
-	bool s;
-
-	pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
-	if (!pinfo)
-		return NULL;
-
-	/* We can safely assume that oper_hw_mode won't change unless we get
-	 * reinitialized. */
-	mode = local->oper_hw_mode;
-	rinfo = kmalloc(sizeof(*rinfo) * mode->num_rates, GFP_ATOMIC);
-	if (!rinfo) {
-		kfree(pinfo);
-		return NULL;
-	}
-
-	/* Sort the rates. This is optimized for the most common case (i.e.
-	 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
-	 * mapping too. */
-	for (i = 0; i < mode->num_rates; i++) {
-		rinfo[i].index = i;
-		rinfo[i].rev_index = i;
-		if (RC_PID_FAST_START)
-			rinfo[i].diff = 0;
-		else
-			rinfo[i].diff = i * RC_PID_NORM_OFFSET;
-	}
-	for (i = 1; i < mode->num_rates; i++) {
-		s = 0;
-		for (j = 0; j < mode->num_rates - i; j++)
-			if (unlikely(mode->rates[rinfo[j].index].rate >
-				     mode->rates[rinfo[j + 1].index].rate)) {
-				tmp = rinfo[j].index;
-				rinfo[j].index = rinfo[j + 1].index;
-				rinfo[j + 1].index = tmp;
-				rinfo[rinfo[j].index].rev_index = j;
-				rinfo[rinfo[j + 1].index].rev_index = j + 1;
-				s = 1;
-			}
-		if (!s)
-			break;
-	}
-
-	pinfo->target = RC_PID_TARGET_PF;
-	pinfo->coeff_p = RC_PID_COEFF_P;
-	pinfo->coeff_i = RC_PID_COEFF_I;
-	pinfo->coeff_d = RC_PID_COEFF_D;
-	pinfo->rinfo = rinfo;
-	pinfo->oldrate = 0;
-
-	return pinfo;
-}
-
-static void rate_control_pid_free(void *priv)
-{
-	struct rc_pid_info *pinfo = priv;
-	kfree(pinfo->rinfo);
-	kfree(pinfo);
-}
-
-static void rate_control_pid_clear(void *priv)
-{
-}
-
-static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
-{
-	struct rc_pid_sta_info *spinfo;
-
-	spinfo = kzalloc(sizeof(*spinfo), gfp);
-
-	return spinfo;
-}
-
-static void rate_control_pid_free_sta(void *priv, void *priv_sta)
-{
-	struct rc_pid_sta_info *spinfo = priv_sta;
-	kfree(spinfo);
-}
-
-struct rate_control_ops mac80211_rcpid = {
-	.name = "pid",
-	.tx_status = rate_control_pid_tx_status,
-	.get_rate = rate_control_pid_get_rate,
-	.rate_init = rate_control_pid_rate_init,
-	.clear = rate_control_pid_clear,
-	.alloc = rate_control_pid_alloc,
-	.free = rate_control_pid_free,
-	.alloc_sta = rate_control_pid_alloc_sta,
-	.free_sta = rate_control_pid_free_sta,
-};
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
new file mode 100644
index 00000000000..5d0056c1513
--- /dev/null
+++ b/net/mac80211/rc80211_pid.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef RC80211_PID_H
+#define RC80211_PID_H
+
+/* Sampling period for measuring percentage of failed frames. */
+#define RC_PID_INTERVAL (HZ / 8)
+
+/* Exponential averaging smoothness (used for I part of PID controller) */
+#define RC_PID_SMOOTHING_SHIFT 3
+#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
+
+/* Sharpening factor (used for D part of PID controller) */
+#define RC_PID_SHARPENING_FACTOR 0
+#define RC_PID_SHARPENING_DURATION 0
+
+/* Fixed point arithmetic shifting amount. */
+#define RC_PID_ARITH_SHIFT 8
+
+/* Fixed point arithmetic factor. */
+#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
+
+/* Proportional PID component coefficient. */
+#define RC_PID_COEFF_P 15
+/* Integral PID component coefficient. */
+#define RC_PID_COEFF_I 9
+/* Derivative PID component coefficient. */
+#define RC_PID_COEFF_D 15
+
+/* Target failed frames rate for the PID controller. NB: This effectively gives
+ * maximum failed frames percentage we're willing to accept. If the wireless
+ * link quality is good, the controller will fail to adjust failed frames
+ * percentage to the target. This is intentional.
+ */
+#define RC_PID_TARGET_PF (11 << RC_PID_ARITH_SHIFT)
+
+/* Rate behaviour normalization quantity over time. */
+#define RC_PID_NORM_OFFSET 3
+
+/* Push high rates right after loading. */
+#define RC_PID_FAST_START 0
+
+/* Arithmetic right shift for positive and negative values for ISO C. */
+#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
+	(x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)
+
+enum rc_pid_event_type {
+	RC_PID_EVENT_TYPE_TX_STATUS,
+	RC_PID_EVENT_TYPE_RATE_CHANGE,
+	RC_PID_EVENT_TYPE_TX_RATE,
+	RC_PID_EVENT_TYPE_PF_SAMPLE,
+};
+
+union rc_pid_event_data {
+	/* RC_PID_EVENT_TX_STATUS */
+	struct {
+		struct ieee80211_tx_status tx_status;
+	};
+	/* RC_PID_EVENT_TYPE_RATE_CHANGE */
+	/* RC_PID_EVENT_TYPE_TX_RATE */
+	struct {
+		int index;
+		int rate;
+	};
+	/* RC_PID_EVENT_TYPE_PF_SAMPLE */
+	struct {
+		s32 pf_sample;
+		s32 prop_err;
+		s32 int_err;
+		s32 der_err;
+	};
+};
+
+struct rc_pid_event {
+	/* The time when the event occured */
+	unsigned long timestamp;
+
+	/* Event ID number */
+	unsigned int id;
+
+	/* Type of event */
+	enum rc_pid_event_type type;
+
+	/* type specific data */
+	union rc_pid_event_data data;
+};
+
+/* Size of the event ring buffer. */
+#define RC_PID_EVENT_RING_SIZE 32
+
+struct rc_pid_event_buffer {
+	/* Counter that generates event IDs */
+	unsigned int ev_count;
+
+	/* Ring buffer of events */
+	struct rc_pid_event ring[RC_PID_EVENT_RING_SIZE];
+
+	/* Index to the entry in events_buf to be reused */
+	unsigned int next_entry;
+
+	/* Lock that guards against concurrent access to this buffer struct */
+	spinlock_t lock;
+
+	/* Wait queue for poll/select and blocking I/O */
+	wait_queue_head_t waitqueue;
+};
+
+struct rc_pid_events_file_info {
+	/* The event buffer we read */
+	struct rc_pid_event_buffer *events;
+
+	/* The entry we have should read next */
+	unsigned int next_entry;
+};
+
+void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
+					     struct ieee80211_tx_status *stat);
+
+void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
+					       int index, int rate);
+
+void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
+					   int index, int rate);
+
+void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
+					     s32 pf_sample, s32 prop_err,
+					     s32 int_err, s32 der_err);
+
+void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
+					     struct dentry *dir);
+
+void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta);
+
+struct rc_pid_sta_info {
+	unsigned long last_change;
+	unsigned long last_sample;
+
+	u32 tx_num_failed;
+	u32 tx_num_xmit;
+
+	/* Average failed frames percentage error (i.e. actual vs. target
+	 * percentage), scaled by RC_PID_SMOOTHING. This value is computed
+	 * using using an exponential weighted average technique:
+	 *
+	 *           (RC_PID_SMOOTHING - 1) * err_avg_old + err
+	 * err_avg = ------------------------------------------
+	 *                       RC_PID_SMOOTHING
+	 *
+	 * where err_avg is the new approximation, err_avg_old the previous one
+	 * and err is the error w.r.t. to the current failed frames percentage
+	 * sample. Note that the bigger RC_PID_SMOOTHING the more weight is
+	 * given to the previous estimate, resulting in smoother behavior (i.e.
+	 * corresponding to a longer integration window).
+	 *
+	 * For computation, we actually don't use the above formula, but this
+	 * one:
+	 *
+	 * err_avg_scaled = err_avg_old_scaled - err_avg_old + err
+	 *
+	 * where:
+	 * 	err_avg_scaled = err * RC_PID_SMOOTHING
+	 * 	err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING
+	 *
+	 * This avoids floating point numbers and the per_failed_old value can
+	 * easily be obtained by shifting per_failed_old_scaled right by
+	 * RC_PID_SMOOTHING_SHIFT.
+	 */
+	s32 err_avg_sc;
+
+	/* Last framed failes percentage sample. */
+	u32 last_pf;
+
+	/* Sharpening needed. */
+	u8 sharp_cnt;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	/* Event buffer */
+	struct rc_pid_event_buffer events;
+
+	/* Events debugfs file entry */
+	struct dentry *events_entry;
+#endif
+};
+
+/* Algorithm parameters. We keep them on a per-algorithm approach, so they can
+ * be tuned individually for each interface.
+ */
+struct rc_pid_rateinfo {
+
+	/* Map sorted rates to rates in ieee80211_hw_mode. */
+	int index;
+
+	/* Map rates in ieee80211_hw_mode to sorted rates. */
+	int rev_index;
+
+	/* Did we do any measurement on this rate? */
+	bool valid;
+
+	/* Comparison with the lowest rate. */
+	int diff;
+};
+
+struct rc_pid_info {
+
+	/* The failed frames percentage target. */
+	unsigned int target;
+
+	/* Rate at which failed frames percentage is sampled in 0.001s. */
+	unsigned int sampling_period;
+
+	/* P, I and D coefficients. */
+	int coeff_p;
+	int coeff_i;
+	int coeff_d;
+
+	/* Exponential averaging shift. */
+	unsigned int smoothing_shift;
+
+	/* Sharpening shift and duration. */
+	unsigned int sharpen_shift;
+	unsigned int sharpen_duration;
+
+	/* Normalization offset. */
+	unsigned int norm_offset;
+
+	/* Fast starst parameter. */
+	unsigned int fast_start;
+
+	/* Rates information. */
+	struct rc_pid_rateinfo *rinfo;
+
+	/* Index of the last used rate. */
+	int oldrate;
+};
+
+#endif /* RC80211_PID_H */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
new file mode 100644
index 00000000000..3fac3a5d7e0
--- /dev/null
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
+ * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_rate.h"
+
+#include "rc80211_pid.h"
+
+
+/* This is an implementation of a TX rate control algorithm that uses a PID
+ * controller. Given a target failed frames rate, the controller decides about
+ * TX rate changes to meet the target failed frames rate.
+ *
+ * The controller basically computes the following:
+ *
+ * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening)
+ *
+ * where
+ * 	adj	adjustment value that is used to switch TX rate (see below)
+ * 	err	current error: target vs. current failed frames percentage
+ * 	last_err	last error
+ * 	err_avg	average (i.e. poor man's integral) of recent errors
+ *	sharpening	non-zero when fast response is needed (i.e. right after
+ *			association or no frames sent for a long time), heading
+ * 			to zero over time
+ * 	CP	Proportional coefficient
+ * 	CI	Integral coefficient
+ * 	CD	Derivative coefficient
+ *
+ * CP, CI, CD are subject to careful tuning.
+ *
+ * The integral component uses a exponential moving average approach instead of
+ * an actual sliding window. The advantage is that we don't need to keep an
+ * array of the last N error values and computation is easier.
+ *
+ * Once we have the adj value, we map it to a rate by means of a learning
+ * algorithm. This algorithm keeps the state of the percentual failed frames
+ * difference between rates. The behaviour of the lowest available rate is kept
+ * as a reference value, and every time we switch between two rates, we compute
+ * the difference between the failed frames each rate exhibited. By doing so,
+ * we compare behaviours which different rates exhibited in adjacent timeslices,
+ * thus the comparison is minimally affected by external conditions. This
+ * difference gets propagated to the whole set of measurements, so that the
+ * reference is always the same. Periodically, we normalize this set so that
+ * recent events weigh the most. By comparing the adj value with this set, we
+ * avoid pejorative switches to lower rates and allow for switches to higher
+ * rates if they behaved well.
+ *
+ * Note that for the computations we use a fixed-point representation to avoid
+ * floating point arithmetic. Hence, all values are shifted left by
+ * RC_PID_ARITH_SHIFT.
+ */
+
+
+/* Shift the adjustment so that we won't switch to a lower rate if it exhibited
+ * a worse failed frames behaviour and we'll choose the highest rate whose
+ * failed frames behaviour is not worse than the one of the original rate
+ * target. While at it, check that the adjustment is within the ranges. Then,
+ * provide the new rate index. */
+static int rate_control_pid_shift_adjust(struct rc_pid_rateinfo *r,
+					 int adj, int cur, int l)
+{
+	int i, j, k, tmp;
+
+	if (cur + adj < 0)
+		return 0;
+	if (cur + adj >= l)
+		return l - 1;
+
+	i = r[cur + adj].rev_index;
+
+	j = r[cur].rev_index;
+
+	if (adj < 0) {
+			tmp = i;
+			for (k = j; k >= i; k--)
+				if (r[k].diff <= r[j].diff)
+					tmp = k;
+			return r[tmp].index;
+	} else if (adj > 0) {
+			tmp = i;
+			for (k = i + 1; k + i < l; k++)
+				if (r[k].diff <= r[i].diff)
+					tmp = k;
+			return r[tmp].index;
+	}
+	return cur + adj;
+}
+
+static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
+					 struct sta_info *sta, int adj,
+					 struct rc_pid_rateinfo *rinfo)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_hw_mode *mode;
+	int newidx;
+	int maxrate;
+	int back = (adj > 0) ? 1 : -1;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
+		/* forced unicast rate - do not change STA rate */
+		return;
+	}
+
+	mode = local->oper_hw_mode;
+	maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
+
+	newidx = rate_control_pid_shift_adjust(rinfo, adj, sta->txrate,
+					       mode->num_rates);
+
+	while (newidx != sta->txrate) {
+		if (rate_supported(sta, mode, newidx) &&
+		    (maxrate < 0 || newidx <= maxrate)) {
+			sta->txrate = newidx;
+			break;
+		}
+
+		newidx += back;
+	}
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	rate_control_pid_event_rate_change(
+		&((struct rc_pid_sta_info *)sta->rate_ctrl_priv)->events,
+		newidx, mode->rates[newidx].rate);
+#endif
+}
+
+/* Normalize the failed frames per-rate differences. */
+static void rate_control_pid_normalize(struct rc_pid_rateinfo *r, int l)
+{
+	int i;
+
+	if (r[0].diff > RC_PID_NORM_OFFSET)
+		r[0].diff -= RC_PID_NORM_OFFSET;
+	else if (r[0].diff < -RC_PID_NORM_OFFSET)
+		r[0].diff += RC_PID_NORM_OFFSET;
+	for (i = 0; i < l - 1; i++)
+		if (r[i + 1].diff > r[i].diff + RC_PID_NORM_OFFSET)
+			r[i + 1].diff -= RC_PID_NORM_OFFSET;
+		else if (r[i + 1].diff <= r[i].diff)
+			r[i + 1].diff += RC_PID_NORM_OFFSET;
+}
+
+static void rate_control_pid_sample(struct rc_pid_info *pinfo,
+				    struct ieee80211_local *local,
+				    struct sta_info *sta)
+{
+	struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
+	struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
+	struct ieee80211_hw_mode *mode;
+	u32 pf;
+	s32 err_avg;
+	s32 err_prop;
+	s32 err_int;
+	s32 err_der;
+	int adj, i, j, tmp;
+
+	mode = local->oper_hw_mode;
+	spinfo = sta->rate_ctrl_priv;
+
+	/* In case nothing happened during the previous control interval, turn
+	 * the sharpening factor on. */
+	if (jiffies - spinfo->last_sample > 2 * RC_PID_INTERVAL)
+		spinfo->sharp_cnt = RC_PID_SHARPENING_DURATION;
+
+	spinfo->last_sample = jiffies;
+
+	/* This should never happen, but in case, we assume the old sample is
+	 * still a good measurement and copy it. */
+	if (unlikely(spinfo->tx_num_xmit == 0))
+		pf = spinfo->last_pf;
+	else {
+		pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
+		pf <<= RC_PID_ARITH_SHIFT;
+	}
+
+	spinfo->tx_num_xmit = 0;
+	spinfo->tx_num_failed = 0;
+
+	/* If we just switched rate, update the rate behaviour info. */
+	if (pinfo->oldrate != sta->txrate) {
+
+		i = rinfo[pinfo->oldrate].rev_index;
+		j = rinfo[sta->txrate].rev_index;
+
+		tmp = (pf - spinfo->last_pf);
+		tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
+
+		rinfo[j].diff = rinfo[i].diff + tmp;
+		pinfo->oldrate = sta->txrate;
+	}
+	rate_control_pid_normalize(rinfo, mode->num_rates);
+
+	/* Compute the proportional, integral and derivative errors. */
+	err_prop = RC_PID_TARGET_PF - pf;
+
+	err_avg = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
+	spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
+	err_int = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
+
+	err_der = pf - spinfo->last_pf
+		  * (1 + RC_PID_SHARPENING_FACTOR * spinfo->sharp_cnt);
+	spinfo->last_pf = pf;
+	if (spinfo->sharp_cnt)
+			spinfo->sharp_cnt--;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	rate_control_pid_event_pf_sample(&spinfo->events, pf, err_prop, err_int,
+					 err_der);
+#endif
+
+	/* Compute the controller output. */
+	adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
+	      + err_der * pinfo->coeff_d);
+	adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT);
+
+	/* Change rate. */
+	if (adj)
+		rate_control_pid_adjust_rate(local, sta, adj, rinfo);
+}
+
+static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
+				       struct sk_buff *skb,
+				       struct ieee80211_tx_status *status)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct rc_pid_info *pinfo = priv;
+	struct sta_info *sta;
+	struct rc_pid_sta_info *spinfo;
+
+	sta = sta_info_get(local, hdr->addr1);
+
+	if (!sta)
+		return;
+
+	/* Ignore all frames that were sent with a different rate than the rate
+	 * we currently advise mac80211 to use. */
+	if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
+		return;
+
+	spinfo = sta->rate_ctrl_priv;
+	spinfo->tx_num_xmit++;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	rate_control_pid_event_tx_status(&spinfo->events, status);
+#endif
+
+	/* We count frames that totally failed to be transmitted as two bad
+	 * frames, those that made it out but had some retries as one good and
+	 * one bad frame. */
+	if (status->excessive_retries) {
+		spinfo->tx_num_failed += 2;
+		spinfo->tx_num_xmit++;
+	} else if (status->retry_count) {
+		spinfo->tx_num_failed++;
+		spinfo->tx_num_xmit++;
+	}
+
+	if (status->excessive_retries) {
+		sta->tx_retry_failed++;
+		sta->tx_num_consecutive_failures++;
+		sta->tx_num_mpdu_fail++;
+	} else {
+		sta->last_ack_rssi[0] = sta->last_ack_rssi[1];
+		sta->last_ack_rssi[1] = sta->last_ack_rssi[2];
+		sta->last_ack_rssi[2] = status->ack_signal;
+		sta->tx_num_consecutive_failures = 0;
+		sta->tx_num_mpdu_ok++;
+	}
+	sta->tx_retry_count += status->retry_count;
+	sta->tx_num_mpdu_fail += status->retry_count;
+
+	/* Update PID controller state. */
+	if (time_after(jiffies, spinfo->last_sample + RC_PID_INTERVAL))
+		rate_control_pid_sample(pinfo, local, sta);
+
+	sta_info_put(sta);
+}
+
+static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
+				      struct ieee80211_hw_mode *mode,
+				      struct sk_buff *skb,
+				      struct rate_selection *sel)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct sta_info *sta;
+	int rateidx;
+
+	sta = sta_info_get(local, hdr->addr1);
+
+	if (!sta) {
+		sel->rate = rate_lowest(local, mode, NULL);
+		sta_info_put(sta);
+		return;
+	}
+
+	rateidx = sta->txrate;
+
+	if (rateidx >= mode->num_rates)
+		rateidx = mode->num_rates - 1;
+
+	sta_info_put(sta);
+
+	sel->rate = &mode->rates[rateidx];
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	rate_control_pid_event_tx_rate(
+		&((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events,
+		rateidx, mode->rates[rateidx].rate);
+#endif
+}
+
+static void rate_control_pid_rate_init(void *priv, void *priv_sta,
+					  struct ieee80211_local *local,
+					  struct sta_info *sta)
+{
+	/* TODO: This routine should consider using RSSI from previous packets
+	 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
+	 * Until that method is implemented, we will use the lowest supported
+	 * rate as a workaround. */
+	sta->txrate = rate_lowest_index(local, local->oper_hw_mode, sta);
+}
+
+static void *rate_control_pid_alloc(struct ieee80211_local *local)
+{
+	struct rc_pid_info *pinfo;
+	struct rc_pid_rateinfo *rinfo;
+	struct ieee80211_hw_mode *mode;
+	int i, j, tmp;
+	bool s;
+
+	pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
+	if (!pinfo)
+		return NULL;
+
+	/* We can safely assume that oper_hw_mode won't change unless we get
+	 * reinitialized. */
+	mode = local->oper_hw_mode;
+	rinfo = kmalloc(sizeof(*rinfo) * mode->num_rates, GFP_ATOMIC);
+	if (!rinfo) {
+		kfree(pinfo);
+		return NULL;
+	}
+
+	/* Sort the rates. This is optimized for the most common case (i.e.
+	 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
+	 * mapping too. */
+	for (i = 0; i < mode->num_rates; i++) {
+		rinfo[i].index = i;
+		rinfo[i].rev_index = i;
+		if (RC_PID_FAST_START)
+			rinfo[i].diff = 0;
+		else
+			rinfo[i].diff = i * RC_PID_NORM_OFFSET;
+	}
+	for (i = 1; i < mode->num_rates; i++) {
+		s = 0;
+		for (j = 0; j < mode->num_rates - i; j++)
+			if (unlikely(mode->rates[rinfo[j].index].rate >
+				     mode->rates[rinfo[j + 1].index].rate)) {
+				tmp = rinfo[j].index;
+				rinfo[j].index = rinfo[j + 1].index;
+				rinfo[j + 1].index = tmp;
+				rinfo[rinfo[j].index].rev_index = j;
+				rinfo[rinfo[j + 1].index].rev_index = j + 1;
+				s = 1;
+			}
+		if (!s)
+			break;
+	}
+
+	pinfo->target = RC_PID_TARGET_PF;
+	pinfo->coeff_p = RC_PID_COEFF_P;
+	pinfo->coeff_i = RC_PID_COEFF_I;
+	pinfo->coeff_d = RC_PID_COEFF_D;
+	pinfo->rinfo = rinfo;
+	pinfo->oldrate = 0;
+
+	return pinfo;
+}
+
+static void rate_control_pid_free(void *priv)
+{
+	struct rc_pid_info *pinfo = priv;
+	kfree(pinfo->rinfo);
+	kfree(pinfo);
+}
+
+static void rate_control_pid_clear(void *priv)
+{
+}
+
+static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
+{
+	struct rc_pid_sta_info *spinfo;
+
+	spinfo = kzalloc(sizeof(*spinfo), gfp);
+	if (spinfo == NULL)
+		return NULL;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	spin_lock_init(&spinfo->events.lock);
+	init_waitqueue_head(&spinfo->events.waitqueue);
+#endif
+
+	return spinfo;
+}
+
+static void rate_control_pid_free_sta(void *priv, void *priv_sta)
+{
+	struct rc_pid_sta_info *spinfo = priv_sta;
+	kfree(spinfo);
+}
+
+struct rate_control_ops mac80211_rcpid = {
+	.name = "pid",
+	.tx_status = rate_control_pid_tx_status,
+	.get_rate = rate_control_pid_get_rate,
+	.rate_init = rate_control_pid_rate_init,
+	.clear = rate_control_pid_clear,
+	.alloc = rate_control_pid_alloc,
+	.free = rate_control_pid_free,
+	.alloc_sta = rate_control_pid_alloc_sta,
+	.free_sta = rate_control_pid_free_sta,
+#ifdef CONFIG_MAC80211_DEBUGFS
+	.add_sta_debugfs = rate_control_pid_add_sta_debugfs,
+	.remove_sta_debugfs = rate_control_pid_remove_sta_debugfs,
+#endif
+};
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
new file mode 100644
index 00000000000..91818e4ff00
--- /dev/null
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/poll.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+
+#include <net/mac80211.h>
+#include "ieee80211_rate.h"
+
+#include "rc80211_pid.h"
+
+static void rate_control_pid_event(struct rc_pid_event_buffer *buf,
+				   enum rc_pid_event_type type,
+				   union rc_pid_event_data *data)
+{
+	struct rc_pid_event *ev;
+	unsigned long status;
+
+	spin_lock_irqsave(&buf->lock, status);
+	ev = &(buf->ring[buf->next_entry]);
+	buf->next_entry = (buf->next_entry + 1) % RC_PID_EVENT_RING_SIZE;
+
+	ev->timestamp = jiffies;
+	ev->id = buf->ev_count++;
+	ev->type = type;
+	ev->data = *data;
+
+	spin_unlock_irqrestore(&buf->lock, status);
+
+	wake_up_all(&buf->waitqueue);
+}
+
+void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
+					     struct ieee80211_tx_status *stat)
+{
+	union rc_pid_event_data evd;
+
+	memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_status));
+	rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd);
+}
+
+void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
+					       int index, int rate)
+{
+	union rc_pid_event_data evd;
+
+	evd.index = index;
+	evd.rate = rate;
+	rate_control_pid_event(buf, RC_PID_EVENT_TYPE_RATE_CHANGE, &evd);
+}
+
+void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
+					   int index, int rate)
+{
+	union rc_pid_event_data evd;
+
+	evd.index = index;
+	evd.rate = rate;
+	rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_RATE, &evd);
+}
+
+void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
+					     s32 pf_sample, s32 prop_err,
+					     s32 int_err, s32 der_err)
+{
+	union rc_pid_event_data evd;
+
+	evd.pf_sample = pf_sample;
+	evd.prop_err = prop_err;
+	evd.int_err = int_err;
+	evd.der_err = der_err;
+	rate_control_pid_event(buf, RC_PID_EVENT_TYPE_PF_SAMPLE, &evd);
+}
+
+static int rate_control_pid_events_open(struct inode *inode, struct file *file)
+{
+	struct rc_pid_sta_info *sinfo = inode->i_private;
+	struct rc_pid_event_buffer *events = &sinfo->events;
+	struct rc_pid_events_file_info *file_info;
+	unsigned int status;
+
+	/* Allocate a state struct */
+	file_info = kmalloc(sizeof(*file_info), GFP_KERNEL);
+	if (file_info == NULL)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&events->lock, status);
+
+	file_info->next_entry = events->next_entry;
+	file_info->events = events;
+
+	spin_unlock_irqrestore(&events->lock, status);
+
+	file->private_data = file_info;
+
+	return 0;
+}
+
+static int rate_control_pid_events_release(struct inode *inode,
+					   struct file *file)
+{
+	struct rc_pid_events_file_info *file_info = file->private_data;
+
+	kfree(file_info);
+
+	return 0;
+}
+
+static unsigned int rate_control_pid_events_poll(struct file *file,
+						 poll_table *wait)
+{
+	struct rc_pid_events_file_info *file_info = file->private_data;
+
+	poll_wait(file, &file_info->events->waitqueue, wait);
+
+	return POLLIN | POLLRDNORM;
+}
+
+#define RC_PID_PRINT_BUF_SIZE 64
+
+static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
+					    size_t length, loff_t *offset)
+{
+	struct rc_pid_events_file_info *file_info = file->private_data;
+	struct rc_pid_event_buffer *events = file_info->events;
+	struct rc_pid_event *ev;
+	char pb[RC_PID_PRINT_BUF_SIZE];
+	int ret;
+	int p;
+	unsigned int status;
+
+	/* Check if there is something to read. */
+	if (events->next_entry == file_info->next_entry) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		/* Wait */
+		ret = wait_event_interruptible(events->waitqueue,
+				events->next_entry != file_info->next_entry);
+
+		if (ret)
+			return ret;
+	}
+
+	/* Write out one event per call. I don't care whether it's a little
+	 * inefficient, this is debugging code anyway. */
+	spin_lock_irqsave(&events->lock, status);
+
+	/* Get an event */
+	ev = &(events->ring[file_info->next_entry]);
+	file_info->next_entry = (file_info->next_entry + 1) %
+				RC_PID_EVENT_RING_SIZE;
+
+	/* Print information about the event. Note that userpace needs to
+	 * provide large enough buffers. */
+	length = length < RC_PID_PRINT_BUF_SIZE ?
+		 length : RC_PID_PRINT_BUF_SIZE;
+	p = snprintf(pb, length, "%u %lu ", ev->id, ev->timestamp);
+	switch (ev->type) {
+	case RC_PID_EVENT_TYPE_TX_STATUS:
+		p += snprintf(pb + p, length - p, "tx_status %u %u",
+			      ev->data.tx_status.excessive_retries,
+			      ev->data.tx_status.retry_count);
+		break;
+	case RC_PID_EVENT_TYPE_RATE_CHANGE:
+		p += snprintf(pb + p, length - p, "rate_change %d %d",
+			      ev->data.index, ev->data.rate);
+		break;
+	case RC_PID_EVENT_TYPE_TX_RATE:
+		p += snprintf(pb + p, length - p, "tx_rate %d %d",
+			      ev->data.index, ev->data.rate);
+		break;
+	case RC_PID_EVENT_TYPE_PF_SAMPLE:
+		p += snprintf(pb + p, length - p,
+			      "pf_sample %d %d %d %d",
+			      ev->data.pf_sample, ev->data.prop_err,
+			      ev->data.int_err, ev->data.der_err);
+		break;
+	}
+	p += snprintf(pb + p, length - p, "\n");
+
+	spin_unlock_irqrestore(&events->lock, status);
+
+	if (copy_to_user(buf, pb, p))
+		return -EFAULT;
+
+	return p;
+}
+
+#undef RC_PID_PRINT_BUF_SIZE
+
+struct file_operations rc_pid_fop_events = {
+	.owner = THIS_MODULE,
+	.read = rate_control_pid_events_read,
+	.poll = rate_control_pid_events_poll,
+	.open = rate_control_pid_events_open,
+	.release = rate_control_pid_events_release,
+};
+
+void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
+					     struct dentry *dir)
+{
+	struct rc_pid_sta_info *spinfo = priv_sta;
+
+	spinfo->events_entry = debugfs_create_file("rc_pid_events", S_IRUGO,
+						   dir, spinfo,
+						   &rc_pid_fop_events);
+}
+
+void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta)
+{
+	struct rc_pid_sta_info *spinfo = priv_sta;
+
+	debugfs_remove(spinfo->events_entry);
+}
-- 
cgit v1.2.3


From 1946b74ce03c4edecabde80d027da00a7eab56ca Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mattias.nissler@gmx.de>
Date: Thu, 20 Dec 2007 13:27:26 +0100
Subject: rc80211-pid: export tuning parameters through debugfs

This adds all the tunable parameters used by rc80211_pid to debugfs for easy
testing and tuning.

Signed-off-by: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.h      |  23 ++++++++-
 net/mac80211/rc80211_pid_algo.c | 112 +++++++++++++++++++++++++++++++---------
 2 files changed, 110 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 5d0056c1513..425eb708182 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -119,6 +119,20 @@ struct rc_pid_events_file_info {
 	unsigned int next_entry;
 };
 
+struct rc_pid_debugfs_entries {
+	struct dentry *dir;
+	struct dentry *target;
+	struct dentry *sampling_period;
+	struct dentry *coeff_p;
+	struct dentry *coeff_i;
+	struct dentry *coeff_d;
+	struct dentry *smoothing_shift;
+	struct dentry *sharpen_factor;
+	struct dentry *sharpen_duration;
+	struct dentry *norm_offset;
+	struct dentry *fast_start;
+};
+
 void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
 					     struct ieee80211_tx_status *stat);
 
@@ -222,8 +236,8 @@ struct rc_pid_info {
 	/* Exponential averaging shift. */
 	unsigned int smoothing_shift;
 
-	/* Sharpening shift and duration. */
-	unsigned int sharpen_shift;
+	/* Sharpening factor and duration. */
+	unsigned int sharpen_factor;
 	unsigned int sharpen_duration;
 
 	/* Normalization offset. */
@@ -237,6 +251,11 @@ struct rc_pid_info {
 
 	/* Index of the last used rate. */
 	int oldrate;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	/* Debugfs entries created for the parameters above. */
+	struct rc_pid_debugfs_entries dentries;
+#endif
 };
 
 #endif /* RC80211_PID_H */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 3fac3a5d7e0..631e4688826 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -139,19 +139,20 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 }
 
 /* Normalize the failed frames per-rate differences. */
-static void rate_control_pid_normalize(struct rc_pid_rateinfo *r, int l)
+static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l)
 {
-	int i;
+	int i, norm_offset = pinfo->norm_offset;
+	struct rc_pid_rateinfo *r = pinfo->rinfo;
 
-	if (r[0].diff > RC_PID_NORM_OFFSET)
-		r[0].diff -= RC_PID_NORM_OFFSET;
-	else if (r[0].diff < -RC_PID_NORM_OFFSET)
-		r[0].diff += RC_PID_NORM_OFFSET;
+	if (r[0].diff > norm_offset)
+		r[0].diff -= norm_offset;
+	else if (r[0].diff < -norm_offset)
+		r[0].diff += norm_offset;
 	for (i = 0; i < l - 1; i++)
-		if (r[i + 1].diff > r[i].diff + RC_PID_NORM_OFFSET)
-			r[i + 1].diff -= RC_PID_NORM_OFFSET;
+		if (r[i + 1].diff > r[i].diff + norm_offset)
+			r[i + 1].diff -= norm_offset;
 		else if (r[i + 1].diff <= r[i].diff)
-			r[i + 1].diff += RC_PID_NORM_OFFSET;
+			r[i + 1].diff += norm_offset;
 }
 
 static void rate_control_pid_sample(struct rc_pid_info *pinfo,
@@ -163,18 +164,22 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	struct ieee80211_hw_mode *mode;
 	u32 pf;
 	s32 err_avg;
-	s32 err_prop;
-	s32 err_int;
-	s32 err_der;
+	u32 err_prop;
+	u32 err_int;
+	u32 err_der;
 	int adj, i, j, tmp;
+	unsigned long period;
 
 	mode = local->oper_hw_mode;
 	spinfo = sta->rate_ctrl_priv;
 
 	/* In case nothing happened during the previous control interval, turn
 	 * the sharpening factor on. */
-	if (jiffies - spinfo->last_sample > 2 * RC_PID_INTERVAL)
-		spinfo->sharp_cnt = RC_PID_SHARPENING_DURATION;
+	period = (HZ * pinfo->sampling_period + 500) / 1000;
+	if (!period)
+		period = 1;
+	if (jiffies - spinfo->last_sample > 2 * period)
+		spinfo->sharp_cnt = pinfo->sharpen_duration;
 
 	spinfo->last_sample = jiffies;
 
@@ -202,17 +207,17 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 		rinfo[j].diff = rinfo[i].diff + tmp;
 		pinfo->oldrate = sta->txrate;
 	}
-	rate_control_pid_normalize(rinfo, mode->num_rates);
+	rate_control_pid_normalize(pinfo, mode->num_rates);
 
 	/* Compute the proportional, integral and derivative errors. */
-	err_prop = RC_PID_TARGET_PF - pf;
+	err_prop = pinfo->target - pf;
 
-	err_avg = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
+	err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift;
 	spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
-	err_int = spinfo->err_avg_sc >> RC_PID_SMOOTHING_SHIFT;
+	err_int = spinfo->err_avg_sc >> pinfo->smoothing_shift;
 
-	err_der = pf - spinfo->last_pf
-		  * (1 + RC_PID_SHARPENING_FACTOR * spinfo->sharp_cnt);
+	err_der = (pf - spinfo->last_pf) *
+		  (1 + pinfo->sharpen_factor * spinfo->sharp_cnt);
 	spinfo->last_pf = pf;
 	if (spinfo->sharp_cnt)
 			spinfo->sharp_cnt--;
@@ -241,6 +246,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	struct rc_pid_info *pinfo = priv;
 	struct sta_info *sta;
 	struct rc_pid_sta_info *spinfo;
+	unsigned long period;
 
 	sta = sta_info_get(local, hdr->addr1);
 
@@ -285,7 +291,10 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	sta->tx_num_mpdu_fail += status->retry_count;
 
 	/* Update PID controller state. */
-	if (time_after(jiffies, spinfo->last_sample + RC_PID_INTERVAL))
+	period = (HZ * pinfo->sampling_period + 500) / 1000;
+	if (!period)
+		period = 1;
+	if (time_after(jiffies, spinfo->last_sample + period))
 		rate_control_pid_sample(pinfo, local, sta);
 
 	sta_info_put(sta);
@@ -343,6 +352,9 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 	struct ieee80211_hw_mode *mode;
 	int i, j, tmp;
 	bool s;
+#ifdef CONFIG_MAC80211_DEBUGFS
+	struct rc_pid_debugfs_entries *de;
+#endif
 
 	pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
 	if (!pinfo)
@@ -363,10 +375,10 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 	for (i = 0; i < mode->num_rates; i++) {
 		rinfo[i].index = i;
 		rinfo[i].rev_index = i;
-		if (RC_PID_FAST_START)
+		if (pinfo->fast_start)
 			rinfo[i].diff = 0;
 		else
-			rinfo[i].diff = i * RC_PID_NORM_OFFSET;
+			rinfo[i].diff = i * pinfo->norm_offset;
 	}
 	for (i = 1; i < mode->num_rates; i++) {
 		s = 0;
@@ -385,18 +397,72 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 	}
 
 	pinfo->target = RC_PID_TARGET_PF;
+	pinfo->sampling_period = RC_PID_INTERVAL;
 	pinfo->coeff_p = RC_PID_COEFF_P;
 	pinfo->coeff_i = RC_PID_COEFF_I;
 	pinfo->coeff_d = RC_PID_COEFF_D;
+	pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
+	pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
+	pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
+	pinfo->norm_offset = RC_PID_NORM_OFFSET;
+	pinfo->fast_start = RC_PID_FAST_START;
 	pinfo->rinfo = rinfo;
 	pinfo->oldrate = 0;
 
+#ifdef CONFIG_MAC80211_DEBUGFS
+	de = &pinfo->dentries;
+	de->dir = debugfs_create_dir("rc80211_pid",
+				     local->hw.wiphy->debugfsdir);
+	de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR,
+					de->dir, &pinfo->target);
+	de->sampling_period = debugfs_create_u32("sampling_period",
+						 S_IRUSR | S_IWUSR, de->dir,
+						 &pinfo->sampling_period);
+	de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR,
+					 de->dir, &pinfo->coeff_p);
+	de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR,
+					 de->dir, &pinfo->coeff_i);
+	de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR,
+					 de->dir, &pinfo->coeff_d);
+	de->smoothing_shift = debugfs_create_u32("smoothing_shift",
+						 S_IRUSR | S_IWUSR, de->dir,
+						 &pinfo->smoothing_shift);
+	de->sharpen_factor = debugfs_create_u32("sharpen_factor",
+					       S_IRUSR | S_IWUSR, de->dir,
+					       &pinfo->sharpen_factor);
+	de->sharpen_duration = debugfs_create_u32("sharpen_duration",
+						  S_IRUSR | S_IWUSR, de->dir,
+						  &pinfo->sharpen_duration);
+	de->norm_offset = debugfs_create_u32("norm_offset",
+					     S_IRUSR | S_IWUSR, de->dir,
+					     &pinfo->norm_offset);
+	de->fast_start = debugfs_create_bool("fast_start",
+					     S_IRUSR | S_IWUSR, de->dir,
+					     &pinfo->fast_start);
+#endif
+
 	return pinfo;
 }
 
 static void rate_control_pid_free(void *priv)
 {
 	struct rc_pid_info *pinfo = priv;
+#ifdef CONFIG_MAC80211_DEBUGFS
+	struct rc_pid_debugfs_entries *de = &pinfo->dentries;
+
+	debugfs_remove(de->fast_start);
+	debugfs_remove(de->norm_offset);
+	debugfs_remove(de->sharpen_duration);
+	debugfs_remove(de->sharpen_factor);
+	debugfs_remove(de->smoothing_shift);
+	debugfs_remove(de->coeff_d);
+	debugfs_remove(de->coeff_i);
+	debugfs_remove(de->coeff_p);
+	debugfs_remove(de->sampling_period);
+	debugfs_remove(de->target);
+	debugfs_remove(de->dir);
+#endif
+
 	kfree(pinfo->rinfo);
 	kfree(pinfo);
 }
-- 
cgit v1.2.3


From ce3edf6d0b979fa4d5da7204fd8c6f77f2b8622a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 01:31:22 +0100
Subject: mac80211: clean up eapol frame handling/port control

This cleans up the eapol frame handling and some related code in the
receive and transmit paths. After this patch
 * EAPOL frames addressed to us or the EAPOL group address are
   always accepted regardless of whether they are encrypted or not
 * other frames from a station are dropped if PAE is enabled and
   the station is not authorized
 * unencrypted frames (except the EAPOL frames above) are dropped if
   drop_unencrypted is enabled
 * some superfluous code that eth_type_trans handles anyway is gone
 * port control is done for transmitted packets

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/debugfs_netdev.c  |  27 +++------
 net/mac80211/ieee80211_i.h     |  22 +++----
 net/mac80211/ieee80211_iface.c |   1 -
 net/mac80211/rx.c              | 133 +++++++++++++++++++++--------------------
 net/mac80211/tx.c              |  53 +++++++++-------
 5 files changed, 119 insertions(+), 117 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index f0e6ab7eb62..bf715d28643 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -91,8 +91,7 @@ static const struct file_operations name##_ops = {			\
 /* common attributes */
 IEEE80211_IF_FILE(channel_use, channel_use, DEC);
 IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
-IEEE80211_IF_FILE(eapol, eapol, DEC);
-IEEE80211_IF_FILE(ieee8021_x, ieee802_1x, DEC);
+IEEE80211_IF_FILE(ieee802_1x_pac, ieee802_1x_pac, DEC);
 
 /* STA/IBSS attributes */
 IEEE80211_IF_FILE(state, u.sta.state, DEC);
@@ -170,8 +169,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(channel_use, sta);
 	DEBUGFS_ADD(drop_unencrypted, sta);
-	DEBUGFS_ADD(eapol, sta);
-	DEBUGFS_ADD(ieee8021_x, sta);
+	DEBUGFS_ADD(ieee802_1x_pac, sta);
 	DEBUGFS_ADD(state, sta);
 	DEBUGFS_ADD(bssid, sta);
 	DEBUGFS_ADD(prev_bssid, sta);
@@ -192,8 +190,7 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(channel_use, ap);
 	DEBUGFS_ADD(drop_unencrypted, ap);
-	DEBUGFS_ADD(eapol, ap);
-	DEBUGFS_ADD(ieee8021_x, ap);
+	DEBUGFS_ADD(ieee802_1x_pac, ap);
 	DEBUGFS_ADD(num_sta_ps, ap);
 	DEBUGFS_ADD(dtim_period, ap);
 	DEBUGFS_ADD(dtim_count, ap);
@@ -209,8 +206,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(channel_use, wds);
 	DEBUGFS_ADD(drop_unencrypted, wds);
-	DEBUGFS_ADD(eapol, wds);
-	DEBUGFS_ADD(ieee8021_x, wds);
+	DEBUGFS_ADD(ieee802_1x_pac, wds);
 	DEBUGFS_ADD(peer, wds);
 }
 
@@ -218,8 +214,7 @@ static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(channel_use, vlan);
 	DEBUGFS_ADD(drop_unencrypted, vlan);
-	DEBUGFS_ADD(eapol, vlan);
-	DEBUGFS_ADD(ieee8021_x, vlan);
+	DEBUGFS_ADD(ieee802_1x_pac, vlan);
 }
 
 static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -263,8 +258,7 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(channel_use, sta);
 	DEBUGFS_DEL(drop_unencrypted, sta);
-	DEBUGFS_DEL(eapol, sta);
-	DEBUGFS_DEL(ieee8021_x, sta);
+	DEBUGFS_DEL(ieee802_1x_pac, sta);
 	DEBUGFS_DEL(state, sta);
 	DEBUGFS_DEL(bssid, sta);
 	DEBUGFS_DEL(prev_bssid, sta);
@@ -285,8 +279,7 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(channel_use, ap);
 	DEBUGFS_DEL(drop_unencrypted, ap);
-	DEBUGFS_DEL(eapol, ap);
-	DEBUGFS_DEL(ieee8021_x, ap);
+	DEBUGFS_DEL(ieee802_1x_pac, ap);
 	DEBUGFS_DEL(num_sta_ps, ap);
 	DEBUGFS_DEL(dtim_period, ap);
 	DEBUGFS_DEL(dtim_count, ap);
@@ -302,8 +295,7 @@ static void del_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(channel_use, wds);
 	DEBUGFS_DEL(drop_unencrypted, wds);
-	DEBUGFS_DEL(eapol, wds);
-	DEBUGFS_DEL(ieee8021_x, wds);
+	DEBUGFS_DEL(ieee802_1x_pac, wds);
 	DEBUGFS_DEL(peer, wds);
 }
 
@@ -311,8 +303,7 @@ static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(channel_use, vlan);
 	DEBUGFS_DEL(drop_unencrypted, vlan);
-	DEBUGFS_DEL(eapol, vlan);
-	DEBUGFS_DEL(ieee8021_x, vlan);
+	DEBUGFS_DEL(ieee802_1x_pac, vlan);
 }
 
 static void del_monitor_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b54ed5fe4aa..214109b8d95 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -306,11 +306,11 @@ struct ieee80211_sub_if_data {
 	unsigned int flags;
 
 	int drop_unencrypted;
-	int eapol; /* 0 = process EAPOL frames as normal data frames,
-		    * 1 = send EAPOL frames through wlan#ap to hostapd
-		    *     (default) */
-	int ieee802_1x; /* IEEE 802.1X PAE - drop packet to/from unauthorized
-			 * port */
+	/*
+	 * IEEE 802.1X Port access control in effect,
+	 * drop packets to/from unauthorized port
+	 */
+	int ieee802_1x_pac;
 
 	u16 sequence;
 
@@ -339,8 +339,7 @@ struct ieee80211_sub_if_data {
 		struct {
 			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
-			struct dentry *eapol;
-			struct dentry *ieee8021_x;
+			struct dentry *ieee802_1x_pac;
 			struct dentry *state;
 			struct dentry *bssid;
 			struct dentry *prev_bssid;
@@ -359,8 +358,7 @@ struct ieee80211_sub_if_data {
 		struct {
 			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
-			struct dentry *eapol;
-			struct dentry *ieee8021_x;
+			struct dentry *ieee802_1x_pac;
 			struct dentry *num_sta_ps;
 			struct dentry *dtim_period;
 			struct dentry *dtim_count;
@@ -374,15 +372,13 @@ struct ieee80211_sub_if_data {
 		struct {
 			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
-			struct dentry *eapol;
-			struct dentry *ieee8021_x;
+			struct dentry *ieee802_1x_pac;
 			struct dentry *peer;
 		} wds;
 		struct {
 			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
-			struct dentry *eapol;
-			struct dentry *ieee8021_x;
+			struct dentry *ieee802_1x_pac;
 		} vlan;
 		struct {
 			struct dentry *mode;
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
index 43e505d2945..7cfd8660b23 100644
--- a/net/mac80211/ieee80211_iface.c
+++ b/net/mac80211/ieee80211_iface.c
@@ -22,7 +22,6 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata)
 
 	/* Default values for sub-interface parameters */
 	sdata->drop_unencrypted = 0;
-	sdata->eapol = 1;
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
 		skb_queue_head_init(&sdata->fragments[i].skb_list);
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e65da5780cd..505159f8dff 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -208,7 +208,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		rthdr->it_len = cpu_to_le16(rtap_len);
 	}
 
-	skb_set_mac_header(skb, 0);
+	skb_reset_mac_header(skb);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->protocol = htons(ETH_P_802_2);
@@ -405,18 +405,6 @@ ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
 		return TXRX_DROP;
 	}
 
-	if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
-		rx->skb->pkt_type = PACKET_OTHERHOST;
-	else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0)
-		rx->skb->pkt_type = PACKET_HOST;
-	else if (is_multicast_ether_addr(hdr->addr1)) {
-		if (is_broadcast_ether_addr(hdr->addr1))
-			rx->skb->pkt_type = PACKET_BROADCAST;
-		else
-			rx->skb->pkt_type = PACKET_MULTICAST;
-	} else
-		rx->skb->pkt_type = PACKET_OTHERHOST;
-
 	/* Drop disallowed frame classes based on STA auth/assoc state;
 	 * IEEE 802.11, Chap 5.5.
 	 *
@@ -990,18 +978,10 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx)
 }
 
 static int
-ieee80211_drop_802_1x_pae(struct ieee80211_txrx_data *rx, int hdrlen)
+ieee80211_802_1x_port_control(struct ieee80211_txrx_data *rx)
 {
-	if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb, hdrlen) &&
-	    rx->sdata->type != IEEE80211_IF_TYPE_STA &&
-	    (rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
-		return 0;
-
-	if (unlikely(rx->sdata->ieee802_1x &&
-		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
-		     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
-		     (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) &&
-		     !ieee80211_is_eapol(rx->skb, hdrlen))) {
+	if (unlikely(rx->sdata->ieee802_1x_pac &&
+		     (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)))) {
 #ifdef CONFIG_MAC80211_DEBUG
 		printk(KERN_DEBUG "%s: dropped frame "
 		       "(unauthorized port)\n", rx->dev->name);
@@ -1013,7 +993,7 @@ ieee80211_drop_802_1x_pae(struct ieee80211_txrx_data *rx, int hdrlen)
 }
 
 static int
-ieee80211_drop_unencrypted(struct ieee80211_txrx_data *rx, int hdrlen)
+ieee80211_drop_unencrypted(struct ieee80211_txrx_data *rx)
 {
 	/*
 	 * Pass through unencrypted frames if the hardware has
@@ -1026,9 +1006,7 @@ ieee80211_drop_unencrypted(struct ieee80211_txrx_data *rx, int hdrlen)
 	if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) &&
 		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
 		     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
-		     (rx->key || rx->sdata->drop_unencrypted) &&
-		     (rx->sdata->eapol == 0 ||
-		      !ieee80211_is_eapol(rx->skb, hdrlen)))) {
+		     (rx->key || rx->sdata->drop_unencrypted))) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "%s: RX non-WEP frame, but expected "
 			       "encryption\n", rx->dev->name);
@@ -1156,6 +1134,7 @@ ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 	} else {
 		struct ethhdr *ehdr;
 		__be16 len;
+
 		skb_pull(skb, hdrlen);
 		len = htons(skb->len);
 		ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
@@ -1166,6 +1145,34 @@ ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 	return 0;
 }
 
+/*
+ * requires that rx->skb is a frame with ethernet header
+ */
+static bool ieee80211_frame_allowed(struct ieee80211_txrx_data *rx)
+{
+	static const u8 pae_group_addr[ETH_ALEN]
+		= { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };
+	struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
+
+	/*
+	 * Allow EAPOL frames to us/the PAE group address regardless
+	 * of whether the frame was encrypted or not.
+	 */
+	if (ehdr->h_proto == htons(ETH_P_PAE) &&
+	    (compare_ether_addr(ehdr->h_dest, rx->dev->dev_addr) == 0 ||
+	     compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
+		return true;
+
+	if (ieee80211_802_1x_port_control(rx) ||
+	    ieee80211_drop_unencrypted(rx))
+		return false;
+
+	return true;
+}
+
+/*
+ * requires that rx->skb is a frame with ethernet header
+ */
 static void
 ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
 {
@@ -1173,31 +1180,32 @@ ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
 	struct ieee80211_local *local = rx->local;
 	struct sk_buff *skb, *xmit_skb;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
+	struct sta_info *dsta;
 
 	skb = rx->skb;
 	xmit_skb = NULL;
 
-	if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP
-	    || sdata->type == IEEE80211_IF_TYPE_VLAN) &&
+	if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP ||
+				      sdata->type == IEEE80211_IF_TYPE_VLAN) &&
 	    (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) {
-		if (is_multicast_ether_addr(skb->data)) {
-			/* send multicast frames both to higher layers in
-			 * local net stack and back to the wireless media */
+		if (is_multicast_ether_addr(ehdr->h_dest)) {
+			/*
+			 * send multicast frames both to higher layers in
+			 * local net stack and back to the wireless medium
+			 */
 			xmit_skb = skb_copy(skb, GFP_ATOMIC);
 			if (!xmit_skb && net_ratelimit())
 				printk(KERN_DEBUG "%s: failed to clone "
 				       "multicast frame\n", dev->name);
 		} else {
-			struct sta_info *dsta;
 			dsta = sta_info_get(local, skb->data);
-			if (dsta && !dsta->dev) {
-				if (net_ratelimit())
-					printk(KERN_DEBUG "Station with null "
-					       "dev structure!\n");
-			} else if (dsta && dsta->dev == dev) {
-				/* Destination station is associated to this
-				 * AP, so send the frame directly to it and
-				 * do not pass the frame to local net stack.
+			if (dsta && dsta->dev == dev) {
+				/*
+				 * The destination station is associated to
+				 * this AP (in this VLAN), so send the frame
+				 * directly to it and do not pass it to local
+				 * net stack.
 				 */
 				xmit_skb = skb;
 				skb = NULL;
@@ -1217,8 +1225,8 @@ ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
 	if (xmit_skb) {
 		/* send to wireless media */
 		xmit_skb->protocol = htons(ETH_P_802_3);
-		skb_set_network_header(xmit_skb, 0);
-		skb_set_mac_header(xmit_skb, 0);
+		skb_reset_network_header(xmit_skb);
+		skb_reset_mac_header(xmit_skb);
 		dev_queue_xmit(xmit_skb);
 	}
 }
@@ -1303,38 +1311,36 @@ ieee80211_rx_h_amsdu(struct ieee80211_txrx_data *rx)
 			}
 		}
 
-		skb_set_network_header(frame, 0);
+		skb_reset_network_header(frame);
 		frame->dev = dev;
 		frame->priority = skb->priority;
 		rx->skb = frame;
 
-		if ((ieee80211_drop_802_1x_pae(rx, 0)) ||
-		    (ieee80211_drop_unencrypted(rx, 0))) {
-			if (skb == frame) /* last frame */
-				return TXRX_DROP;
-			dev_kfree_skb(frame);
-			continue;
-		}
-
 		payload = frame->data;
 		ethertype = (payload[6] << 8) | payload[7];
 
 		if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
-			ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
-			compare_ether_addr(payload,
-					   bridge_tunnel_header) == 0)) {
+			    ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
+			   compare_ether_addr(payload,
+					      bridge_tunnel_header) == 0)) {
 			/* remove RFC1042 or Bridge-Tunnel
 			 * encapsulation and replace EtherType */
 			skb_pull(frame, 6);
 			memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
 			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
 		} else {
-			memcpy(skb_push(frame, sizeof(__be16)), &len,
-				sizeof(__be16));
+			memcpy(skb_push(frame, sizeof(__be16)),
+			       &len, sizeof(__be16));
 			memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
 			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
 		}
 
+		if (!ieee80211_frame_allowed(rx)) {
+			if (skb == frame) /* last frame */
+				return TXRX_DROP;
+			dev_kfree_skb(frame);
+			continue;
+		}
 
 		ieee80211_deliver_skb(rx);
 	}
@@ -1347,7 +1353,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 {
 	struct net_device *dev = rx->dev;
 	u16 fc;
-	int err, hdrlen;
+	int err;
 
 	fc = rx->fc;
 	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
@@ -1356,16 +1362,13 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
 		return TXRX_DROP;
 
-	hdrlen = ieee80211_get_hdrlen(fc);
-
-	if ((ieee80211_drop_802_1x_pae(rx, hdrlen)) ||
-	    (ieee80211_drop_unencrypted(rx, hdrlen)))
-		return TXRX_DROP;
-
 	err = ieee80211_data_to_8023(rx);
 	if (unlikely(err))
 		return TXRX_DROP;
 
+	if (!ieee80211_frame_allowed(rx))
+		return TXRX_DROP;
+
 	rx->skb->dev = dev;
 
 	dev->stats.rx_packets++;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4655e303865..6dbd9184288 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -261,18 +261,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
 		return TXRX_CONTINUE;
 	}
 
-	if (unlikely(/* !injected && */ tx->sdata->ieee802_1x &&
-		     !(sta_flags & WLAN_STA_AUTHORIZED))) {
-#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-		DECLARE_MAC_BUF(mac);
-		printk(KERN_DEBUG "%s: dropped frame to %s"
-		       " (unauthorized port)\n", tx->dev->name,
-		       print_mac(mac, hdr->addr1));
-#endif
-		I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port);
-		return TXRX_DROP;
-	}
-
 	return TXRX_CONTINUE;
 }
 
@@ -449,8 +437,7 @@ ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 !(tx->sdata->eapol &&
-		   ieee80211_is_eapol(tx->skb, ieee80211_get_hdrlen(fc)))) {
+		 !ieee80211_is_eapol(tx->skb, ieee80211_get_hdrlen(fc))) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TXRX_DROP;
 	} else {
@@ -1346,6 +1333,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	int encaps_len, skip_header_bytes;
 	int nh_pos, h_pos;
 	struct sta_info *sta;
+	u32 sta_flags = 0;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (unlikely(skb->len < ETH_HLEN)) {
@@ -1361,7 +1349,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	/* convert Ethernet header to proper 802.11 header (based on
 	 * operation mode) */
 	ethertype = (skb->data[12] << 8) | skb->data[13];
-	/* TODO: handling for 802.1x authorized/unauthorized port */
 	fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
 
 	switch (sdata->type) {
@@ -1403,16 +1390,42 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		goto fail;
 	}
 
-	/* receiver is QoS enabled, use a QoS type frame */
 	sta = sta_info_get(local, hdr.addr1);
 	if (sta) {
-		if (sta->flags & WLAN_STA_WME) {
-			fc |= IEEE80211_STYPE_QOS_DATA;
-			hdrlen += 2;
-		}
+		sta_flags = sta->flags;
 		sta_info_put(sta);
 	}
 
+	/* receiver is QoS enabled, use a QoS type frame */
+	if (sta_flags & WLAN_STA_WME) {
+		fc |= IEEE80211_STYPE_QOS_DATA;
+		hdrlen += 2;
+	}
+
+	/*
+	 * If port access control is enabled, drop frames to unauthorised
+	 * stations unless they are EAPOL frames from the local station.
+	 */
+	if (unlikely(sdata->ieee802_1x_pac &&
+		     !(sta_flags & WLAN_STA_AUTHORIZED) &&
+		     !(ethertype == ETH_P_PAE &&
+		       compare_ether_addr(dev->dev_addr,
+					  skb->data + ETH_ALEN) == 0))) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		DECLARE_MAC_BUF(mac);
+
+		if (net_ratelimit())
+			printk(KERN_DEBUG "%s: dropped frame to %s"
+			       " (unauthorized port)\n", dev->name,
+			       print_mac(mac, hdr.addr1));
+#endif
+
+		I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
+
+		ret = 0;
+		goto fail;
+	}
+
 	hdr.frame_control = cpu_to_le16(fc);
 	hdr.duration_id = 0;
 	hdr.seq_ctrl = 0;
-- 
cgit v1.2.3


From 678f5f7117d5780d3a51b201c9f44b7bf90f6a76 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 01:31:23 +0100
Subject: mac80211: clean up eapol handling in TX path

The previous patch left only one user of the ieee80211_is_eapol()
function and that user can be eliminated easily by introducing
a new "frame is EAPOL" flag to handle the frame specially (we
already have this information) instead of doing the (expensive)
ieee80211_is_eapol() all the time.

Also, allow unencrypted frames to be sent when they are injected.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211.c   |  2 ++
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/tx.c          |  7 ++++++-
 net/mac80211/util.c        | 17 -----------------
 4 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index c0dbf77547f..9c14e3d303c 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -732,6 +732,8 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 		pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
 	if (control->flags & IEEE80211_TXCTL_REQUEUE)
 		pkt_data->flags |= IEEE80211_TXPD_REQUEUE;
+	if (control->flags & IEEE80211_TXCTL_EAPOL_FRAME)
+		pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
 	pkt_data->queue = control->queue;
 
 	hdrlen = ieee80211_get_hdrlen_from_skb(skb);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 214109b8d95..baf53c04712 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -164,6 +164,7 @@ struct ieee80211_txrx_data {
 #define IEEE80211_TXPD_REQ_TX_STATUS	BIT(0)
 #define IEEE80211_TXPD_DO_NOT_ENCRYPT	BIT(1)
 #define IEEE80211_TXPD_REQUEUE		BIT(2)
+#define IEEE80211_TXPD_EAPOL_FRAME	BIT(3)
 /* Stored in sk_buff->cb */
 struct ieee80211_tx_packet_data {
 	int ifindex;
@@ -798,7 +799,6 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */
 extern const unsigned char rfc1042_header[6];
 extern const unsigned char bridge_tunnel_header[6];
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len);
-int ieee80211_is_eapol(const struct sk_buff *skb, int hdrlen);
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
 void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6dbd9184288..e177a8dc23b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -437,7 +437,8 @@ ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 !ieee80211_is_eapol(tx->skb, ieee80211_get_hdrlen(fc))) {
+		 !(tx->u.tx.control->flags & IEEE80211_TXCTL_EAPOL_FRAME) &&
+		 !(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TXRX_DROP;
 	} else {
@@ -1241,6 +1242,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
 	if (pkt_data->flags & IEEE80211_TXPD_REQUEUE)
 		control.flags |= IEEE80211_TXCTL_REQUEUE;
+	if (pkt_data->flags & IEEE80211_TXPD_EAPOL_FRAME)
+		control.flags |= IEEE80211_TXCTL_EAPOL_FRAME;
 	control.queue = pkt_data->queue;
 
 	ret = ieee80211_tx(odev, skb, &control);
@@ -1514,6 +1517,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
 	memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
 	pkt_data->ifindex = dev->ifindex;
+	if (ethertype == ETH_P_PAE)
+		pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
 
 	skb->dev = local->mdev;
 	dev->stats.tx_packets++;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index fb7fd896cd0..2b02b2b9d64 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -40,10 +40,6 @@ const unsigned char rfc1042_header[] =
 const unsigned char bridge_tunnel_header[] =
 	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
 
-/* No encapsulation header if EtherType < 0x600 (=length) */
-static const unsigned char eapol_header[] =
-	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e };
-
 
 static int rate_list_match(const int *rate_list, int rate)
 {
@@ -223,19 +219,6 @@ int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
 
-int ieee80211_is_eapol(const struct sk_buff *skb, int hdrlen)
-{
-	if (unlikely(skb->len < 10))
-		return 0;
-
-	if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) &&
-		     memcmp(skb->data + hdrlen, eapol_header,
-			    sizeof(eapol_header)) == 0))
-		return 1;
-
-	return 0;
-}
-
 void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-- 
cgit v1.2.3


From 4e20cb293cc0b30f32a53011fd6b38493d9fdcaa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 01:31:24 +0100
Subject: mac80211: make ieee80211_rx_mgmt_action static

The function is only used locally.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_sta.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 3978ad606b5..2e225aecb87 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1994,10 +1994,10 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 	ieee80211_sta_tx(dev, skb, 0);
 }
 
-void ieee80211_rx_mgmt_action(struct net_device *dev,
-			     struct ieee80211_if_sta *ifsta,
-			     struct ieee80211_mgmt *mgmt,
-			     size_t len)
+static void ieee80211_rx_mgmt_action(struct net_device *dev,
+				     struct ieee80211_if_sta *ifsta,
+				     struct ieee80211_mgmt *mgmt,
+				     size_t len)
 {
 	if (len < IEEE80211_MIN_ACTION_SIZE)
 		return;
-- 
cgit v1.2.3


From 7d54d0ddd66678ada6635159dac1eb82ccbe34b5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 01:31:25 +0100
Subject: mac80211: allow easier multicast/broadcast buffering in hardware

There are various decisions influencing the decision whether to buffer
a frame for after the next DTIM beacon. The "do we have stations in PS
mode" condition cannot be tested by the driver so mac80211 has to do
that. To ease driver writing for hardware that can buffer frames until
after the next DTIM beacon, introduce a new txctl flag telling the
driver to buffer a specific frame.

While at it, restructure and comment the code for multicast buffering
and remove spurious "inline" directives.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/tx.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e177a8dc23b..f7aff2e97ee 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -322,16 +322,27 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 	       wiphy_name(local->hw.wiphy), purged);
 }
 
-static inline ieee80211_txrx_result
+static ieee80211_txrx_result
 ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
 {
-	/* broadcast/multicast frame */
-	/* If any of the associated stations is in power save mode,
-	 * the frame is buffered to be sent after DTIM beacon frame */
-	if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) &&
-	    tx->sdata->type != IEEE80211_IF_TYPE_WDS &&
-	    tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) &&
-	    !(tx->fc & IEEE80211_FCTL_ORDER)) {
+	/*
+	 * broadcast/multicast frame
+	 *
+	 * If any of the associated stations is in power save mode,
+	 * the frame is buffered to be sent after DTIM beacon frame.
+	 * This is done either by the hardware or us.
+	 */
+
+	/* not AP/IBSS or ordered frame */
+	if (!tx->sdata->bss || (tx->fc & IEEE80211_FCTL_ORDER))
+		return TXRX_CONTINUE;
+
+	/* no stations in PS mode */
+	if (!atomic_read(&tx->sdata->bss->num_sta_ps))
+		return TXRX_CONTINUE;
+
+	/* buffered in mac80211 */
+	if (tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) {
 		if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
 			purge_old_ps_buffers(tx->local);
 		if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >=
@@ -348,10 +359,13 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
 		return TXRX_QUEUED;
 	}
 
+	/* buffered in hardware */
+	tx->u.tx.control->flags |= IEEE80211_TXCTL_SEND_AFTER_DTIM;
+
 	return TXRX_CONTINUE;
 }
 
-static inline ieee80211_txrx_result
+static ieee80211_txrx_result
 ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
 {
 	struct sta_info *sta = tx->sta;
-- 
cgit v1.2.3


From 41ade00f21a72d30911c6351a93823a491fffa39 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:29 +0100
Subject: cfg80211/nl80211: introduce key handling

This introduces key handling to cfg80211/nl80211. Default
and group keys can be added, changed and removed; sequence
counters for each key can be retrieved.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/core.c    |   3 +
 net/wireless/nl80211.c | 289 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 292 insertions(+)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index febc33bc9c0..cfc5fc5f9e7 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -184,6 +184,9 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 	struct cfg80211_registered_device *drv;
 	int alloc_size;
 
+	WARN_ON(!ops->add_key && ops->del_key);
+	WARN_ON(ops->add_key && !ops->del_key);
+
 	alloc_size = sizeof(*drv) + sizeof_priv;
 
 	drv = kzalloc(alloc_size, GFP_KERNEL);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 48b0d453e4e..09093638852 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -61,6 +61,14 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
+
+	[NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN },
+
+	[NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
+				    .len = WLAN_MAX_KEY_LEN },
+	[NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 },
+	[NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
+	[NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
 };
 
 /* message building helper */
@@ -335,6 +343,263 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+struct get_key_cookie {
+	struct sk_buff *msg;
+	int error;
+};
+
+static void get_key_callback(void *c, struct key_params *params)
+{
+	struct get_key_cookie *cookie = c;
+
+	if (params->key)
+		NLA_PUT(cookie->msg, NL80211_ATTR_KEY_DATA,
+			params->key_len, params->key);
+
+	if (params->seq)
+		NLA_PUT(cookie->msg, NL80211_ATTR_KEY_SEQ,
+			params->seq_len, params->seq);
+
+	if (params->cipher)
+		NLA_PUT_U32(cookie->msg, NL80211_ATTR_KEY_CIPHER,
+			    params->cipher);
+
+	return;
+ nla_put_failure:
+	cookie->error = 1;
+}
+
+static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	u8 key_idx = 0;
+	u8 *mac_addr = NULL;
+	struct get_key_cookie cookie = {
+		.error = 0,
+	};
+	void *hdr;
+	struct sk_buff *msg;
+
+	if (info->attrs[NL80211_ATTR_KEY_IDX])
+		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
+
+	if (key_idx > 3)
+		return -EINVAL;
+
+	if (info->attrs[NL80211_ATTR_MAC])
+		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->get_key) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_NEW_KEY);
+
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto out;
+	}
+
+	cookie.msg = msg;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_idx);
+	if (mac_addr)
+		NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
+
+	rtnl_lock();
+	err = drv->ops->get_key(&drv->wiphy, dev, key_idx, mac_addr,
+				&cookie, get_key_callback);
+	rtnl_unlock();
+
+	if (err)
+		goto out;
+
+	if (cookie.error)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	err = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+ nla_put_failure:
+	err = -ENOBUFS;
+	nlmsg_free(msg);
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	u8 key_idx;
+
+	if (!info->attrs[NL80211_ATTR_KEY_IDX])
+		return -EINVAL;
+
+	key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
+
+	if (key_idx > 3)
+		return -EINVAL;
+
+	/* currently only support setting default key */
+	if (!info->attrs[NL80211_ATTR_KEY_DEFAULT])
+		return -EINVAL;
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->set_default_key) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct key_params params;
+	u8 key_idx = 0;
+	u8 *mac_addr = NULL;
+
+	memset(&params, 0, sizeof(params));
+
+	if (!info->attrs[NL80211_ATTR_KEY_CIPHER])
+		return -EINVAL;
+
+	if (info->attrs[NL80211_ATTR_KEY_DATA]) {
+		params.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]);
+		params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]);
+	}
+
+	if (info->attrs[NL80211_ATTR_KEY_IDX])
+		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
+
+	params.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]);
+
+	if (info->attrs[NL80211_ATTR_MAC])
+		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (key_idx > 3)
+		return -EINVAL;
+
+	/*
+	 * Disallow pairwise keys with non-zero index unless it's WEP
+	 * (because current deployments use pairwise WEP keys with
+	 * non-zero indizes but 802.11i clearly specifies to use zero)
+	 */
+	if (mac_addr && key_idx &&
+	    params.cipher != WLAN_CIPHER_SUITE_WEP40 &&
+	    params.cipher != WLAN_CIPHER_SUITE_WEP104)
+		return -EINVAL;
+
+	/* TODO: add definitions for the lengths to linux/ieee80211.h */
+	switch (params.cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+		if (params.key_len != 5)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_TKIP:
+		if (params.key_len != 32)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_CCMP:
+		if (params.key_len != 16)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_WEP104:
+		if (params.key_len != 13)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->add_key) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->add_key(&drv->wiphy, dev, key_idx, mac_addr, &params);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	u8 key_idx = 0;
+	u8 *mac_addr = NULL;
+
+	if (info->attrs[NL80211_ATTR_KEY_IDX])
+		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
+
+	if (key_idx > 3)
+		return -EINVAL;
+
+	if (info->attrs[NL80211_ATTR_MAC])
+		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->del_key) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -374,6 +639,30 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_GET_KEY,
+		.doit = nl80211_get_key,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_SET_KEY,
+		.doit = nl80211_set_key,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_NEW_KEY,
+		.doit = nl80211_new_key,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_KEY,
+		.doit = nl80211_del_key,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 /* multicast groups */
-- 
cgit v1.2.3


From e8cbb4cbeb7642d179b01c35adf036ddb65f3dd0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:30 +0100
Subject: mac80211: support adding/removing keys via cfg80211

This adds the necessary hooks to mac80211 to allow userspace
to edit keys with cfg80211 (through nl80211.)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/cfg.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9e2bc1fd023..d49f7b58b05 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -6,6 +6,7 @@
  * This file is GPLv2 as found in COPYING.
  */
 
+#include <linux/ieee80211.h>
 #include <linux/nl80211.h>
 #include <linux/rtnetlink.h>
 #include <net/net_namespace.h>
@@ -99,8 +100,98 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 	return 0;
 }
 
+static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
+			     u8 key_idx, u8 *mac_addr,
+			     struct key_params *params)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta = NULL;
+	enum ieee80211_key_alg alg;
+	int ret;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	switch (params->cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+		alg = ALG_WEP;
+		break;
+	case WLAN_CIPHER_SUITE_TKIP:
+		alg = ALG_TKIP;
+		break;
+	case WLAN_CIPHER_SUITE_CCMP:
+		alg = ALG_CCMP;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (mac_addr) {
+		sta = sta_info_get(sdata->local, mac_addr);
+		if (!sta)
+			return -ENOENT;
+	}
+
+	ret = 0;
+	if (!ieee80211_key_alloc(sdata, sta, alg, key_idx,
+				 params->key_len, params->key))
+		ret = -ENOMEM;
+
+	if (sta)
+		sta_info_put(sta);
+
+	return ret;
+}
+
+static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
+			     u8 key_idx, u8 *mac_addr)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+	int ret;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (mac_addr) {
+		sta = sta_info_get(sdata->local, mac_addr);
+		if (!sta)
+			return -ENOENT;
+
+		ret = 0;
+		if (sta->key)
+			ieee80211_key_free(sta->key);
+		else
+			ret = -ENOENT;
+
+		sta_info_put(sta);
+		return ret;
+	}
+
+	if (!sdata->keys[key_idx])
+		return -ENOENT;
+
+	ieee80211_key_free(sdata->keys[key_idx]);
+
+	return 0;
+}
+
+static int ieee80211_config_default_key(struct wiphy *wiphy,
+					struct net_device *dev,
+					u8 key_idx)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	ieee80211_set_default_key(sdata, key_idx);
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
 	.change_virtual_intf = ieee80211_change_iface,
+	.add_key = ieee80211_add_key,
+	.del_key = ieee80211_del_key,
+	.set_default_key = ieee80211_config_default_key,
 };
-- 
cgit v1.2.3


From 62da92fb75c346b503bca765fd1337e08771c9fe Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:31 +0100
Subject: mac80211: support getting key sequence counters via cfg80211

This implements cfg80211's get_key() to allow retrieving the sequence
counter for a TKIP or CCMP key from userspace. It also cleans up and
documents the associated low-level driver interface.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/cfg.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 84 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d49f7b58b05..4c1ce353c66 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1,7 +1,7 @@
 /*
  * mac80211 configuration hooks for cfg80211
  *
- * Copyright 2006	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006, 2007	Johannes Berg <johannes@sipsolutions.net>
  *
  * This file is GPLv2 as found in COPYING.
  */
@@ -175,6 +175,88 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
+static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
+			     u8 key_idx, u8 *mac_addr, void *cookie,
+			     void (*callback)(void *cookie,
+					      struct key_params *params))
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct sta_info *sta = NULL;
+	u8 seq[6] = {0};
+	struct key_params params;
+	struct ieee80211_key *key;
+	u32 iv32;
+	u16 iv16;
+	int err = -ENOENT;
+
+	if (mac_addr) {
+		sta = sta_info_get(sdata->local, mac_addr);
+		if (!sta)
+			goto out;
+
+		key = sta->key;
+	} else
+		key = sdata->keys[key_idx];
+
+	if (!key)
+		goto out;
+
+	memset(&params, 0, sizeof(params));
+
+	switch (key->conf.alg) {
+	case ALG_TKIP:
+		params.cipher = WLAN_CIPHER_SUITE_TKIP;
+
+		iv32 = key->u.tkip.iv32;
+		iv16 = key->u.tkip.iv16;
+
+		if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+		    sdata->local->ops->get_tkip_seq)
+			sdata->local->ops->get_tkip_seq(
+				local_to_hw(sdata->local),
+				key->conf.hw_key_idx,
+				&iv32, &iv16);
+
+		seq[0] = iv16 & 0xff;
+		seq[1] = (iv16 >> 8) & 0xff;
+		seq[2] = iv32 & 0xff;
+		seq[3] = (iv32 >> 8) & 0xff;
+		seq[4] = (iv32 >> 16) & 0xff;
+		seq[5] = (iv32 >> 24) & 0xff;
+		params.seq = seq;
+		params.seq_len = 6;
+		break;
+	case ALG_CCMP:
+		params.cipher = WLAN_CIPHER_SUITE_CCMP;
+		seq[0] = key->u.ccmp.tx_pn[5];
+		seq[1] = key->u.ccmp.tx_pn[4];
+		seq[2] = key->u.ccmp.tx_pn[3];
+		seq[3] = key->u.ccmp.tx_pn[2];
+		seq[4] = key->u.ccmp.tx_pn[1];
+		seq[5] = key->u.ccmp.tx_pn[0];
+		params.seq = seq;
+		params.seq_len = 6;
+		break;
+	case ALG_WEP:
+		if (key->conf.keylen == 5)
+			params.cipher = WLAN_CIPHER_SUITE_WEP40;
+		else
+			params.cipher = WLAN_CIPHER_SUITE_WEP104;
+		break;
+	}
+
+	params.key = key->conf.key;
+	params.key_len = key->conf.keylen;
+
+	callback(cookie, &params);
+	err = 0;
+
+ out:
+	if (sta)
+		sta_info_put(sta);
+	return err;
+}
+
 static int ieee80211_config_default_key(struct wiphy *wiphy,
 					struct net_device *dev,
 					u8 key_idx)
@@ -193,5 +275,6 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_virtual_intf = ieee80211_change_iface,
 	.add_key = ieee80211_add_key,
 	.del_key = ieee80211_del_key,
+	.get_key = ieee80211_get_key,
 	.set_default_key = ieee80211_config_default_key,
 };
-- 
cgit v1.2.3


From ed1b6cc7f80f831e192704b05b9917f9cc37be15 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:32 +0100
Subject: cfg80211/nl80211: add beacon settings

This adds the necessary API to cfg80211/nl80211 to allow
changing beaconing settings.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/nl80211.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 09093638852..306ae019ea8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -69,6 +69,13 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 },
 	[NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
 	[NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
+
+	[NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
+	[NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
+	[NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY,
+				       .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_BEACON_TAIL] = { .type = NLA_BINARY,
+				       .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* message building helper */
@@ -600,6 +607,114 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
+{
+        int (*call)(struct wiphy *wiphy, struct net_device *dev,
+		    struct beacon_parameters *info);
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct beacon_parameters params;
+	int haveinfo = 0;
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	switch (info->genlhdr->cmd) {
+	case NL80211_CMD_NEW_BEACON:
+		/* these are required for NEW_BEACON */
+		if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
+		    !info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
+		    !info->attrs[NL80211_ATTR_BEACON_HEAD]) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		call = drv->ops->add_beacon;
+		break;
+	case NL80211_CMD_SET_BEACON:
+		call = drv->ops->set_beacon;
+		break;
+	default:
+		WARN_ON(1);
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!call) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	memset(&params, 0, sizeof(params));
+
+	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+		params.interval =
+		    nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
+		haveinfo = 1;
+	}
+
+	if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
+		params.dtim_period =
+		    nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
+		haveinfo = 1;
+	}
+
+	if (info->attrs[NL80211_ATTR_BEACON_HEAD]) {
+		params.head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]);
+		params.head_len =
+		    nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]);
+		haveinfo = 1;
+	}
+
+	if (info->attrs[NL80211_ATTR_BEACON_TAIL]) {
+		params.tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]);
+		params.tail_len =
+		    nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]);
+		haveinfo = 1;
+	}
+
+	if (!haveinfo) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = call(&drv->wiphy, dev, &params);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->del_beacon) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->del_beacon(&drv->wiphy, dev);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -663,6 +778,24 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_BEACON,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.doit = nl80211_addset_beacon,
+	},
+	{
+		.cmd = NL80211_CMD_NEW_BEACON,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.doit = nl80211_addset_beacon,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_BEACON,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.doit = nl80211_del_beacon,
+	},
 };
 
 /* multicast groups */
-- 
cgit v1.2.3


From 5727ef1b2e797a1922f5bc239b6afb2b4cfb80bc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:34 +0100
Subject: cfg80211/nl80211: station handling

This patch adds station handling to cfg80211/nl80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/nl80211.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 235 insertions(+)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 306ae019ea8..431835126e8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -76,6 +76,12 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 				       .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_BEACON_TAIL] = { .type = NLA_BINARY,
 				       .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_STA_AID] = { .type = NLA_U16 },
+	[NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED },
+	[NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 },
+	[NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY,
+					       .len = NL80211_MAX_SUPP_RATES },
+	[NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 },
 };
 
 /* message building helper */
@@ -715,6 +721,210 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
+	[NL80211_STA_FLAG_AUTHORIZED] = { .type = NLA_FLAG },
+	[NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG },
+	[NL80211_STA_FLAG_WME] = { .type = NLA_FLAG },
+};
+
+static int parse_station_flags(struct nlattr *nla, u32 *staflags)
+{
+	struct nlattr *flags[NL80211_STA_FLAG_MAX + 1];
+	int flag;
+
+	*staflags = 0;
+
+	if (!nla)
+		return 0;
+
+	if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX,
+			     nla, sta_flags_policy))
+		return -EINVAL;
+
+	*staflags = STATION_FLAG_CHANGED;
+
+	for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++)
+		if (flags[flag])
+			*staflags |= (1<<flag);
+
+	return 0;
+}
+
+static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Get vlan interface making sure it is on the right wiphy.
+ */
+static int get_vlan(struct nlattr *vlanattr,
+		    struct cfg80211_registered_device *rdev,
+		    struct net_device **vlan)
+{
+	*vlan = NULL;
+
+	if (vlanattr) {
+		*vlan = dev_get_by_index(&init_net, nla_get_u32(vlanattr));
+		if (!*vlan)
+			return -ENODEV;
+		if (!(*vlan)->ieee80211_ptr)
+			return -EINVAL;
+		if ((*vlan)->ieee80211_ptr->wiphy != &rdev->wiphy)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct station_parameters params;
+	u8 *mac_addr = NULL;
+
+	memset(&params, 0, sizeof(params));
+
+	params.listen_interval = -1;
+
+	if (info->attrs[NL80211_ATTR_STA_AID])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) {
+		params.supported_rates =
+			nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
+		params.supported_rates_len =
+			nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
+	}
+
+	if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
+		params.listen_interval =
+		    nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
+
+	if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
+				&params.station_flags))
+		return -EINVAL;
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
+	if (err)
+		goto out;
+
+	if (!drv->ops->change_station) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->change_station(&drv->wiphy, dev, mac_addr, &params);
+	rtnl_unlock();
+
+ out:
+	if (params.vlan)
+		dev_put(params.vlan);
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct station_parameters params;
+	u8 *mac_addr = NULL;
+
+	memset(&params, 0, sizeof(params));
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_STA_AID])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES])
+		return -EINVAL;
+
+	mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	params.supported_rates =
+		nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
+	params.supported_rates_len =
+		nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
+	params.listen_interval =
+		nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
+	params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
+
+	if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
+				&params.station_flags))
+		return -EINVAL;
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
+	if (err)
+		goto out;
+
+	if (!drv->ops->add_station) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, &params);
+	rtnl_unlock();
+
+ out:
+	if (params.vlan)
+		dev_put(params.vlan);
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	u8 *mac_addr = NULL;
+
+	if (info->attrs[NL80211_ATTR_MAC])
+		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->del_station) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->del_station(&drv->wiphy, dev, mac_addr);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -796,6 +1006,31 @@ static struct genl_ops nl80211_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.doit = nl80211_del_beacon,
 	},
+	{
+		.cmd = NL80211_CMD_GET_STATION,
+		.doit = nl80211_get_station,
+		/* TODO: implement dumpit */
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_SET_STATION,
+		.doit = nl80211_set_station,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_NEW_STATION,
+		.doit = nl80211_new_station,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_STATION,
+		.doit = nl80211_del_station,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 /* multicast groups */
-- 
cgit v1.2.3


From fd5b74dcb88cfc109d6576b22deaef6f47f82c12 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:36 +0100
Subject: cfg80211/nl80211: implement station attribute retrieval

After a station is added to the kernel's structures, userspace
has to be able to retrieve statistics about that station, especially
whether the station was idle and how much bytes were transferred
to and from it. This adds the necessary code to nl80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/nl80211.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 431835126e8..e3a214f63f9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -750,9 +750,89 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags)
 	return 0;
 }
 
+static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
+				int flags, struct net_device *dev,
+				u8 *mac_addr, struct station_stats *stats)
+{
+	void *hdr;
+	struct nlattr *statsattr;
+
+	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
+	if (!hdr)
+		return -1;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
+
+	statsattr = nla_nest_start(msg, NL80211_ATTR_STA_STATS);
+	if (!statsattr)
+		goto nla_put_failure;
+	if (stats->filled & STATION_STAT_INACTIVE_TIME)
+		NLA_PUT_U32(msg, NL80211_STA_STAT_INACTIVE_TIME,
+			    stats->inactive_time);
+	if (stats->filled & STATION_STAT_RX_BYTES)
+		NLA_PUT_U32(msg, NL80211_STA_STAT_RX_BYTES,
+			    stats->rx_bytes);
+	if (stats->filled & STATION_STAT_TX_BYTES)
+		NLA_PUT_U32(msg, NL80211_STA_STAT_TX_BYTES,
+			    stats->tx_bytes);
+
+	nla_nest_end(msg, statsattr);
+
+	return genlmsg_end(msg, hdr);
+
+ nla_put_failure:
+	return genlmsg_cancel(msg, hdr);
+}
+
+
 static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
 {
-	return -EOPNOTSUPP;
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct station_stats stats;
+	struct sk_buff *msg;
+	u8 *mac_addr = NULL;
+
+	memset(&stats, 0, sizeof(stats));
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->get_station) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &stats);
+	rtnl_unlock();
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
+				 dev, mac_addr, &stats) < 0)
+		goto out_free;
+
+	err = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+ out_free:
+	nlmsg_free(msg);
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
 }
 
 /*
-- 
cgit v1.2.3


From 7bbdd2d987971f9d123a2db89ed921bf02e34f9a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:37 +0100
Subject: mac80211: implement station stats retrieval

This implements the required cfg80211 callback in mac80211
to allow userspace to get station statistics.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/cfg.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4c1ce353c66..11156b381ec 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -269,6 +269,31 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
+				 u8 *mac, struct station_stats *stats)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+
+	sta = sta_info_get(local, mac);
+	if (!sta)
+		return -ENOENT;
+
+	/* XXX: verify sta->dev == dev */
+
+	stats->filled = STATION_STAT_INACTIVE_TIME |
+			STATION_STAT_RX_BYTES |
+			STATION_STAT_TX_BYTES;
+
+	stats->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
+	stats->rx_bytes = sta->rx_bytes;
+	stats->tx_bytes = sta->tx_bytes;
+
+	sta_info_put(sta);
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -277,4 +302,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.del_key = ieee80211_del_key,
 	.get_key = ieee80211_get_key,
 	.set_default_key = ieee80211_config_default_key,
+	.get_station = ieee80211_get_station,
 };
-- 
cgit v1.2.3


From 176e4f84423af3105894a7d71b23c1a16678a6be Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 18 Dec 2007 15:27:47 +0100
Subject: mac80211: move tx crypto decision

This patch moves the decision making about whether a frame is encrypted
with a certain algorithm up into the TX handlers rather than having it
in the crypto algorithm implementation.

This fixes a problem with the radiotap injection code where injecting
a non-data packet and requesting encryption could end up asking the
driver to encrypt a packet without giving it a key.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/tx.c  | 39 ++++++++++++++++++++++-----------------
 net/mac80211/wep.c | 10 ----------
 net/mac80211/wpa.c | 14 --------------
 3 files changed, 22 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f7aff2e97ee..8302c70da9a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -438,11 +438,7 @@ static ieee80211_txrx_result
 ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
 {
 	struct ieee80211_key *key;
-	const struct ieee80211_hdr *hdr;
-	u16 fc;
-
-	hdr = (const struct ieee80211_hdr *) tx->skb->data;
-	fc = le16_to_cpu(hdr->frame_control);
+	u16 fc = tx->fc;
 
 	if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
 		tx->key = NULL;
@@ -455,16 +451,34 @@ ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
 		 !(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TXRX_DROP;
-	} else {
+	} else
 		tx->key = NULL;
-		tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
-	}
 
 	if (tx->key) {
+		u16 ftype, stype;
+
 		tx->key->tx_rx_count++;
 		/* TODO: add threshold stuff again */
+
+		switch (tx->key->conf.alg) {
+		case ALG_WEP:
+			ftype = fc & IEEE80211_FCTL_FTYPE;
+			stype = fc & IEEE80211_FCTL_STYPE;
+
+			if (ftype == IEEE80211_FTYPE_MGMT &&
+			    stype == IEEE80211_STYPE_AUTH)
+				break;
+		case ALG_TKIP:
+		case ALG_CCMP:
+			if (!WLAN_FC_DATA_PRESENT(fc))
+				tx->key = NULL;
+			break;
+		}
 	}
 
+	if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+		tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
+
 	return TXRX_CONTINUE;
 }
 
@@ -706,15 +720,6 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
 		}
 	}
 
-	/*
-	 * Tell hardware to not encrypt when we had sw crypto.
-	 * Because we use the same flag to internally indicate that
-	 * no (software) encryption should be done, we have to set it
-	 * after all crypto handlers.
-	 */
-	if (tx->key && !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
-		tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
-
 	return TXRX_CONTINUE;
 }
 
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index b5f3413403b..a0cff72a580 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -349,16 +349,6 @@ static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb)
 ieee80211_txrx_result
 ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc;
-
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
-	     ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
-	      (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)))
-		return TXRX_CONTINUE;
-
 	tx->u.tx.control->iv_len = WEP_IV_LEN;
 	tx->u.tx.control->icv_len = WEP_ICV_LEN;
 	ieee80211_tx_set_iswep(tx);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 20cec1cb956..6f04311cf0a 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -245,16 +245,9 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx,
 ieee80211_txrx_result
 ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc;
 	struct sk_buff *skb = tx->skb;
 	int wpa_test = 0, test = 0;
 
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (!WLAN_FC_DATA_PRESENT(fc))
-		return TXRX_CONTINUE;
-
 	tx->u.tx.control->icv_len = TKIP_ICV_LEN;
 	tx->u.tx.control->iv_len = TKIP_IV_LEN;
 	ieee80211_tx_set_iswep(tx);
@@ -501,16 +494,9 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx,
 ieee80211_txrx_result
 ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc;
 	struct sk_buff *skb = tx->skb;
 	int test = 0;
 
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (!WLAN_FC_DATA_PRESENT(fc))
-		return TXRX_CONTINUE;
-
 	tx->u.tx.control->icv_len = CCMP_MIC_LEN;
 	tx->u.tx.control->iv_len = CCMP_HDR_LEN;
 	ieee80211_tx_set_iswep(tx);
-- 
cgit v1.2.3


From c27f9830f367a041ca976ccd102f590d27d4deb2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 23:38:24 +0100
Subject: mac80211: don't read ERP information from (re)association response

According to the standard, the field cannot be present, so don't
try to interpret it either.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Daniel Drake <dsd@gentoo.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_sta.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 2e225aecb87..5b8f484c167 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1371,20 +1371,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 		return;
 	}
 
-	/* it probably doesn't, but if the frame includes an ERP value then
-	 * update our stored copy */
-	if (elems.erp_info && elems.erp_info_len >= 1) {
-		struct ieee80211_sta_bss *bss
-			= ieee80211_rx_bss_get(dev, ifsta->bssid,
-					       local->hw.conf.channel,
-					       ifsta->ssid, ifsta->ssid_len);
-		if (bss) {
-			bss->erp_value = elems.erp_info[0];
-			bss->has_erp_value = 1;
-			ieee80211_rx_bss_put(dev, bss);
-		}
-	}
-
 	printk(KERN_DEBUG "%s: associated\n", dev->name);
 	ifsta->aid = aid;
 	ifsta->ap_capab = capab_info;
-- 
cgit v1.2.3


From d6084cb61dce1e78e7079a1756df0de71dc8599c Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 21 Dec 2007 00:43:34 -0500
Subject: net/wireless/Kconfig: whitespace corrections

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/Kconfig | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6426055a8be..79270903bda 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -6,13 +6,13 @@ config NL80211
 	depends on CFG80211
 	default y
 	---help---
-         This option turns on the new netlink interface
-         (nl80211) support in cfg80211.
+	  This option turns on the new netlink interface
+	  (nl80211) support in cfg80211.
 
-         If =n, drivers using mac80211 will be configured via
-         wireless extension support provided by that subsystem.
+	  If =n, drivers using mac80211 will be configured via
+	  wireless extension support provided by that subsystem.
 
-         If unsure, say Y.
+	  If unsure, say Y.
 
 config WIRELESS_EXT
 	bool "Wireless extensions"
-- 
cgit v1.2.3


From c40896de50c73e7835b34f23bea96625edd9d6c4 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 21 Dec 2007 00:44:59 -0500
Subject: net/mac80211/Kconfig: whitespace corrections

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Kconfig | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 297f4d967e2..cac6cf2e9ac 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -10,8 +10,8 @@ config MAC80211
 	select CFG80211
 	select NET_SCH_FIFO
 	---help---
-	This option enables the hardware independent IEEE 802.11
-	networking stack.
+	  This option enables the hardware independent IEEE 802.11
+	  networking stack.
 
 config MAC80211_RC_DEFAULT_CHOICE
 	bool "Choose default rate control algorithm" if EMBEDDED
@@ -87,8 +87,8 @@ config MAC80211_LEDS
 	bool "Enable LED triggers"
 	depends on MAC80211 && LEDS_TRIGGERS
 	---help---
-	This option enables a few LED triggers for different
-	packet receive/transmit events.
+	  This option enables a few LED triggers for different
+	  packet receive/transmit events.
 
 config MAC80211_DEBUGFS
 	bool "Export mac80211 internals in DebugFS"
@@ -110,14 +110,14 @@ config MAC80211_DEBUG
 	  subsystem, you most likely want to say N here.
 
 config MAC80211_HT_DEBUG
-       bool "Enable HT debugging output"
-       depends on MAC80211_DEBUG
-       ---help---
-       This option enables 802.11n High Throughput features
-       debug tracing output.
-
-       If you are not trying to debug of develop the ieee80211
-       subsystem, you most likely want to say N here.
+	bool "Enable HT debugging output"
+	depends on MAC80211_DEBUG
+	---help---
+	  This option enables 802.11n High Throughput features
+	  debug tracing output.
+
+	  If you are not trying to debug of develop the ieee80211
+	  subsystem, you most likely want to say N here.
 
 config MAC80211_VERBOSE_DEBUG
 	bool "Verbose debugging output"
-- 
cgit v1.2.3


From 9cb5734e5b9b26097c7fa28a9c6426a204cc15e3 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 12 Jan 2008 02:16:03 -0800
Subject: [TCP]: Convert several length variable to unsigned.

Several length variables cannot be negative, so convert int to
unsigned int.  This also allows us to do sane shift operations
on those variables.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 6 +++---
 net/ipv6/tcp_ipv6.c | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fc9bdd8b7dc..9aea88b8d4f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -99,7 +99,7 @@ static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   __be32 saddr, __be32 daddr,
 				   struct tcphdr *th, int protocol,
-				   int tcplen);
+				   unsigned int tcplen);
 #endif
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -1020,7 +1020,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   __be32 saddr, __be32 daddr,
 				   struct tcphdr *th, int protocol,
-				   int tcplen)
+				   unsigned int tcplen)
 {
 	struct scatterlist sg[4];
 	__u16 data_len;
@@ -1113,7 +1113,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 			 struct dst_entry *dst,
 			 struct request_sock *req,
 			 struct tcphdr *th, int protocol,
-			 int tcplen)
+			 unsigned int tcplen)
 {
 	__be32 saddr, daddr;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0268e118f0b..00c08399837 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -733,7 +733,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   struct in6_addr *saddr,
 				   struct in6_addr *daddr,
 				   struct tcphdr *th, int protocol,
-				   int tcplen)
+				   unsigned int tcplen)
 {
 	struct scatterlist sg[4];
 	__u16 data_len;
@@ -818,7 +818,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				struct dst_entry *dst,
 				struct request_sock *req,
 				struct tcphdr *th, int protocol,
-				int tcplen)
+				unsigned int tcplen)
 {
 	struct in6_addr *saddr, *daddr;
 
@@ -985,7 +985,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
-	int tot_len = sizeof(*th);
+	unsigned int tot_len = sizeof(*th);
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
 #endif
@@ -1085,7 +1085,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
-	int tot_len = sizeof(struct tcphdr);
+	unsigned int tot_len = sizeof(struct tcphdr);
 	__be32 *topt;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
-- 
cgit v1.2.3


From b790cedd24a7f7d1639072b3faf35f1f56cb38ea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 21 Dec 2007 01:49:07 -0800
Subject: [INET]: Avoid an integer divide in rt_garbage_collect()

Since 'goal' is a signed int, compiler may emit an integer divide
to compute goal/2.

Using a right shift is OK here and less expensive.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1cc6c23cf75..933b093721e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -851,14 +851,14 @@ static int rt_garbage_collect(void)
 			equilibrium = ipv4_dst_ops.gc_thresh;
 		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
 		if (goal > 0) {
-			equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
+			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
 			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
 		}
 	} else {
 		/* We are in dangerous area. Try to reduce cache really
 		 * aggressively.
 		 */
-		goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
+		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
 		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
 	}
 
-- 
cgit v1.2.3


From ce55dd3610f7ac29bf8d159c2e2ace9aaf2c3038 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 21 Dec 2007 01:50:43 -0800
Subject: [TCP]: tcp_write_timeout.c cleanup

Before submiting a patch to change a divide to a right shift, I felt
necessary to create a helper function tcp_mtu_probing() to reduce length of
lines exceeding 100 chars in tcp_write_timeout().

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d8970ecfcfc..8f1480808f9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -114,13 +114,31 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
 	return retries;
 }
 
+static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+{
+	int mss;
+
+	/* Black hole detection */
+	if (sysctl_tcp_mtu_probing) {
+		if (!icsk->icsk_mtup.enabled) {
+			icsk->icsk_mtup.enabled = 1;
+			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+		} else {
+			struct tcp_sock *tp = tcp_sk(sk);
+			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)/2;
+			mss = min(sysctl_tcp_base_mss, mss);
+			mss = max(mss, 68 - tp->tcp_header_len);
+			icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+		}
+	}
+}
+
 /* A write timeout has occurred. Process the after effects. */
 static int tcp_write_timeout(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
 	int retry_until;
-	int mss;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
@@ -129,18 +147,7 @@ static int tcp_write_timeout(struct sock *sk)
 	} else {
 		if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
 			/* Black hole detection */
-			if (sysctl_tcp_mtu_probing) {
-				if (!icsk->icsk_mtup.enabled) {
-					icsk->icsk_mtup.enabled = 1;
-					tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-				} else {
-					mss = min(sysctl_tcp_base_mss,
-						  tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)/2);
-					mss = max(mss, 68 - tp->tcp_header_len);
-					icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
-					tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-				}
-			}
+			tcp_mtu_probing(icsk, sk);
 
 			dst_negative_advice(&sk->sk_dst_cache);
 		}
-- 
cgit v1.2.3


From 829942c18704250fce4d5eca787065a3ee7c685d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 04:29:16 -0800
Subject: [TCP]: Move mss variable in tcp_mtu_probing()

Down into the only scope where it is used.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8f1480808f9..ea111e91f0e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -116,8 +116,6 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
 
 static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 {
-	int mss;
-
 	/* Black hole detection */
 	if (sysctl_tcp_mtu_probing) {
 		if (!icsk->icsk_mtup.enabled) {
@@ -125,6 +123,8 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		} else {
 			struct tcp_sock *tp = tcp_sk(sk);
+			int mss;
+
 			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)/2;
 			mss = min(sysctl_tcp_base_mss, mss);
 			mss = max(mss, 68 - tp->tcp_header_len);
-- 
cgit v1.2.3


From 8beb5c5f12c8484c59edf9b691f2c4bb4d31f3a0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 21 Dec 2007 05:58:29 -0800
Subject: [TCP]: Avoid a divide in tcp_mtu_probing()

tcp_mtu_to_mss() being signed, compiler might emit an integer divide
to compute tcp_mtu_to_mss()/2 .

Using a right shift is OK here and less expensive.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ea111e91f0e..ea85bc00c61 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -125,7 +125,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 			struct tcp_sock *tp = tcp_sk(sk);
 			int mss;
 
-			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)/2;
+			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
 			mss = min(sysctl_tcp_base_mss, mss);
 			mss = max(mss, 68 - tp->tcp_header_len);
 			icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
-- 
cgit v1.2.3


From dfd4f0ae2e111e2b93c295938c0e64ebbb69ae6e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 21 Dec 2007 06:07:53 -0800
Subject: [TCP]: Avoid two divides in __tcp_grow_window()

tcp_win_from_space() being signed, compiler might emit an integer divide
to compute tcp_win_from_space()/2 .

Using right shifts is OK here and less expensive.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bc2d5f70966..519bd24e0d3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -289,8 +289,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
-	int truesize = tcp_win_from_space(skb->truesize)/2;
-	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
+	int truesize = tcp_win_from_space(skb->truesize) >> 1;
+	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
-- 
cgit v1.2.3


From afeb14b49098ba7a51c96e083a4105a0301f94c4 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 21 Dec 2007 14:58:11 -0800
Subject: [XFRM]: RFC4303 compliant auditing

This patch adds a number of new IPsec audit events to meet the auditing
requirements of RFC4303.  This includes audit hooks for the following events:

 * Could not find a valid SA [sections 2.1, 3.4.2]
   . xfrm_audit_state_notfound()
   . xfrm_audit_state_notfound_simple()

 * Sequence number overflow [section 3.3.3]
   . xfrm_audit_state_replay_overflow()

 * Replayed packet [section 3.4.3]
   . xfrm_audit_state_replay()

 * Integrity check failure [sections 3.4.4.1, 3.4.4.2]
   . xfrm_audit_state_icvfail()

While RFC4304 deals only with ESP most of the changes in this patch apply to
IPsec in general, i.e. both AH and ESP.  The one case, integrity check
failure, where ESP specific code had to be modified the same was done to the
AH code for the sake of consistency.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ah4.c         |   4 +-
 net/ipv4/esp4.c        |   1 +
 net/ipv6/ah6.c         |   2 +-
 net/ipv6/esp6.c        |   1 +
 net/ipv6/xfrm6_input.c |   1 +
 net/xfrm/xfrm_input.c  |   3 +-
 net/xfrm/xfrm_output.c |   2 +
 net/xfrm/xfrm_policy.c |  14 ++---
 net/xfrm/xfrm_state.c  | 153 +++++++++++++++++++++++++++++++++++++++++++------
 9 files changed, 154 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index d76803a3dca..ec8de0aa20e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -179,8 +179,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
 			goto unlock;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
+			xfrm_audit_state_icvfail(x, skb, IPPROTO_AH);
 			err = -EBADMSG;
+		}
 	}
 unlock:
 	spin_unlock(&x->lock);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 28ea5c77ca2..b334c7619c0 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -191,6 +191,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
+			xfrm_audit_state_icvfail(x, skb, IPPROTO_ESP);
 			err = -EBADMSG;
 			goto unlock;
 		}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 1b51d1eedbd..2d32772c87c 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -381,7 +381,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		if (err)
 			goto unlock;
 		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
-			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
+			xfrm_audit_state_icvfail(x, skb, IPPROTO_AH);
 			err = -EBADMSG;
 		}
 	}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 5bd5292ad9f..e10f10bfe2c 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -186,6 +186,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
+			xfrm_audit_state_icvfail(x, skb, IPPROTO_ESP);
 			ret = -EBADMSG;
 			goto unlock;
 		}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 6644fc6d542..063ce6ed1bd 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -152,6 +152,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 
 	if (!x) {
 		XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
+		xfrm_audit_state_notfound_simple(skb, AF_INET6);
 		goto drop;
 	}
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 493243fc5fe..1b250f33ad5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -147,6 +147,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		x = xfrm_state_lookup(daddr, spi, nexthdr, family);
 		if (x == NULL) {
 			XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
+			xfrm_audit_state_notfound(skb, family, spi, seq);
 			goto drop;
 		}
 
@@ -163,7 +164,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop_unlock;
 		}
 
-		if (x->props.replay_window && xfrm_replay_check(x, seq)) {
+		if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) {
 			XFRM_INC_STATS(LINUX_MIB_XFRMINSEQOUTOFWINDOW);
 			goto drop_unlock;
 		}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 867484a046a..09514449fe8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -57,6 +57,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 		if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
 			XFRM_SKB_CB(skb)->seq = ++x->replay.oseq;
+			if (unlikely(x->replay.oseq == 0))
+				xfrm_audit_state_replay_overflow(x, skb);
 			if (xfrm_aevent_is_on())
 				xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 		}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index abc3e39b115..280f8ded975 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2407,12 +2407,11 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
 {
 	struct audit_buffer *audit_buf;
 
-	if (audit_enabled == 0)
-		return;
-	audit_buf = xfrm_audit_start(auid, secid);
+	audit_buf = xfrm_audit_start("SPD-add");
 	if (audit_buf == NULL)
 		return;
-	audit_log_format(audit_buf, " op=SPD-add res=%u", result);
+	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
 }
@@ -2423,12 +2422,11 @@ void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
 {
 	struct audit_buffer *audit_buf;
 
-	if (audit_enabled == 0)
-		return;
-	audit_buf = xfrm_audit_start(auid, secid);
+	audit_buf = xfrm_audit_start("SPD-delete");
 	if (audit_buf == NULL)
 		return;
-	audit_log_format(audit_buf, " op=SPD-delete res=%u", result);
+	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9e57378c51d..6bf876c866d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -61,6 +61,13 @@ static unsigned int xfrm_state_genid;
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
+#ifdef CONFIG_AUDITSYSCALL
+static void xfrm_audit_state_replay(struct xfrm_state *x,
+				    struct sk_buff *skb, __be32 net_seq);
+#else
+#define xfrm_audit_state_replay(x, s, sq)	do { ; } while (0)
+#endif /* CONFIG_AUDITSYSCALL */
+
 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
 					 xfrm_address_t *saddr,
 					 u32 reqid,
@@ -1609,13 +1616,14 @@ static void xfrm_replay_timer_handler(unsigned long data)
 	spin_unlock(&x->lock);
 }
 
-int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
+int xfrm_replay_check(struct xfrm_state *x,
+		      struct sk_buff *skb, __be32 net_seq)
 {
 	u32 diff;
 	u32 seq = ntohl(net_seq);
 
 	if (unlikely(seq == 0))
-		return -EINVAL;
+		goto err;
 
 	if (likely(seq > x->replay.seq))
 		return 0;
@@ -1624,14 +1632,18 @@ int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
 	if (diff >= min_t(unsigned int, x->props.replay_window,
 			  sizeof(x->replay.bitmap) * 8)) {
 		x->stats.replay_window++;
-		return -EINVAL;
+		goto err;
 	}
 
 	if (x->replay.bitmap & (1U << diff)) {
 		x->stats.replay++;
-		return -EINVAL;
+		goto err;
 	}
 	return 0;
+
+err:
+	xfrm_audit_state_replay(x, skb, net_seq);
+	return -EINVAL;
 }
 EXPORT_SYMBOL(xfrm_replay_check);
 
@@ -1996,8 +2008,8 @@ void __init xfrm_state_init(void)
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x,
-					       struct audit_buffer *audit_buf)
+static inline void xfrm_audit_helper_sainfo(struct xfrm_state *x,
+					    struct audit_buffer *audit_buf)
 {
 	struct xfrm_sec_ctx *ctx = x->security;
 	u32 spi = ntohl(x->id.spi);
@@ -2024,18 +2036,45 @@ static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x,
 	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
 }
 
+static inline void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
+					     struct audit_buffer *audit_buf)
+{
+	struct iphdr *iph4;
+	struct ipv6hdr *iph6;
+
+	switch (family) {
+	case AF_INET:
+		iph4 = ip_hdr(skb);
+		audit_log_format(audit_buf,
+				 " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
+				 NIPQUAD(iph4->saddr),
+				 NIPQUAD(iph4->daddr));
+		break;
+	case AF_INET6:
+		iph6 = ipv6_hdr(skb);
+		audit_log_format(audit_buf,
+				 " src=" NIP6_FMT " dst=" NIP6_FMT
+				 " flowlbl=0x%x%x%x",
+				 NIP6(iph6->saddr),
+				 NIP6(iph6->daddr),
+				 iph6->flow_lbl[0] & 0x0f,
+				 iph6->flow_lbl[1],
+				 iph6->flow_lbl[2]);
+		break;
+	}
+}
+
 void xfrm_audit_state_add(struct xfrm_state *x, int result,
 			  u32 auid, u32 secid)
 {
 	struct audit_buffer *audit_buf;
 
-	if (audit_enabled == 0)
-		return;
-	audit_buf = xfrm_audit_start(auid, secid);
+	audit_buf = xfrm_audit_start("SAD-add");
 	if (audit_buf == NULL)
 		return;
-	audit_log_format(audit_buf, " op=SAD-add res=%u", result);
-	xfrm_audit_common_stateinfo(x, audit_buf);
+	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+	xfrm_audit_helper_sainfo(x, audit_buf);
+	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
@@ -2045,14 +2084,96 @@ void xfrm_audit_state_delete(struct xfrm_state *x, int result,
 {
 	struct audit_buffer *audit_buf;
 
-	if (audit_enabled == 0)
-		return;
-	audit_buf = xfrm_audit_start(auid, secid);
+	audit_buf = xfrm_audit_start("SAD-delete");
 	if (audit_buf == NULL)
 		return;
-	audit_log_format(audit_buf, " op=SAD-delete res=%u", result);
-	xfrm_audit_common_stateinfo(x, audit_buf);
+	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+	xfrm_audit_helper_sainfo(x, audit_buf);
+	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
+
+void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
+				      struct sk_buff *skb)
+{
+	struct audit_buffer *audit_buf;
+	u32 spi;
+
+	audit_buf = xfrm_audit_start("SA-replay-overflow");
+	if (audit_buf == NULL)
+		return;
+	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
+	/* don't record the sequence number because it's inherent in this kind
+	 * of audit message */
+	spi = ntohl(x->id.spi);
+	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
+	audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
+
+static void xfrm_audit_state_replay(struct xfrm_state *x,
+			     struct sk_buff *skb, __be32 net_seq)
+{
+	struct audit_buffer *audit_buf;
+	u32 spi;
+
+	audit_buf = xfrm_audit_start("SA-replayed-pkt");
+	if (audit_buf == NULL)
+		return;
+	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
+	spi = ntohl(x->id.spi);
+	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
+			 spi, spi, ntohl(net_seq));
+	audit_log_end(audit_buf);
+}
+
+void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
+{
+	struct audit_buffer *audit_buf;
+
+	audit_buf = xfrm_audit_start("SA-notfound");
+	if (audit_buf == NULL)
+		return;
+	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
+	audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
+
+void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
+			       __be32 net_spi, __be32 net_seq)
+{
+	struct audit_buffer *audit_buf;
+	u32 spi;
+
+	audit_buf = xfrm_audit_start("SA-notfound");
+	if (audit_buf == NULL)
+		return;
+	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
+	spi = ntohl(net_spi);
+	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
+			 spi, spi, ntohl(net_seq));
+	audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
+
+void xfrm_audit_state_icvfail(struct xfrm_state *x,
+			      struct sk_buff *skb, u8 proto)
+{
+	struct audit_buffer *audit_buf;
+	__be32 net_spi;
+	__be32 net_seq;
+
+	audit_buf = xfrm_audit_start("SA-icv-failure");
+	if (audit_buf == NULL)
+		return;
+	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
+	if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
+		u32 spi = ntohl(net_spi);
+		audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
+				 spi, spi, ntohl(net_seq));
+	}
+	audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
 #endif /* CONFIG_AUDITSYSCALL */
-- 
cgit v1.2.3


From e1af9f270b69a3ad1dcbabb404dd1f40a96f43f5 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 21 Dec 2007 14:59:08 -0800
Subject: [XFRM]: Drop packets when replay counter would overflow

According to RFC4303, section 3.3.3 we need to drop outgoing packets which
cause the replay counter to overflow:

   3.3.3.  Sequence Number Generation

   The sender's counter is initialized to 0 when an SA is established.
   The sender increments the sequence number (or ESN) counter for this
   SA and inserts the low-order 32 bits of the value into the Sequence
   Number field.  Thus, the first packet sent using a given SA will
   contain a sequence number of 1.

   If anti-replay is enabled (the default), the sender checks to ensure
   that the counter has not cycled before inserting the new value in the
   Sequence Number field.  In other words, the sender MUST NOT send a
   packet on an SA if doing so would cause the sequence number to cycle.
   An attempt to transmit a packet that would result in sequence number
   overflow is an auditable event.  The audit log entry for this event
   SHOULD include the SPI value, current date/time, Source Address,
   Destination Address, and (in IPv6) the cleartext Flow ID.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 09514449fe8..d73003cb2c0 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -57,8 +57,11 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 		if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
 			XFRM_SKB_CB(skb)->seq = ++x->replay.oseq;
-			if (unlikely(x->replay.oseq == 0))
+			if (unlikely(x->replay.oseq == 0)) {
+				x->replay.oseq--;
 				xfrm_audit_state_replay_overflow(x, skb);
+				goto error;
+			}
 			if (xfrm_aevent_is_on())
 				xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 		}
-- 
cgit v1.2.3


From 426b5303eb435d98b9bee37a807be386bc2b3320 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 24 Jan 2008 00:13:18 -0800
Subject: [NETNS]: Modify the neighbour table code so it handles multiple
 network namespaces

I'm actually surprised at how much was involved.  At first glance it
appears that the neighbour table data structures are already split by
network device so all that should be needed is to modify the user
interface commands to filter the set of neighbours by the network
namespace of their devices.

However a couple things turned up while I was reading through the
code.  The proxy neighbour table allows entries with no network
device, and the neighbour parms are per network device (except for the
defaults) so they now need a per network namespace default.

So I updated the two structures (which surprised me) with their very
own network namespace parameter.  Updated the relevant lookup and
destroy routines with a network namespace parameter and modified the
code that interacts with users to filter out neighbour table entries
for devices of other namespaces.

I'm a little concerned that we can modify and display the global table
configuration and from all network namespaces.  But this appears good
enough for now.

I keep thinking modifying the neighbour table to have per network
namespace instances of each table type would should be cleaner.  The
hash table is already dynamically sized so there are it is not a
limiter.  The default parameter would be straight forward to take care
of.  However when I look at the how the network table is built and
used I still find some assumptions that there is only a single
neighbour table for each type of table in the kernel.  The netlink
operations, neigh_seq_start, the non-core network users that call
neigh_lookup.  So while it might be doable it would require more
refactoring than my current approach of just doing a little extra
filtering in the code.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/clip.c        |  15 ++++++-
 net/core/neighbour.c  | 116 +++++++++++++++++++++++++++++++-------------------
 net/decnet/dn_neigh.c |   6 +--
 net/decnet/dn_route.c |   2 +-
 net/ipv4/arp.c        |  12 +++---
 net/ipv6/ip6_output.c |   2 +-
 net/ipv6/ndisc.c      |   4 +-
 7 files changed, 99 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/net/atm/clip.c b/net/atm/clip.c
index 741742f0079..47fbdc0c5f7 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -949,6 +949,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
 
 	seq = file->private_data;
 	seq->private = state;
+	state->ns.net = get_proc_net(inode);
+	if (!state->ns.net) {
+		seq_release_private(inode, file);
+		rc = -ENXIO;
+	}
 out:
 	return rc;
 
@@ -957,11 +962,19 @@ out_kfree:
 	goto out;
 }
 
+static int arp_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct clip_seq_state *state = seq->private;
+	put_net(state->ns.net);
+	return seq_release_private(inode, file);
+}
+
 static const struct file_operations arp_seq_fops = {
 	.open		= arp_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= arp_seq_release,
 	.owner		= THIS_MODULE
 };
 #endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9a283fcde9a..bd899d55773 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -375,7 +375,8 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 	return n;
 }
 
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
+				     const void *pkey)
 {
 	struct neighbour *n;
 	int key_len = tbl->key_len;
@@ -385,7 +386,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
 
 	read_lock_bh(&tbl->lock);
 	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
-		if (!memcmp(n->primary_key, pkey, key_len)) {
+		if (!memcmp(n->primary_key, pkey, key_len) &&
+		    (net == n->dev->nd_net)) {
 			neigh_hold(n);
 			NEIGH_CACHE_STAT_INC(tbl, hits);
 			break;
@@ -463,7 +465,8 @@ out_neigh_release:
 	goto out;
 }
 
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
+				    struct net *net, const void *pkey,
 				    struct net_device *dev, int creat)
 {
 	struct pneigh_entry *n;
@@ -479,6 +482,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
 
 	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
 		if (!memcmp(n->key, pkey, key_len) &&
+		    (n->net == net) &&
 		    (n->dev == dev || !n->dev)) {
 			read_unlock_bh(&tbl->lock);
 			goto out;
@@ -495,6 +499,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
 	if (!n)
 		goto out;
 
+	n->net = hold_net(net);
 	memcpy(n->key, pkey, key_len);
 	n->dev = dev;
 	if (dev)
@@ -517,7 +522,7 @@ out:
 }
 
 
-int pneigh_delete(struct neigh_table *tbl, const void *pkey,
+int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 		  struct net_device *dev)
 {
 	struct pneigh_entry *n, **np;
@@ -532,13 +537,15 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey,
 	write_lock_bh(&tbl->lock);
 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
 	     np = &n->next) {
-		if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
+		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
+		    (n->net == net)) {
 			*np = n->next;
 			write_unlock_bh(&tbl->lock);
 			if (tbl->pdestructor)
 				tbl->pdestructor(n);
 			if (n->dev)
 				dev_put(n->dev);
+			release_net(n->net);
 			kfree(n);
 			return 0;
 		}
@@ -561,6 +568,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 					tbl->pdestructor(n);
 				if (n->dev)
 					dev_put(n->dev);
+				release_net(n->net);
 				kfree(n);
 				continue;
 			}
@@ -1261,12 +1269,37 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 	spin_unlock(&tbl->proxy_queue.lock);
 }
 
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      struct net *net, int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next) {
+		if (p->net != net)
+			continue;
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+	}
+
+	return NULL;
+}
 
 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 				      struct neigh_table *tbl)
 {
-	struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
+	struct neigh_parms *p, *ref;
+	struct net *net;
+
+	net = &init_net;
+	if (dev)
+		net = dev->nd_net;
+
+	ref = lookup_neigh_params(tbl, net, 0);
+	if (!ref)
+		return NULL;
 
+	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
 	if (p) {
 		p->tbl		  = tbl;
 		atomic_set(&p->refcnt, 1);
@@ -1282,6 +1315,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 			dev_hold(dev);
 			p->dev = dev;
 		}
+		p->net = hold_net(net);
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
 		p->next		= tbl->parms.next;
@@ -1323,6 +1357,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 
 void neigh_parms_destroy(struct neigh_parms *parms)
 {
+	release_net(parms->net);
 	kfree(parms);
 }
 
@@ -1333,6 +1368,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
+	tbl->parms.net = &init_net;
 	atomic_set(&tbl->parms.refcnt, 1);
 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
 	tbl->parms.reachable_time =
@@ -1446,9 +1482,6 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *dev = NULL;
 	int err = -EINVAL;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	if (nlmsg_len(nlh) < sizeof(*ndm))
 		goto out;
 
@@ -1477,7 +1510,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 			goto out_dev_put;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
+			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
 			goto out_dev_put;
 		}
 
@@ -1515,9 +1548,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *dev = NULL;
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
 	if (err < 0)
 		goto out;
@@ -1557,7 +1587,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 			struct pneigh_entry *pn;
 
 			err = -ENOBUFS;
-			pn = pneigh_lookup(tbl, dst, dev, 1);
+			pn = pneigh_lookup(tbl, net, dst, dev, 1);
 			if (pn) {
 				pn->flags = ndm->ndm_flags;
 				err = 0;
@@ -1752,19 +1782,6 @@ errout:
 	return -EMSGSIZE;
 }
 
-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
-						      int ifindex)
-{
-	struct neigh_parms *p;
-
-	for (p = &tbl->parms; p; p = p->next)
-		if ((p->dev && p->dev->ifindex == ifindex) ||
-		    (!p->dev && !ifindex))
-			return p;
-
-	return NULL;
-}
-
 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
 	[NDTA_NAME]		= { .type = NLA_STRING },
 	[NDTA_THRESH1]		= { .type = NLA_U32 },
@@ -1798,9 +1815,6 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[NDTA_MAX+1];
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
 			  nl_neightbl_policy);
 	if (err < 0)
@@ -1845,7 +1859,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		if (tbp[NDTPA_IFINDEX])
 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
-		p = lookup_neigh_params(tbl, ifindex);
+		p = lookup_neigh_params(tbl, net, ifindex);
 		if (p == NULL) {
 			err = -ENOENT;
 			goto errout_tbl_lock;
@@ -1926,9 +1940,6 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
-	if (net != &init_net)
-		return 0;
-
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
 	read_lock(&neigh_tbl_lock);
@@ -1943,8 +1954,11 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 				       NLM_F_MULTI) <= 0)
 			break;
 
-		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
-			if (nidx < neigh_skip)
+		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
+			if (net != p->net)
+				continue;
+
+			if (nidx++ < neigh_skip)
 				continue;
 
 			if (neightbl_fill_param_info(skb, tbl, p,
@@ -2020,6 +2034,7 @@ static void neigh_update_notify(struct neighbour *neigh)
 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			    struct netlink_callback *cb)
 {
+	struct net * net = skb->sk->sk_net;
 	struct neighbour *n;
 	int rc, h, s_h = cb->args[1];
 	int idx, s_idx = idx = cb->args[2];
@@ -2030,8 +2045,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			continue;
 		if (h > s_h)
 			s_idx = 0;
-		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
-			if (idx < s_idx)
+		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
+			int lidx;
+			if (n->dev->nd_net != net)
+				continue;
+			lidx = idx++;
+			if (lidx < s_idx)
 				continue;
 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 					    cb->nlh->nlmsg_seq,
@@ -2053,13 +2072,9 @@ out:
 
 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = skb->sk->sk_net;
 	struct neigh_table *tbl;
 	int t, family, s_t;
 
-	if (net != &init_net)
-		return 0;
-
 	read_lock(&neigh_tbl_lock);
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 	s_t = cb->args[0];
@@ -2127,6 +2142,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
 static struct neighbour *neigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net *net = state->net;
 	struct neigh_table *tbl = state->tbl;
 	struct neighbour *n = NULL;
 	int bucket = state->bucket;
@@ -2136,6 +2152,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
 		n = tbl->hash_buckets[bucket];
 
 		while (n) {
+			if (n->dev->nd_net != net)
+				goto next;
 			if (state->neigh_sub_iter) {
 				loff_t fakep = 0;
 				void *v;
@@ -2165,6 +2183,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
 					loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net *net = state->net;
 	struct neigh_table *tbl = state->tbl;
 
 	if (state->neigh_sub_iter) {
@@ -2176,6 +2195,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
 
 	while (1) {
 		while (n) {
+			if (n->dev->nd_net != net)
+				goto next;
 			if (state->neigh_sub_iter) {
 				void *v = state->neigh_sub_iter(state, n, pos);
 				if (v)
@@ -2222,6 +2243,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net * net = state->net;
 	struct neigh_table *tbl = state->tbl;
 	struct pneigh_entry *pn = NULL;
 	int bucket = state->bucket;
@@ -2229,6 +2251,8 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
 		pn = tbl->phash_buckets[bucket];
+		while (pn && (pn->net != net))
+			pn = pn->next;
 		if (pn)
 			break;
 	}
@@ -2242,6 +2266,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
 					    loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net * net = state->net;
 	struct neigh_table *tbl = state->tbl;
 
 	pn = pn->next;
@@ -2249,6 +2274,8 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
 		if (++state->bucket > PNEIGH_HASHMASK)
 			break;
 		pn = tbl->phash_buckets[state->bucket];
+		while (pn && (pn->net != net))
+			pn = pn->next;
 		if (pn)
 			break;
 	}
@@ -2450,6 +2477,7 @@ static inline size_t neigh_nlmsg_size(void)
 
 static void __neigh_notify(struct neighbour *n, int type, int flags)
 {
+	struct net *net = n->dev->nd_net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
@@ -2464,10 +2492,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, RTNLGRP_NEIGH, err);
+		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 }
 
 #ifdef CONFIG_ARPD
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index e851b143cca..1ca13b17974 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -580,8 +580,8 @@ static const struct seq_operations dn_neigh_seq_ops = {
 
 static int dn_neigh_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &dn_neigh_seq_ops,
-			sizeof(struct neigh_seq_state));
+	return seq_open_net(inode, file, &dn_neigh_seq_ops,
+			    sizeof(struct neigh_seq_state));
 }
 
 static const struct file_operations dn_neigh_seq_fops = {
@@ -589,7 +589,7 @@ static const struct file_operations dn_neigh_seq_fops = {
 	.open		= dn_neigh_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
 #endif
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1ae5efcdbd5..938ba7da217 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -984,7 +984,7 @@ source_ok:
 		 * here
 		 */
 		if (!try_hard) {
-			neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst);
+			neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
 			if (neigh) {
 				if ((oldflp->oif &&
 				    (neigh->dev->ifindex != oldflp->oif)) ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index fdf12d1c350..9eb6d3ab297 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -837,7 +837,7 @@ static int arp_process(struct sk_buff *skb)
 		} else if (IN_DEV_FORWARD(in_dev)) {
 			if ((rt->rt_flags&RTCF_DNAT) ||
 			    (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
-			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
+			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0)))) {
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
@@ -980,7 +980,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
 			return -ENODEV;
 	}
 	if (mask) {
-		if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
+		if (pneigh_lookup(&arp_tbl, &init_net, &ip, dev, 1) == NULL)
 			return -ENOBUFS;
 		return 0;
 	}
@@ -1089,7 +1089,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
 	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
 
 	if (mask == htonl(0xFFFFFFFF))
-		return pneigh_delete(&arp_tbl, &ip, dev);
+		return pneigh_delete(&arp_tbl, &init_net, &ip, dev);
 
 	if (mask)
 		return -EINVAL;
@@ -1375,8 +1375,8 @@ static const struct seq_operations arp_seq_ops = {
 
 static int arp_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &arp_seq_ops,
-			sizeof(struct neigh_seq_state));
+	return seq_open_net(inode, file, &arp_seq_ops,
+			    sizeof(struct neigh_seq_state));
 }
 
 static const struct file_operations arp_seq_fops = {
@@ -1384,7 +1384,7 @@ static const struct file_operations arp_seq_fops = {
 	.open           = arp_seq_open,
 	.read           = seq_read,
 	.llseek         = seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
 static int __init arp_proc_init(void)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4686646058d..ba7c8aaf278 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -449,7 +449,7 @@ int ip6_forward(struct sk_buff *skb)
 
 	/* XXX: idev->cnf.proxy_ndp? */
 	if (ipv6_devconf.proxy_ndp &&
-	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+	    pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
 		int proxied = ip6_forward_proxy_check(skb);
 		if (proxied > 0)
 			return ip6_input(skb);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b87f9d245e2..b66a1f81bd8 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -789,7 +789,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 		if (ipv6_chk_acast_addr(dev, &msg->target) ||
 		    (idev->cnf.forwarding &&
 		     (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
-		     (pneigh = pneigh_lookup(&nd_tbl,
+		     (pneigh = pneigh_lookup(&nd_tbl, &init_net,
 					     &msg->target, dev, 0)) != NULL)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
 			    skb->pkt_type != PACKET_HOST &&
@@ -930,7 +930,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		 */
 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
 		    ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
-		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+		    pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
 			/* XXX: idev->cnf.prixy_ndp */
 			goto out;
 		}
-- 
cgit v1.2.3


From b15c4bcd15741b31019379298edfca28dc78029d Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Mon, 24 Dec 2007 16:00:09 -0800
Subject: [XFRM]: Fix outbound statistics.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index d73003cb2c0..c1ba63efd7c 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -43,17 +43,23 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 	do {
 		err = xfrm_state_check_space(x, skb);
-		if (err)
+		if (err) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
 			goto error_nolock;
+		}
 
 		err = x->outer_mode->output(x, skb);
-		if (err)
+		if (err) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR);
 			goto error_nolock;
+		}
 
 		spin_lock_bh(&x->lock);
 		err = xfrm_state_check_expire(x);
-		if (err)
+		if (err) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED);
 			goto error;
+		}
 
 		if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
 			XFRM_SKB_CB(skb)->seq = ++x->replay.oseq;
-- 
cgit v1.2.3


From 21371f768bf7127ee45bfaadd17899df6a439e8f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 24 Dec 2007 20:57:56 -0800
Subject: [SOCK] Avoid divides in sk_stream_pages() and
 __sk_stream_mem_reclaim()

sk_forward_alloc being signed, we should take care of divides by
SK_STREAM_MEM_QUANTUM we do in sk_stream_pages() and
__sk_stream_mem_reclaim()

This patchs introduces SK_STREAM_MEM_QUANTUM_SHIFT, defined
as ilog2(SK_STREAM_MEM_QUANTUM), to be able to use right
shifts instead of plain divides.

This should help compiler to choose right shifts instead of
expensive divides (as seen with CONFIG_CC_OPTIMIZE_FOR_SIZE=y on x86)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/stream.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/stream.c b/net/core/stream.c
index 5586879bb9b..bf188ffdbdb 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -196,7 +196,7 @@ EXPORT_SYMBOL(sk_stream_error);
 
 void __sk_stream_mem_reclaim(struct sock *sk)
 {
-	atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+	atomic_sub(sk->sk_forward_alloc >> SK_STREAM_MEM_QUANTUM_SHIFT,
 		   sk->sk_prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
 	if (*sk->sk_prot->memory_pressure &&
-- 
cgit v1.2.3


From 7ffc49a6ee92b7138c2ee28073a8e10e58335d62 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 24 Dec 2007 21:28:09 -0800
Subject: [ETH]: Combine format_addr() with print_mac().

print_mac() used many most net drivers and format_addr() used by
net-sysfs.c are very similar and they can be intergrated.

format_addr() is also identically redefined in the qla4xxx iscsi
driver.

Export a new function sysfs_format_mac() to be used by net-sysfs,
qla4xxx and others in the future.  Both print_mac() and
sysfs_format_mac() call _format_mac_addr() to do the formatting.

Changed print_mac() to use unsigned char * to be consistent with
net_device struct's dev_addr.  Added buffer length overrun checking
as suggested by Joe Perches.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 15 ++-------------
 net/ethernet/eth.c   | 30 +++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e41f4b9d2e7..7635d3f7272 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,17 +95,6 @@ NETDEVICE_SHOW(type, fmt_dec);
 NETDEVICE_SHOW(link_mode, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
-static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
-{
-	int i;
-	char *cp = buf;
-
-	for (i = 0; i < len; i++)
-		cp += sprintf(cp, "%02x%c", addr[i],
-			      i == (len - 1) ? '\n' : ':');
-	return cp - buf;
-}
-
 static ssize_t show_address(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
@@ -114,7 +103,7 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(net))
-	    ret = format_addr(buf, net->dev_addr, net->addr_len);
+		ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
 	read_unlock(&dev_base_lock);
 	return ret;
 }
@@ -124,7 +113,7 @@ static ssize_t show_broadcast(struct device *dev,
 {
 	struct net_device *net = to_net_dev(dev);
 	if (dev_isalive(net))
-		return format_addr(buf, net->broadcast, net->addr_len);
+		return sysfs_format_mac(buf, net->broadcast, net->addr_len);
 	return -EINVAL;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 6b2e454ae31..a7b417523e9 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -359,10 +359,34 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
 }
 EXPORT_SYMBOL(alloc_etherdev_mq);
 
-char *print_mac(char *buf, const u8 *addr)
+static size_t _format_mac_addr(char *buf, int buflen,
+				const unsigned char *addr, int len)
 {
-	sprintf(buf, MAC_FMT,
-		addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+	int i;
+	char *cp = buf;
+
+	for (i = 0; i < len; i++) {
+		cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
+		if (i == len - 1)
+			break;
+		cp += strlcpy(cp, ":", buflen - (cp - buf));
+	}
+	return cp - buf;
+}
+
+ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
+{
+	size_t l;
+
+	l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
+	l += strlcpy(buf + l, "\n", PAGE_SIZE - l);
+	return ((ssize_t) l);
+}
+EXPORT_SYMBOL(sysfs_format_mac);
+
+char *print_mac(char *buf, const unsigned char *addr)
+{
+	_format_mac_addr(buf, MAC_BUF_SIZE, addr, ETH_ALEN);
 	return buf;
 }
 EXPORT_SYMBOL(print_mac);
-- 
cgit v1.2.3


From 0e3a4803aa06cd7bc2cfc1d04289df4f6027640a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 24 Dec 2007 21:33:45 -0800
Subject: [TCP]: Force TSO splits to MSS boundaries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If snd_wnd - snd_nxt wasn't multiple of MSS, skb was split on
odd boundary by the callers of tcp_window_allows.

We try really hard to avoid unnecessary modulos. Therefore the
old caller side check "if (skb->len < limit)" was too wide as
well because limit is not bound in any way to skb->len and can
cause spurious testing for trimming in the middle of the queue
while we only wanted that to happen at the tail of the queue.
A simple additional caller side check for tcp_write_queue_tail
would likely have resulted 2 x modulos because the limit would
have to be first calculated from window, however, doing that
unnecessary modulo is not mandatory. After a minor change to
the algorithm, simply determine first if the modulo is needed
at all and at that point immediately decide also from which
value it should be calculated from.

This approach also kills some duplicated code.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a9510acb14..9058e0a2510 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1017,13 +1017,29 @@ static void tcp_cwnd_validate(struct sock *sk)
 	}
 }
 
-static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+/* Returns the portion of skb which can be sent right away without
+ * introducing MSS oddities to segment boundaries. In rare cases where
+ * mss_now != mss_cache, we will request caller to create a small skb
+ * per input skb which could be mostly avoided here (if desired).
+ */
+static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+					unsigned int mss_now,
+					unsigned int cwnd)
 {
-	u32 window, cwnd_len;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 needed, window, cwnd_len;
 
 	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
 	cwnd_len = mss_now * cwnd;
-	return min(window, cwnd_len);
+
+	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+		return cwnd_len;
+
+	if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+		return cwnd_len;
+
+	needed = min(skb->len, window);
+	return needed - needed % mss_now;
 }
 
 /* Can at least one segment of SKB be sent right now, according to the
@@ -1458,17 +1474,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1) {
-			limit = tcp_window_allows(tp, skb,
-						  mss_now, cwnd_quota);
-
-			if (skb->len < limit) {
-				unsigned int trim = skb->len % mss_now;
-
-				if (trim)
-					limit = skb->len - trim;
-			}
-		}
+		if (tso_segs > 1)
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1515,7 +1523,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
  */
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
@@ -1530,17 +1537,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		BUG_ON(!tso_segs);
 
 		limit = mss_now;
-		if (tso_segs > 1) {
-			limit = tcp_window_allows(tp, skb,
-						  mss_now, cwnd_quota);
-
-			if (skb->len < limit) {
-				unsigned int trim = skb->len % mss_now;
-
-				if (trim)
-					limit = skb->len - trim;
-			}
-		}
+		if (tso_segs > 1)
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
-- 
cgit v1.2.3


From c776ee01bd83d9ba58220293a6e76e9772be0e66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 24 Dec 2007 21:55:39 -0800
Subject: [TCP]: Remove seq_rtt ptr from clean_rtx_queue args
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While checking Gavin's patch I noticed that the returned seq_rtt
is not used by the caller.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 519bd24e0d3..efea9873208 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2759,8 +2759,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
-			       int prior_fackets)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2915,7 +2914,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
 		}
 	}
 #endif
-	*seq_rtt_p = seq_rtt;
 	return flag;
 }
 
@@ -3152,7 +3150,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	u32 prior_in_flight;
 	u32 prior_fackets;
-	s32 seq_rtt;
 	int prior_packets;
 	int frto_cwnd = 0;
 
@@ -3218,7 +3215,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets);
 
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
-- 
cgit v1.2.3


From fcb8c156c8277ee9d71c8c81587d494c9c5240b3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 30 Dec 2007 21:09:38 -0800
Subject: [IPSEC]: Fix double free on skb on async output

When the output transform returns EINPROGRESS due to async operation we'll
free the skb the straight away as if it were an error.  This patch fixes
that so that the skb is freed when the async operation completes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index c1ba63efd7c..81ad8eb2b28 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -78,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		spin_unlock_bh(&x->lock);
 
 		err = x->type->output(x, skb);
+		if (err == -EINPROGRESS)
+			goto out_exit;
 
 resume:
 		if (err) {
-- 
cgit v1.2.3


From 0883ae0e5599656b5f3b0e9ce474e01dee7dfee4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 30 Dec 2007 21:10:14 -0800
Subject: [IPSEC]: Fix transport-mode async resume on intput without netfilter

When netfilter is off the transport-mode async resumption doesn't work
because we don't push back the IP header.  This patch fixes that by
moving most of the code outside of ifdef NETFILTER since the only part
that's not common is the short-circuit in the protocol handler.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_input.c | 12 +++++-------
 net/ipv6/xfrm6_input.c | 12 +++++-------
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 33f990d56c9..390dcb1354a 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -51,7 +51,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 
 	iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
 
-#ifdef CONFIG_NETFILTER
+#ifndef CONFIG_NETFILTER
+	if (!async)
+		return -iph->protocol;
+#endif
+
 	__skb_push(skb, skb->data - skb_network_header(skb));
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
@@ -59,12 +63,6 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		xfrm4_rcv_encap_finish);
 	return 0;
-#else
-	if (async)
-		return xfrm4_rcv_encap_finish(skb);
-
-	return -iph->protocol;
-#endif
 }
 
 /* If it's a keepalive packet, then just eat it.
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 063ce6ed1bd..a4714d76ae6 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -34,19 +34,17 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	skb_network_header(skb)[IP6CB(skb)->nhoff] =
 		XFRM_MODE_SKB_CB(skb)->protocol;
 
-#ifdef CONFIG_NETFILTER
+#ifndef CONFIG_NETFILTER
+	if (!async)
+		return 1;
+#endif
+
 	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
 
 	NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		ip6_rcv_finish);
 	return -1;
-#else
-	if (async)
-		return ip6_rcv_finish(skb);
-
-	return 1;
-#endif
 }
 
 int xfrm6_rcv(struct sk_buff *skb)
-- 
cgit v1.2.3


From 9dd3245a2ac1834797191072705015e6a12f55bf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 30 Dec 2007 21:10:30 -0800
Subject: [IPSEC]: Move all calls to xfrm_audit_state_icvfail to xfrm_input

Let's nip the code duplication in the bud :)

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ah4.c        | 4 +---
 net/ipv4/esp4.c       | 1 -
 net/ipv6/ah6.c        | 4 +---
 net/ipv6/esp6.c       | 1 -
 net/xfrm/xfrm_input.c | 5 ++++-
 5 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index ec8de0aa20e..d76803a3dca 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -179,10 +179,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
 			goto unlock;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
-			xfrm_audit_state_icvfail(x, skb, IPPROTO_AH);
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
 			err = -EBADMSG;
-		}
 	}
 unlock:
 	spin_unlock(&x->lock);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b334c7619c0..28ea5c77ca2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -191,7 +191,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			xfrm_audit_state_icvfail(x, skb, IPPROTO_ESP);
 			err = -EBADMSG;
 			goto unlock;
 		}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 2d32772c87c..fb0d07a15e9 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -380,10 +380,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
 			goto unlock;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
-			xfrm_audit_state_icvfail(x, skb, IPPROTO_AH);
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
 			err = -EBADMSG;
-		}
 	}
 unlock:
 	spin_unlock(&x->lock);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index e10f10bfe2c..5bd5292ad9f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -186,7 +186,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			xfrm_audit_state_icvfail(x, skb, IPPROTO_ESP);
 			ret = -EBADMSG;
 			goto unlock;
 		}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1b250f33ad5..039e7019c48 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -186,8 +186,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 resume:
 		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
-			if (nexthdr == -EBADMSG)
+			if (nexthdr == -EBADMSG) {
+				xfrm_audit_state_icvfail(x, skb,
+							 x->type->proto);
 				x->stats.integrity_failed++;
+			}
 			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR);
 			goto drop_unlock;
 		}
-- 
cgit v1.2.3


From 6fe5452b3bb53e5bf5f37590b280ee73dc1e61b6 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 30 Dec 2007 23:15:15 -0800
Subject: [ATM]: atm is no longer experimental

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/Kconfig | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index 21ff276b2d8..754ea103b37 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -1,10 +1,9 @@
 #
-# Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)
+# Asynchronous Transfer Mode (ATM)
 #
 
 config ATM
-	tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	tristate "Asynchronous Transfer Mode (ATM)"
 	---help---
 	  ATM is a high-speed networking technology for Local Area Networks
 	  and Wide Area Networks.  It uses a fixed packet size and is
@@ -20,7 +19,7 @@ config ATM
 	  further details.
 
 config ATM_CLIP
-	tristate "Classical IP over ATM (EXPERIMENTAL)"
+	tristate "Classical IP over ATM"
 	depends on ATM && INET
 	help
 	  Classical IP over ATM for PVCs and SVCs, supporting InARP and
@@ -29,7 +28,7 @@ config ATM_CLIP
 	  (LANE)" below.
 
 config ATM_CLIP_NO_ICMP
-	bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)"
+	bool "Do NOT send ICMP if no neighbour"
 	depends on ATM_CLIP
 	help
 	  Normally, an "ICMP host unreachable" message is sent if a neighbour
@@ -39,7 +38,7 @@ config ATM_CLIP_NO_ICMP
 	  such neighbours are silently discarded instead.
 
 config ATM_LANE
-	tristate "LAN Emulation (LANE) support (EXPERIMENTAL)"
+	tristate "LAN Emulation (LANE) support"
 	depends on ATM
 	help
 	  LAN Emulation emulates services of existing LANs across an ATM
@@ -48,7 +47,7 @@ config ATM_LANE
 	  ELAN and Ethernet segments. You need LANE if you want to try MPOA.
 
 config ATM_MPOA
-	tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)"
+	tristate "Multi-Protocol Over ATM (MPOA) support"
 	depends on ATM && INET && ATM_LANE!=n
 	help
 	  Multi-Protocol Over ATM allows ATM edge devices such as routers,
-- 
cgit v1.2.3


From ef39592f786b6d56d9faf988a3f18786eeb050b3 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Sun, 30 Dec 2007 23:16:06 -0800
Subject: [ATM]: Convert struct class_device to struct device

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
---
 net/atm/atm_sysfs.c | 66 +++++++++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 9ef07eda2c4..1b88311f213 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -9,13 +9,15 @@
 
 #define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev)
 
-static ssize_t show_type(struct class_device *cdev, char *buf)
+static ssize_t show_type(struct device *cdev,
+			 struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
 	return sprintf(buf, "%s\n", adev->type);
 }
 
-static ssize_t show_address(struct class_device *cdev, char *buf)
+static ssize_t show_address(struct device *cdev,
+			    struct device_attribute *attr, char *buf)
 {
 	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -28,7 +30,8 @@ static ssize_t show_address(struct class_device *cdev, char *buf)
 	return pos - buf;
 }
 
-static ssize_t show_atmaddress(struct class_device *cdev, char *buf)
+static ssize_t show_atmaddress(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
 {
 	unsigned long flags;
 	char *pos = buf;
@@ -54,7 +57,8 @@ static ssize_t show_atmaddress(struct class_device *cdev, char *buf)
 	return pos - buf;
 }
 
-static ssize_t show_carrier(struct class_device *cdev, char *buf)
+static ssize_t show_carrier(struct device *cdev,
+			    struct device_attribute *attr, char *buf)
 {
 	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -65,7 +69,8 @@ static ssize_t show_carrier(struct class_device *cdev, char *buf)
 	return pos - buf;
 }
 
-static ssize_t show_link_rate(struct class_device *cdev, char *buf)
+static ssize_t show_link_rate(struct device *cdev,
+			      struct device_attribute *attr, char *buf)
 {
 	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -90,22 +95,23 @@ static ssize_t show_link_rate(struct class_device *cdev, char *buf)
 	return pos - buf;
 }
 
-static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static CLASS_DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static CLASS_DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
-
-static struct class_device_attribute *atm_attrs[] = {
-	&class_device_attr_atmaddress,
-	&class_device_attr_address,
-	&class_device_attr_carrier,
-	&class_device_attr_type,
-	&class_device_attr_link_rate,
+static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
+static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
+static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
+static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+
+static struct device_attribute *atm_attrs[] = {
+	&dev_attr_atmaddress,
+	&dev_attr_address,
+	&dev_attr_carrier,
+	&dev_attr_type,
+	&dev_attr_link_rate,
 	NULL
 };
 
-static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env)
+
+static int atm_uevent(struct device *cdev, struct kobj_uevent_env *env)
 {
 	struct atm_dev *adev;
 
@@ -122,7 +128,7 @@ static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env)
 	return 0;
 }
 
-static void atm_release(struct class_device *cdev)
+static void atm_release(struct device *cdev)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
 
@@ -131,25 +137,25 @@ static void atm_release(struct class_device *cdev)
 
 static struct class atm_class = {
 	.name		= "atm",
-	.release	= atm_release,
-	.uevent		= atm_uevent,
+	.dev_release	= atm_release,
+	.dev_uevent		= atm_uevent,
 };
 
 int atm_register_sysfs(struct atm_dev *adev)
 {
-	struct class_device *cdev = &adev->class_dev;
+	struct device *cdev = &adev->class_dev;
 	int i, j, err;
 
 	cdev->class = &atm_class;
-	class_set_devdata(cdev, adev);
+	dev_set_drvdata(cdev, adev);
 
-	snprintf(cdev->class_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number);
-	err = class_device_register(cdev);
+	snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number);
+	err = device_register(cdev);
 	if (err < 0)
 		return err;
 
 	for (i = 0; atm_attrs[i]; i++) {
-		err = class_device_create_file(cdev, atm_attrs[i]);
+		err = device_create_file(cdev, atm_attrs[i]);
 		if (err)
 			goto err_out;
 	}
@@ -158,16 +164,16 @@ int atm_register_sysfs(struct atm_dev *adev)
 
 err_out:
 	for (j = 0; j < i; j++)
-		class_device_remove_file(cdev, atm_attrs[j]);
-	class_device_del(cdev);
+		device_remove_file(cdev, atm_attrs[j]);
+	device_del(cdev);
 	return err;
 }
 
 void atm_unregister_sysfs(struct atm_dev *adev)
 {
-	struct class_device *cdev = &adev->class_dev;
+	struct device *cdev = &adev->class_dev;
 
-	class_device_del(cdev);
+	device_del(cdev);
 }
 
 int __init atm_sysfs_init(void)
-- 
cgit v1.2.3


From 097b19a9987204b898299260ee3ebff4cf716800 Mon Sep 17 00:00:00 2001
From: Eric Kinzie <ekinzie@cmf.nrl.navy.mil>
Date: Sun, 30 Dec 2007 23:17:53 -0800
Subject: [ATM]: [br2864] routed support

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 148 +++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 116 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ba6428f204f..d9bb2a16b7c 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -1,7 +1,8 @@
 /*
-Experimental ethernet netdevice using ATM AAL5 as underlying carrier
-(RFC1483 obsoleted by RFC2684) for Linux 2.4
-Author: Marcell GAL, 2000, XDSL Ltd, Hungary
+Ethernet netdevice using ATM AAL5 as underlying carrier
+(RFC1483 obsoleted by RFC2684) for Linux
+Authors: Marcell GAL, 2000, XDSL Ltd, Hungary
+         Eric Kinzie, 2006-2007, US Naval Research Laboratory
 */
 
 #include <linux/module.h>
@@ -39,9 +40,27 @@ static void skb_debug(const struct sk_buff *skb)
 #define skb_debug(skb)	do {} while (0)
 #endif
 
+#define BR2684_ETHERTYPE_LEN	2
+#define BR2684_PAD_LEN		2
+
+#define LLC		0xaa, 0xaa, 0x03
+#define SNAP_BRIDGED	0x00, 0x80, 0xc2
+#define SNAP_ROUTED	0x00, 0x00, 0x00
+#define PID_ETHERNET	0x00, 0x07
+#define ETHERTYPE_IPV4	0x08, 0x00
+#define ETHERTYPE_IPV6	0x86, 0xdd
+#define PAD_BRIDGED	0x00, 0x00
+
+static unsigned char ethertype_ipv4[] =
+	{ ETHERTYPE_IPV4 };
+static unsigned char ethertype_ipv6[] =
+	{ ETHERTYPE_IPV6 };
 static unsigned char llc_oui_pid_pad[] =
-    { 0xAA, 0xAA, 0x03, 0x00, 0x80, 0xC2, 0x00, 0x07, 0x00, 0x00 };
-#define PADLEN	(2)
+	{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
+static unsigned char llc_oui_ipv4[] =
+	{ LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
+static unsigned char llc_oui_ipv6[] =
+	{ LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
 
 enum br2684_encaps {
 	e_vc  = BR2684_ENCAPS_VC,
@@ -69,6 +88,7 @@ struct br2684_dev {
 	struct list_head brvccs; /* one device <=> one vcc (before xmas) */
 	struct net_device_stats stats;
 	int mac_was_set;
+	enum br2684_payload payload;
 };
 
 /*
@@ -136,6 +156,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 {
 	struct atm_vcc *atmvcc;
 	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
+
 	if (skb_headroom(skb) < minheadroom) {
 		struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
 		brvcc->copies_needed++;
@@ -146,11 +167,32 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 		}
 		skb = skb2;
 	}
-	skb_push(skb, minheadroom);
-	if (brvcc->encaps == e_llc)
-		skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
-	else
-		memset(skb->data, 0, 2);
+
+	if (brvcc->encaps == e_llc) {
+		if (brdev->payload == p_bridged) {
+			skb_push(skb, sizeof(llc_oui_pid_pad));
+			skb_copy_to_linear_data(skb, llc_oui_pid_pad, sizeof(llc_oui_pid_pad));
+		} else if (brdev->payload == p_routed) {
+			unsigned short prot = ntohs(skb->protocol);
+
+			skb_push(skb, sizeof(llc_oui_ipv4));
+			switch (prot) {
+				case ETH_P_IP:
+					skb_copy_to_linear_data(skb, llc_oui_ipv4, sizeof(llc_oui_ipv4));
+					break;
+				case ETH_P_IPV6:
+					skb_copy_to_linear_data(skb, llc_oui_ipv6, sizeof(llc_oui_ipv6));
+					break;
+				default:
+					dev_kfree_skb(skb);
+					return 0;
+			}
+		}
+	} else {
+		skb_push(skb, 2);
+		if (brdev->payload == p_bridged)
+			memset(skb->data, 0, 2);
+	}
 	skb_debug(skb);
 
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
@@ -299,7 +341,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
 	struct net_device *net_dev = brvcc->device;
 	struct br2684_dev *brdev = BRPRIV(net_dev);
-	int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN;
 
 	pr_debug("br2684_push\n");
 
@@ -320,35 +361,50 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	atm_return(atmvcc, skb->truesize);
 	pr_debug("skb from brdev %p\n", brdev);
 	if (brvcc->encaps == e_llc) {
+
+		if (skb->len > 7 && skb->data[7] == 0x01)
+			__skb_trim(skb, skb->len - 4);
+
+		/* accept packets that have "ipv[46]" in the snap header */
+		if ((skb->len >= (sizeof(llc_oui_ipv4)))
+		    && (memcmp(skb->data, llc_oui_ipv4, sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) {
+			if (memcmp(skb->data + 6, ethertype_ipv6, sizeof(ethertype_ipv6)) == 0)
+				skb->protocol = __constant_htons(ETH_P_IPV6);
+			else if (memcmp(skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0)
+				skb->protocol = __constant_htons(ETH_P_IP);
+			else {
+				brdev->stats.rx_errors++;
+				dev_kfree_skb(skb);
+				return;
+			}
+			skb_pull(skb, sizeof(llc_oui_ipv4));
+			skb_reset_network_header(skb);
+			skb->pkt_type = PACKET_HOST;
+
 		/* let us waste some time for checking the encapsulation.
 		   Note, that only 7 char is checked so frames with a valid FCS
 		   are also accepted (but FCS is not checked of course) */
-		if (memcmp(skb->data, llc_oui_pid_pad, 7)) {
+		} else if ((skb->len >= sizeof(llc_oui_pid_pad)) &&
+		           (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) {
+			skb_pull(skb, sizeof(llc_oui_pid_pad));
+			skb->protocol = eth_type_trans(skb, net_dev);
+		} else {
 			brdev->stats.rx_errors++;
 			dev_kfree_skb(skb);
 			return;
 		}
 
-		/* Strip FCS if present */
-		if (skb->len > 7 && skb->data[7] == 0x01)
-			__skb_trim(skb, skb->len - 4);
 	} else {
-		plen = PADLEN + ETH_HLEN;	/* pad, dstmac,srcmac, ethtype */
 		/* first 2 chars should be 0 */
 		if (*((u16 *) (skb->data)) != 0) {
 			brdev->stats.rx_errors++;
 			dev_kfree_skb(skb);
 			return;
 		}
-	}
-	if (skb->len < plen) {
-		brdev->stats.rx_errors++;
-		dev_kfree_skb(skb);	/* dev_ not needed? */
-		return;
+		skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */
+		skb->protocol = eth_type_trans(skb, net_dev);
 	}
 
-	skb_pull(skb, plen - ETH_HLEN);
-	skb->protocol = eth_type_trans(skb, net_dev);
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 	if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
 		brdev->stats.rx_dropped++;
@@ -482,25 +538,52 @@ static void br2684_setup(struct net_device *netdev)
 	INIT_LIST_HEAD(&brdev->brvccs);
 }
 
+static void br2684_setup_routed(struct net_device *netdev)
+{
+	struct br2684_dev *brdev = BRPRIV(netdev);
+	brdev->net_dev = netdev;
+
+	netdev->hard_header_len = 0;
+	my_eth_mac_addr = netdev->set_mac_address;
+	netdev->set_mac_address = br2684_mac_addr;
+	netdev->hard_start_xmit = br2684_start_xmit;
+	netdev->get_stats = br2684_get_stats;
+	netdev->addr_len = 0;
+	netdev->mtu = 1500;
+	netdev->type = ARPHRD_PPP;
+	netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+	netdev->tx_queue_len = 100;
+	INIT_LIST_HEAD(&brdev->brvccs);
+}
+
 static int br2684_create(void __user *arg)
 {
 	int err;
 	struct net_device *netdev;
 	struct br2684_dev *brdev;
 	struct atm_newif_br2684 ni;
+	enum br2684_payload payload;
 
 	pr_debug("br2684_create\n");
 
 	if (copy_from_user(&ni, arg, sizeof ni)) {
 		return -EFAULT;
 	}
+
+	if (ni.media & BR2684_FLAG_ROUTED)
+		payload = p_routed;
+	else
+		payload = p_bridged;
+	ni.media &= 0xffff; /* strip flags */
+
 	if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) {
 		return -EINVAL;
 	}
 
 	netdev = alloc_netdev(sizeof(struct br2684_dev),
 			      ni.ifname[0] ? ni.ifname : "nas%d",
-			      br2684_setup);
+			      (payload == p_routed) ?
+			       br2684_setup_routed : br2684_setup);
 	if (!netdev)
 		return -ENOMEM;
 
@@ -516,6 +599,7 @@ static int br2684_create(void __user *arg)
 	}
 
 	write_lock_irq(&devs_lock);
+	brdev->payload = payload;
 	brdev->number = list_empty(&br2684_devs) ? 1 :
 	    BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
 	list_add_tail(&brdev->br2684_devs, &br2684_devs);
@@ -601,14 +685,14 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
 		   brdev->mac_was_set ? "set" : "auto");
 
 	list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
-		seq_printf(seq, "  vcc %d.%d.%d: encaps=%s"
-				    ", failed copies %u/%u"
-				    "\n", brvcc->atmvcc->dev->number,
-				    brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
-				    (brvcc->encaps == e_llc) ? "LLC" : "VC"
-				    , brvcc->copies_failed
-				    , brvcc->copies_needed
-				    );
+		seq_printf(seq, "  vcc %d.%d.%d: encaps=%s payload=%s"
+			        ", failed copies %u/%u"
+			        "\n", brvcc->atmvcc->dev->number,
+				      brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
+				      (brvcc->encaps == e_llc) ? "LLC" : "VC",
+				      (brdev->payload == p_bridged) ? "bridged" : "routed",
+				      brvcc->copies_failed,
+				      brvcc->copies_needed);
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 #define b1(var, byte)	((u8 *) &brvcc->filter.var)[byte]
 #define bs(var)		b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3)
-- 
cgit v1.2.3


From fb64c735a52f396aa261844b851cd820a80dee46 Mon Sep 17 00:00:00 2001
From: Chas Williams <chas@cmf.nrl.navy.mil>
Date: Sun, 30 Dec 2007 23:18:29 -0800
Subject: [ATM]: [br2864] whitespace cleanup

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 222 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 114 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index d9bb2a16b7c..5c8a0dcb104 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -1,9 +1,10 @@
 /*
-Ethernet netdevice using ATM AAL5 as underlying carrier
-(RFC1483 obsoleted by RFC2684) for Linux
-Authors: Marcell GAL, 2000, XDSL Ltd, Hungary
-         Eric Kinzie, 2006-2007, US Naval Research Laboratory
-*/
+ * Ethernet netdevice using ATM AAL5 as underlying carrier
+ * (RFC1483 obsoleted by RFC2684) for Linux
+ *
+ * Authors: Marcell GAL, 2000, XDSL Ltd, Hungary
+ *          Eric Kinzie, 2006-2007, US Naval Research Laboratory
+ */
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -51,28 +52,24 @@ static void skb_debug(const struct sk_buff *skb)
 #define ETHERTYPE_IPV6	0x86, 0xdd
 #define PAD_BRIDGED	0x00, 0x00
 
-static unsigned char ethertype_ipv4[] =
-	{ ETHERTYPE_IPV4 };
-static unsigned char ethertype_ipv6[] =
-	{ ETHERTYPE_IPV6 };
+static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
+static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
 static unsigned char llc_oui_pid_pad[] =
-	{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
-static unsigned char llc_oui_ipv4[] =
-	{ LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
-static unsigned char llc_oui_ipv6[] =
-	{ LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
+			{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
+static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
+static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
 
 enum br2684_encaps {
-	e_vc  = BR2684_ENCAPS_VC,
+	e_vc = BR2684_ENCAPS_VC,
 	e_llc = BR2684_ENCAPS_LLC,
 };
 
 struct br2684_vcc {
-	struct atm_vcc  *atmvcc;
+	struct atm_vcc *atmvcc;
 	struct net_device *device;
-	/* keep old push,pop functions for chaining */
-	void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb);
-	/* void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); */
+	/* keep old push, pop functions for chaining */
+	void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
+	/* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
 	enum br2684_encaps encaps;
 	struct list_head brvccs;
 #ifdef CONFIG_ATM_BR2684_IPFILTER
@@ -85,7 +82,7 @@ struct br2684_dev {
 	struct net_device *net_dev;
 	struct list_head br2684_devs;
 	int number;
-	struct list_head brvccs; /* one device <=> one vcc (before xmas) */
+	struct list_head brvccs;	/* one device <=> one vcc (before xmas) */
 	struct net_device_stats stats;
 	int mac_was_set;
 	enum br2684_payload payload;
@@ -104,7 +101,7 @@ static LIST_HEAD(br2684_devs);
 
 static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
 {
-	return (struct br2684_dev *) net_dev->priv;
+	return (struct br2684_dev *)net_dev->priv;
 }
 
 static inline struct net_device *list_entry_brdev(const struct list_head *le)
@@ -114,7 +111,7 @@ static inline struct net_device *list_entry_brdev(const struct list_head *le)
 
 static inline struct br2684_vcc *BR2684_VCC(const struct atm_vcc *atmvcc)
 {
-	return (struct br2684_vcc *) (atmvcc->user_back);
+	return (struct br2684_vcc *)(atmvcc->user_back);
 }
 
 static inline struct br2684_vcc *list_entry_brvcc(const struct list_head *le)
@@ -152,7 +149,7 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
  * otherwise false
  */
 static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
-	struct br2684_vcc *brvcc)
+			   struct br2684_vcc *brvcc)
 {
 	struct atm_vcc *atmvcc;
 	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
@@ -171,21 +168,24 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	if (brvcc->encaps == e_llc) {
 		if (brdev->payload == p_bridged) {
 			skb_push(skb, sizeof(llc_oui_pid_pad));
-			skb_copy_to_linear_data(skb, llc_oui_pid_pad, sizeof(llc_oui_pid_pad));
+			skb_copy_to_linear_data(skb, llc_oui_pid_pad,
+						sizeof(llc_oui_pid_pad));
 		} else if (brdev->payload == p_routed) {
 			unsigned short prot = ntohs(skb->protocol);
 
 			skb_push(skb, sizeof(llc_oui_ipv4));
 			switch (prot) {
-				case ETH_P_IP:
-					skb_copy_to_linear_data(skb, llc_oui_ipv4, sizeof(llc_oui_ipv4));
-					break;
-				case ETH_P_IPV6:
-					skb_copy_to_linear_data(skb, llc_oui_ipv6, sizeof(llc_oui_ipv6));
-					break;
-				default:
-					dev_kfree_skb(skb);
-					return 0;
+			case ETH_P_IP:
+				skb_copy_to_linear_data(skb, llc_oui_ipv4,
+							sizeof(llc_oui_ipv4));
+				break;
+			case ETH_P_IPV6:
+				skb_copy_to_linear_data(skb, llc_oui_ipv6,
+							sizeof(llc_oui_ipv6));
+				break;
+			default:
+				dev_kfree_skb(skb);
+				return 0;
 			}
 		}
 	} else {
@@ -198,13 +198,14 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
 	if (!atm_may_send(atmvcc, skb->truesize)) {
-		/* we free this here for now, because we cannot know in a higher
-			layer whether the skb point it supplied wasn't freed yet.
-			now, it always is.
-		*/
+		/*
+		 * We free this here for now, because we cannot know in a higher
+		 * layer whether the skb pointer it supplied wasn't freed yet.
+		 * Now, it always is.
+		 */
 		dev_kfree_skb(skb);
 		return 0;
-		}
+	}
 	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
 	brdev->stats.tx_packets++;
@@ -214,10 +215,9 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 }
 
 static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb,
-	struct br2684_dev *brdev)
+						   struct br2684_dev *brdev)
 {
-	return list_empty(&brdev->brvccs) ? NULL :
-	    list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
+	return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next);	/* 1 vcc/dev right now */
 }
 
 static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -241,11 +241,10 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		/*
 		 * We should probably use netif_*_queue() here, but that
 		 * involves added complication.  We need to walk before
-		 * we can run
+		 * we can run.
+		 *
+		 * Don't free here! this pointer might be no longer valid!
 		 */
-		/* don't free here! this pointer might be no longer valid!
-		dev_kfree_skb(skb);
-		*/
 		brdev->stats.tx_errors++;
 		brdev->stats.tx_fifo_errors++;
 	}
@@ -259,12 +258,11 @@ static struct net_device_stats *br2684_get_stats(struct net_device *dev)
 	return &BRPRIV(dev)->stats;
 }
 
-
 /*
  * We remember when the MAC gets set, so we don't override it later with
  * the ESI of the ATM card of the first VC
  */
-static int (*my_eth_mac_addr)(struct net_device *, void *);
+static int (*my_eth_mac_addr) (struct net_device *, void *);
 static int br2684_mac_addr(struct net_device *dev, void *p)
 {
 	int err = my_eth_mac_addr(dev, p);
@@ -275,7 +273,7 @@ static int br2684_mac_addr(struct net_device *dev, void *p)
 
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 /* this IOCTL is experimental. */
-static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg)
+static int br2684_setfilt(struct atm_vcc *atmvcc, void __user * arg)
 {
 	struct br2684_vcc *brvcc;
 	struct br2684_filter_set fs;
@@ -285,13 +283,12 @@ static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg)
 	if (fs.ifspec.method != BR2684_FIND_BYNOTHING) {
 		/*
 		 * This is really a per-vcc thing, but we can also search
-		 * by device
+		 * by device.
 		 */
 		struct br2684_dev *brdev;
 		read_lock(&devs_lock);
 		brdev = BRPRIV(br2684_find_dev(&fs.ifspec));
-		if (brdev == NULL || list_empty(&brdev->brvccs) ||
-		    brdev->brvccs.next != brdev->brvccs.prev)  /* >1 VCC */
+		if (brdev == NULL || list_empty(&brdev->brvccs) || brdev->brvccs.next != brdev->brvccs.prev)	/* >1 VCC */
 			brvcc = NULL;
 		else
 			brvcc = list_entry_brvcc(brdev->brvccs.next);
@@ -309,15 +306,16 @@ static inline int
 packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
 {
 	if (brvcc->filter.netmask == 0)
-		return 0;			/* no filter in place */
+		return 0;	/* no filter in place */
 	if (type == htons(ETH_P_IP) &&
-	    (((struct iphdr *) (skb->data))->daddr & brvcc->filter.
+	    (((struct iphdr *)(skb->data))->daddr & brvcc->filter.
 	     netmask) == brvcc->filter.prefix)
 		return 0;
 	if (type == htons(ETH_P_ARP))
 		return 0;
-	/* TODO: we should probably filter ARPs too.. don't want to have
-	 *   them returning values that don't make sense, or is that ok?
+	/*
+	 * TODO: we should probably filter ARPs too.. don't want to have
+	 * them returning values that don't make sense, or is that ok?
 	 */
 	return 1;		/* drop */
 }
@@ -367,10 +365,17 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 
 		/* accept packets that have "ipv[46]" in the snap header */
 		if ((skb->len >= (sizeof(llc_oui_ipv4)))
-		    && (memcmp(skb->data, llc_oui_ipv4, sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) {
-			if (memcmp(skb->data + 6, ethertype_ipv6, sizeof(ethertype_ipv6)) == 0)
+		    &&
+		    (memcmp
+		     (skb->data, llc_oui_ipv4,
+		      sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) {
+			if (memcmp
+			    (skb->data + 6, ethertype_ipv6,
+			     sizeof(ethertype_ipv6)) == 0)
 				skb->protocol = __constant_htons(ETH_P_IPV6);
-			else if (memcmp(skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0)
+			else if (memcmp
+				 (skb->data + 6, ethertype_ipv4,
+				  sizeof(ethertype_ipv4)) == 0)
 				skb->protocol = __constant_htons(ETH_P_IP);
 			else {
 				brdev->stats.rx_errors++;
@@ -380,12 +385,13 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			skb_pull(skb, sizeof(llc_oui_ipv4));
 			skb_reset_network_header(skb);
 			skb->pkt_type = PACKET_HOST;
-
-		/* let us waste some time for checking the encapsulation.
-		   Note, that only 7 char is checked so frames with a valid FCS
-		   are also accepted (but FCS is not checked of course) */
+			/*
+			 * Let us waste some time for checking the encapsulation.
+			 * Note, that only 7 char is checked so frames with a valid FCS
+			 * are also accepted (but FCS is not checked of course).
+			 */
 		} else if ((skb->len >= sizeof(llc_oui_pid_pad)) &&
-		           (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) {
+			   (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) {
 			skb_pull(skb, sizeof(llc_oui_pid_pad));
 			skb->protocol = eth_type_trans(skb, net_dev);
 		} else {
@@ -401,7 +407,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			dev_kfree_skb(skb);
 			return;
 		}
-		skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */
+		skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN);	/* pad, dstmac, srcmac, ethtype */
 		skb->protocol = eth_type_trans(skb, net_dev);
 	}
 
@@ -428,11 +434,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	netif_rx(skb);
 }
 
-static int br2684_regvcc(struct atm_vcc *atmvcc, void __user *arg)
+/*
+ * Assign a vcc to a dev
+ * Note: we do not have explicit unassign, but look at _push()
+ */
+static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 {
-/* assign a vcc to a dev
-Note: we do not have explicit unassign, but look at _push()
-*/
 	int err;
 	struct br2684_vcc *brvcc;
 	struct sk_buff *skb;
@@ -451,7 +458,7 @@ Note: we do not have explicit unassign, but look at _push()
 	net_dev = br2684_find_dev(&be.ifspec);
 	if (net_dev == NULL) {
 		printk(KERN_ERR
-		    "br2684: tried to attach to non-existant device\n");
+		       "br2684: tried to attach to non-existant device\n");
 		err = -ENXIO;
 		goto error;
 	}
@@ -467,13 +474,15 @@ Note: we do not have explicit unassign, but look at _push()
 	}
 	if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO ||
 	    be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps !=
-	    BR2684_ENCAPS_VC && be.encaps != BR2684_ENCAPS_LLC) ||
-	    be.min_size != 0) {
+							       BR2684_ENCAPS_VC
+							       && be.encaps !=
+							       BR2684_ENCAPS_LLC)
+	    || be.min_size != 0) {
 		err = -EINVAL;
 		goto error;
 	}
-	pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps,
-		brvcc);
+	pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc,
+		 be.encaps, brvcc);
 	if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) {
 		unsigned char *esi = atmvcc->dev->esi;
 		if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5])
@@ -486,7 +495,7 @@ Note: we do not have explicit unassign, but look at _push()
 	brvcc->device = net_dev;
 	brvcc->atmvcc = atmvcc;
 	atmvcc->user_back = brvcc;
-	brvcc->encaps = (enum br2684_encaps) be.encaps;
+	brvcc->encaps = (enum br2684_encaps)be.encaps;
 	brvcc->old_push = atmvcc->push;
 	barrier();
 	atmvcc->push = br2684_push;
@@ -517,7 +526,7 @@ Note: we do not have explicit unassign, but look at _push()
 	}
 	__module_get(THIS_MODULE);
 	return 0;
-    error:
+      error:
 	write_unlock_irq(&devs_lock);
 	kfree(brvcc);
 	return err;
@@ -556,7 +565,7 @@ static void br2684_setup_routed(struct net_device *netdev)
 	INIT_LIST_HEAD(&brdev->brvccs);
 }
 
-static int br2684_create(void __user *arg)
+static int br2684_create(void __user * arg)
 {
 	int err;
 	struct net_device *netdev;
@@ -574,7 +583,7 @@ static int br2684_create(void __user *arg)
 		payload = p_routed;
 	else
 		payload = p_bridged;
-	ni.media &= 0xffff; /* strip flags */
+	ni.media &= 0xffff;	/* strip flags */
 
 	if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) {
 		return -EINVAL;
@@ -583,7 +592,7 @@ static int br2684_create(void __user *arg)
 	netdev = alloc_netdev(sizeof(struct br2684_dev),
 			      ni.ifname[0] ? ni.ifname : "nas%d",
 			      (payload == p_routed) ?
-			       br2684_setup_routed : br2684_setup);
+			      br2684_setup_routed : br2684_setup);
 	if (!netdev)
 		return -ENOMEM;
 
@@ -612,16 +621,16 @@ static int br2684_create(void __user *arg)
  * -ENOIOCTLCMD for any unrecognized ioctl
  */
 static int br2684_ioctl(struct socket *sock, unsigned int cmd,
-	unsigned long arg)
+			unsigned long arg)
 {
 	struct atm_vcc *atmvcc = ATM_SD(sock);
 	void __user *argp = (void __user *)arg;
+	atm_backend_t b;
 
 	int err;
-	switch(cmd) {
+	switch (cmd) {
 	case ATM_SETBACKEND:
-	case ATM_NEWBACKENDIF: {
-		atm_backend_t b;
+	case ATM_NEWBACKENDIF:
 		err = get_user(b, (atm_backend_t __user *) argp);
 		if (err)
 			return -EFAULT;
@@ -633,7 +642,6 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
 			return br2684_regvcc(atmvcc, argp);
 		else
 			return br2684_create(argp);
-		}
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 	case BR2684_SETFILT:
 		if (atmvcc->push != br2684_push)
@@ -641,6 +649,7 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 		err = br2684_setfilt(atmvcc, argp);
+
 		return err;
 #endif /* CONFIG_ATM_BR2684_IPFILTER */
 	}
@@ -648,19 +657,18 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
 }
 
 static struct atm_ioctl br2684_ioctl_ops = {
-	.owner	= THIS_MODULE,
-	.ioctl	= br2684_ioctl,
+	.owner = THIS_MODULE,
+	.ioctl = br2684_ioctl,
 };
 
-
 #ifdef CONFIG_PROC_FS
-static void *br2684_seq_start(struct seq_file *seq, loff_t *pos)
+static void *br2684_seq_start(struct seq_file *seq, loff_t * pos)
 {
 	read_lock(&devs_lock);
 	return seq_list_start(&br2684_devs, *pos);
 }
 
-static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t * pos)
 {
 	return seq_list_next(v, &br2684_devs, pos);
 }
@@ -673,7 +681,7 @@ static void br2684_seq_stop(struct seq_file *seq, void *v)
 static int br2684_seq_show(struct seq_file *seq, void *v)
 {
 	const struct br2684_dev *brdev = list_entry(v, struct br2684_dev,
-			br2684_devs);
+						    br2684_devs);
 	const struct net_device *net_dev = brdev->net_dev;
 	const struct br2684_vcc *brvcc;
 	DECLARE_MAC_BUF(mac);
@@ -686,20 +694,18 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
 
 	list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
 		seq_printf(seq, "  vcc %d.%d.%d: encaps=%s payload=%s"
-			        ", failed copies %u/%u"
-			        "\n", brvcc->atmvcc->dev->number,
-				      brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
-				      (brvcc->encaps == e_llc) ? "LLC" : "VC",
-				      (brdev->payload == p_bridged) ? "bridged" : "routed",
-				      brvcc->copies_failed,
-				      brvcc->copies_needed);
+			   ", failed copies %u/%u"
+			   "\n", brvcc->atmvcc->dev->number,
+			   brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
+			   (brvcc->encaps == e_llc) ? "LLC" : "VC",
+			   (brdev->payload == p_bridged) ? "bridged" : "routed",
+			   brvcc->copies_failed, brvcc->copies_needed);
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 #define b1(var, byte)	((u8 *) &brvcc->filter.var)[byte]
 #define bs(var)		b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3)
-			if (brvcc->filter.netmask != 0)
-				seq_printf(seq, "    filter=%d.%d.%d.%d/"
-						"%d.%d.%d.%d\n",
-						bs(prefix), bs(netmask));
+		if (brvcc->filter.netmask != 0)
+			seq_printf(seq, "    filter=%d.%d.%d.%d/"
+				   "%d.%d.%d.%d\n", bs(prefix), bs(netmask));
 #undef bs
 #undef b1
 #endif /* CONFIG_ATM_BR2684_IPFILTER */
@@ -709,9 +715,9 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations br2684_seq_ops = {
 	.start = br2684_seq_start,
-	.next  = br2684_seq_next,
-	.stop  = br2684_seq_stop,
-	.show  = br2684_seq_show,
+	.next = br2684_seq_next,
+	.stop = br2684_seq_stop,
+	.show = br2684_seq_show,
 };
 
 static int br2684_proc_open(struct inode *inode, struct file *file)
@@ -720,15 +726,15 @@ static int br2684_proc_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations br2684_proc_ops = {
-	.owner   = THIS_MODULE,
-	.open    = br2684_proc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
+	.owner = THIS_MODULE,
+	.open = br2684_proc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
 	.release = seq_release,
 };
 
 extern struct proc_dir_entry *atm_proc_root;	/* from proc.c */
-#endif
+#endif /* CONFIG_PROC_FS */
 
 static int __init br2684_init(void)
 {
-- 
cgit v1.2.3


From a06b494b61de44617dd58612164bdde56fca7bfb Mon Sep 17 00:00:00 2001
From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Date: Sun, 30 Dec 2007 23:27:10 -0800
Subject: [IPV6]: Remove useless code from fib6_del_route().

There are useless codes in fib6_del_route(). The following patch has
been tested, every thing looks fine, as usual.

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index df05c6f2189..7165a5e90f4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1126,9 +1126,6 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 
 	rt->u.dst.rt6_next = NULL;
 
-	if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
-		fn->leaf = &ip6_null_entry;
-
 	/* If it was last route, expunge its radix tree node */
 	if (fn->leaf == NULL) {
 		fn->fn_flags &= ~RTN_RTINFO;
-- 
cgit v1.2.3


From 3ab224be6d69de912ee21302745ea45a99274dbc Mon Sep 17 00:00:00 2001
From: Hideo Aoki <haoki@redhat.com>
Date: Mon, 31 Dec 2007 00:11:19 -0800
Subject: [NET] CORE: Introducing new memory accounting interface.

This patch introduces new memory accounting functions for each network
protocol. Most of them are renamed from memory accounting functions
for stream protocols. At the same time, some stream memory accounting
functions are removed since other functions do same thing.

Renaming:
	sk_stream_free_skb()		->	sk_wmem_free_skb()
	__sk_stream_mem_reclaim()	->	__sk_mem_reclaim()
	sk_stream_mem_reclaim()		->	sk_mem_reclaim()
	sk_stream_mem_schedule 		->    	__sk_mem_schedule()
	sk_stream_pages()      		->	sk_mem_pages()
	sk_stream_rmem_schedule()	->	sk_rmem_schedule()
	sk_stream_wmem_schedule()	->	sk_wmem_schedule()
	sk_charge_skb()			->	sk_mem_charge()

Removeing
	sk_stream_rfree():	consolidates into sock_rfree()
	sk_stream_set_owner_r(): consolidates into skb_set_owner_r()
	sk_stream_mem_schedule()

The following functions are added.
    	sk_has_account(): check if the protocol supports accounting
	sk_mem_uncharge(): do the opposite of sk_mem_charge()

In addition, to achieve consolidation, updating sk_wmem_queued is
removed from sk_mem_charge().

Next, to consolidate memory accounting functions, this patch adds
memory accounting calls to network core functions. Moreover, present
memory accounting call is renamed to new accounting call.

Finally we replace present memory accounting calls with new interface
in TCP and SCTP.

Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Hideo Aoki <haoki@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c     |   2 +
 net/core/sock.c         | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/stream.c       |  84 +-------------------------------------
 net/ipv4/tcp.c          |  23 ++++++-----
 net/ipv4/tcp_input.c    |  26 ++++++------
 net/ipv4/tcp_output.c   |  26 +++++++-----
 net/ipv4/tcp_timer.c    |   8 ++--
 net/sctp/protocol.c     |   2 +-
 net/sctp/sm_statefuns.c |   2 +-
 net/sctp/socket.c       |  11 ++---
 net/sctp/ulpevent.c     |   2 +-
 net/sctp/ulpqueue.c     |   2 +-
 12 files changed, 162 insertions(+), 130 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 2d733131d7c..8a28fc93b72 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -209,6 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
 	kfree_skb(skb);
+	sk_mem_reclaim(sk);
 }
 
 /**
@@ -248,6 +249,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 	}
 
 	kfree_skb(skb);
+	sk_mem_reclaim(sk);
 	return err;
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 118214047ed..8c184c4a381 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		goto out;
 
+	if (!sk_rmem_schedule(sk, skb->truesize)) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
 	skb->dev = NULL;
 	skb_set_owner_r(skb, sk);
 
@@ -1107,7 +1112,9 @@ void sock_rfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
+	skb_truesize_check(skb);
 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+	sk_mem_uncharge(skb->sk, skb->truesize);
 }
 
 
@@ -1384,6 +1391,103 @@ int sk_wait_data(struct sock *sk, long *timeo)
 
 EXPORT_SYMBOL(sk_wait_data);
 
+/**
+ *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ *	@sk: socket
+ *	@size: memory size to allocate
+ *	@kind: allocation type
+ *
+ *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ *	rmem allocation. This function assumes that protocols which have
+ *	memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+	struct proto *prot = sk->sk_prot;
+	int amt = sk_mem_pages(size);
+	int allocated;
+
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+	allocated = atomic_add_return(amt, prot->memory_allocated);
+
+	/* Under limit. */
+	if (allocated <= prot->sysctl_mem[0]) {
+		if (prot->memory_pressure && *prot->memory_pressure)
+			*prot->memory_pressure = 0;
+		return 1;
+	}
+
+	/* Under pressure. */
+	if (allocated > prot->sysctl_mem[1])
+		if (prot->enter_memory_pressure)
+			prot->enter_memory_pressure();
+
+	/* Over hard limit. */
+	if (allocated > prot->sysctl_mem[2])
+		goto suppress_allocation;
+
+	/* guarantee minimum buffer size under pressure */
+	if (kind == SK_MEM_RECV) {
+		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+			return 1;
+	} else { /* SK_MEM_SEND */
+		if (sk->sk_type == SOCK_STREAM) {
+			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+				return 1;
+		} else if (atomic_read(&sk->sk_wmem_alloc) <
+			   prot->sysctl_wmem[0])
+				return 1;
+	}
+
+	if (prot->memory_pressure) {
+		if (!*prot->memory_pressure ||
+		    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+		    sk_mem_pages(sk->sk_wmem_queued +
+				 atomic_read(&sk->sk_rmem_alloc) +
+				 sk->sk_forward_alloc))
+			return 1;
+	}
+
+suppress_allocation:
+
+	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so that we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
+	atomic_sub(amt, prot->memory_allocated);
+	return 0;
+}
+
+EXPORT_SYMBOL(__sk_mem_schedule);
+
+/**
+ *	__sk_reclaim - reclaim memory_allocated
+ *	@sk: socket
+ */
+void __sk_mem_reclaim(struct sock *sk)
+{
+	struct proto *prot = sk->sk_prot;
+
+	atomic_sub(sk->sk_forward_alloc / SK_MEM_QUANTUM,
+		   prot->memory_allocated);
+	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+
+	if (prot->memory_pressure && *prot->memory_pressure &&
+	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+		*prot->memory_pressure = 0;
+}
+
+EXPORT_SYMBOL(__sk_mem_reclaim);
+
+
 /*
  * Set of default routines for initialising struct proto_ops when
  * the protocol does not support a particular function. In certain
diff --git a/net/core/stream.c b/net/core/stream.c
index bf188ffdbdb..4a0ad152c9c 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -172,17 +172,6 @@ do_interrupted:
 
 EXPORT_SYMBOL(sk_stream_wait_memory);
 
-void sk_stream_rfree(struct sk_buff *skb)
-{
-	struct sock *sk = skb->sk;
-
-	skb_truesize_check(skb);
-	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-	sk->sk_forward_alloc += skb->truesize;
-}
-
-EXPORT_SYMBOL(sk_stream_rfree);
-
 int sk_stream_error(struct sock *sk, int flags, int err)
 {
 	if (err == -EPIPE)
@@ -194,77 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
 
 EXPORT_SYMBOL(sk_stream_error);
 
-void __sk_stream_mem_reclaim(struct sock *sk)
-{
-	atomic_sub(sk->sk_forward_alloc >> SK_STREAM_MEM_QUANTUM_SHIFT,
-		   sk->sk_prot->memory_allocated);
-	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
-	if (*sk->sk_prot->memory_pressure &&
-	    (atomic_read(sk->sk_prot->memory_allocated) <
-	     sk->sk_prot->sysctl_mem[0]))
-		*sk->sk_prot->memory_pressure = 0;
-}
-
-EXPORT_SYMBOL(__sk_stream_mem_reclaim);
-
-int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
-{
-	int amt = sk_stream_pages(size);
-	struct proto *prot = sk->sk_prot;
-
-	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
-	atomic_add(amt, prot->memory_allocated);
-
-	/* Under limit. */
-	if (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]) {
-		if (*prot->memory_pressure)
-			*prot->memory_pressure = 0;
-		return 1;
-	}
-
-	/* Over hard limit. */
-	if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[2]) {
-		prot->enter_memory_pressure();
-		goto suppress_allocation;
-	}
-
-	/* Under pressure. */
-	if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[1])
-		prot->enter_memory_pressure();
-
-	if (kind) {
-		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
-			return 1;
-	} else if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
-		return 1;
-
-	if (!*prot->memory_pressure ||
-	    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
-				sk_stream_pages(sk->sk_wmem_queued +
-						atomic_read(&sk->sk_rmem_alloc) +
-						sk->sk_forward_alloc))
-		return 1;
-
-suppress_allocation:
-
-	if (!kind) {
-		sk_stream_moderate_sndbuf(sk);
-
-		/* Fail only if socket is _under_ its sndbuf.
-		 * In this case we cannot block, so that we have to fail.
-		 */
-		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
-			return 1;
-	}
-
-	/* Alas. Undo changes. */
-	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
-	atomic_sub(amt, prot->memory_allocated);
-	return 0;
-}
-
-EXPORT_SYMBOL(sk_stream_mem_schedule);
-
 void sk_stream_kill_queues(struct sock *sk)
 {
 	/* First the read buffer. */
@@ -277,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk)
 	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
 
 	/* Account for returned memory. */
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	BUG_TRAP(!sk->sk_wmem_queued);
 	BUG_TRAP(!sk->sk_forward_alloc);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fdaf965a679..2cbfa6df797 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -308,7 +308,7 @@ struct tcp_splice_state {
 /*
  * Pressure flag: try to collapse.
  * Technical note: it is used by multiple contexts non atomically.
- * All the sk_stream_mem_schedule() is of this nature: accounting
+ * All the __sk_mem_schedule() is of this nature: accounting
  * is strict, actions are advisory and have some latency.
  */
 int tcp_memory_pressure __read_mostly;
@@ -485,7 +485,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 	tcb->sacked  = 0;
 	skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
-	sk_charge_skb(sk, skb);
+	sk->sk_wmem_queued += skb->truesize;
+	sk_mem_charge(sk, skb->truesize);
 	if (tp->nonagle & TCP_NAGLE_PUSH)
 		tp->nonagle &= ~TCP_NAGLE_PUSH;
 }
@@ -638,7 +639,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
 	if (skb) {
-		if (sk_stream_wmem_schedule(sk, skb->truesize)) {
+		if (sk_wmem_schedule(sk, skb->truesize)) {
 			/*
 			 * Make sure that we have exactly size bytes
 			 * available to the caller, no more, no less.
@@ -707,7 +708,7 @@ new_segment:
 			tcp_mark_push(tp, skb);
 			goto new_segment;
 		}
-		if (!sk_stream_wmem_schedule(sk, copy))
+		if (!sk_wmem_schedule(sk, copy))
 			goto wait_for_memory;
 
 		if (can_coalesce) {
@@ -721,7 +722,7 @@ new_segment:
 		skb->data_len += copy;
 		skb->truesize += copy;
 		sk->sk_wmem_queued += copy;
-		sk->sk_forward_alloc -= copy;
+		sk_mem_charge(sk, copy);
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		tp->write_seq += copy;
 		TCP_SKB_CB(skb)->end_seq += copy;
@@ -928,7 +929,7 @@ new_segment:
 				if (copy > PAGE_SIZE - off)
 					copy = PAGE_SIZE - off;
 
-				if (!sk_stream_wmem_schedule(sk, copy))
+				if (!sk_wmem_schedule(sk, copy))
 					goto wait_for_memory;
 
 				if (!page) {
@@ -1019,7 +1020,7 @@ do_fault:
 		 * reset, where we can be unlinking the send_head.
 		 */
 		tcp_check_send_head(sk, skb);
-		sk_stream_free_skb(sk, skb);
+		sk_wmem_free_skb(sk, skb);
 	}
 
 do_error:
@@ -1738,7 +1739,7 @@ void tcp_close(struct sock *sk, long timeout)
 		__kfree_skb(skb);
 	}
 
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	/* As outlined in RFC 2525, section 2.17, we send a RST here because
 	 * data was lost. To witness the awful effects of the old behavior of
@@ -1841,7 +1842,7 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
-		sk_stream_mem_reclaim(sk);
+		sk_mem_reclaim(sk);
 		if (tcp_too_many_orphans(sk,
 				atomic_read(sk->sk_prot->orphan_count))) {
 			if (net_ratelimit())
@@ -2658,11 +2659,11 @@ void __init tcp_init(void)
 	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
 	max_share = min(4UL*1024*1024, limit);
 
-	sysctl_tcp_wmem[0] = SK_STREAM_MEM_QUANTUM;
+	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
 	sysctl_tcp_wmem[1] = 16*1024;
 	sysctl_tcp_wmem[2] = max(64*1024, max_share);
 
-	sysctl_tcp_rmem[0] = SK_STREAM_MEM_QUANTUM;
+	sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
 	sysctl_tcp_rmem[1] = 87380;
 	sysctl_tcp_rmem[2] = max(87380, max_share);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index efea9873208..722c9cbb91e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -591,7 +591,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 			 * restart window, so that we send ACKs quickly.
 			 */
 			tcp_incr_quickack(sk);
-			sk_stream_mem_reclaim(sk);
+			sk_mem_reclaim(sk);
 		}
 	}
 	icsk->icsk_ack.lrcvtime = now;
@@ -2851,7 +2851,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 			break;
 
 		tcp_unlink_write_queue(skb, sk);
-		sk_stream_free_skb(sk, skb);
+		sk_wmem_free_skb(sk, skb);
 		tcp_clear_all_retrans_hints(tp);
 	}
 
@@ -3567,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	__skb_queue_purge(&tp->out_of_order_queue);
 	if (tcp_is_sack(tp))
 		tcp_sack_reset(&tp->rx_opt);
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		sk->sk_state_change(sk);
@@ -3850,12 +3850,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 queue_and_out:
 			if (eaten < 0 &&
 			    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-			     !sk_stream_rmem_schedule(sk, skb))) {
+			     !sk_rmem_schedule(sk, skb->truesize))) {
 				if (tcp_prune_queue(sk) < 0 ||
-				    !sk_stream_rmem_schedule(sk, skb))
+				    !sk_rmem_schedule(sk, skb->truesize))
 					goto drop;
 			}
-			sk_stream_set_owner_r(skb, sk);
+			skb_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3924,9 +3924,9 @@ drop:
 	TCP_ECN_check_ce(tp, skb);
 
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_stream_rmem_schedule(sk, skb)) {
+	    !sk_rmem_schedule(sk, skb->truesize)) {
 		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_stream_rmem_schedule(sk, skb))
+		    !sk_rmem_schedule(sk, skb->truesize))
 			goto drop;
 	}
 
@@ -3937,7 +3937,7 @@ drop:
 	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
 		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
-	sk_stream_set_owner_r(skb, sk);
+	skb_set_owner_r(skb, sk);
 
 	if (!skb_peek(&tp->out_of_order_queue)) {
 		/* Initial out of order segment, build 1 SACK. */
@@ -4079,7 +4079,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		__skb_insert(nskb, skb->prev, skb, list);
-		sk_stream_set_owner_r(nskb, sk);
+		skb_set_owner_r(nskb, sk);
 
 		/* Copy data, releasing collapsed skbs. */
 		while (copy > 0) {
@@ -4177,7 +4177,7 @@ static int tcp_prune_queue(struct sock *sk)
 		     sk->sk_receive_queue.next,
 		     (struct sk_buff*)&sk->sk_receive_queue,
 		     tp->copied_seq, tp->rcv_nxt);
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
@@ -4197,7 +4197,7 @@ static int tcp_prune_queue(struct sock *sk)
 		 */
 		if (tcp_is_sack(tp))
 			tcp_sack_reset(&tp->rx_opt);
-		sk_stream_mem_reclaim(sk);
+		sk_mem_reclaim(sk);
 	}
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
@@ -4699,7 +4699,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				/* Bulk data transfer: receiver */
 				__skb_pull(skb,tcp_header_len);
 				__skb_queue_tail(&sk->sk_receive_queue, skb);
-				sk_stream_set_owner_r(skb, sk);
+				skb_set_owner_r(skb, sk);
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 			}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9058e0a2510..7a4834a2ae8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -637,7 +637,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
 	skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
-	sk_charge_skb(sk, skb);
+	sk->sk_wmem_queued += skb->truesize;
+	sk_mem_charge(sk, skb->truesize);
 }
 
 static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
@@ -701,7 +702,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
 
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	nlen = skb->len - len - nsize;
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
@@ -825,7 +827,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
 	skb->truesize	     -= len;
 	sk->sk_wmem_queued   -= len;
-	sk->sk_forward_alloc += len;
+	sk_mem_uncharge(sk, len);
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 
 	/* Any change of skb->len requires recalculation of tso
@@ -1197,7 +1199,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
 
@@ -1350,7 +1353,8 @@ static int tcp_mtu_probe(struct sock *sk)
 	/* We're allowed to probe.  Build it now. */
 	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
 		return -1;
-	sk_charge_skb(sk, nskb);
+	sk->sk_wmem_queued += nskb->truesize;
+	sk_mem_charge(sk, nskb->truesize);
 
 	skb = tcp_send_head(sk);
 
@@ -1377,7 +1381,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
 			tcp_unlink_write_queue(skb, sk);
-			sk_stream_free_skb(sk, skb);
+			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
 						   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
@@ -1744,7 +1748,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		/* changed transmit queue under us so clear hints */
 		tcp_clear_retrans_hints_partial(tp);
 
-		sk_stream_free_skb(sk, next_skb);
+		sk_wmem_free_skb(sk, next_skb);
 	}
 }
 
@@ -2139,8 +2143,9 @@ int tcp_send_synack(struct sock *sk)
 			tcp_unlink_write_queue(skb, sk);
 			skb_header_release(nskb);
 			__tcp_add_write_queue_head(sk, nskb);
-			sk_stream_free_skb(sk, skb);
-			sk_charge_skb(sk, nskb);
+			sk_wmem_free_skb(sk, skb);
+			sk->sk_wmem_queued += nskb->truesize;
+			sk_mem_charge(sk, nskb->truesize);
 			skb = nskb;
 		}
 
@@ -2343,7 +2348,8 @@ int tcp_connect(struct sock *sk)
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	skb_header_release(buff);
 	__tcp_add_write_queue_tail(sk, buff);
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ea85bc00c61..17931be6d58 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -186,7 +186,7 @@ static void tcp_delack_timer(unsigned long data)
 		goto out_unlock;
 	}
 
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
 		goto out;
@@ -226,7 +226,7 @@ static void tcp_delack_timer(unsigned long data)
 
 out:
 	if (tcp_memory_pressure)
-		sk_stream_mem_reclaim(sk);
+		sk_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -420,7 +420,7 @@ static void tcp_write_timer(unsigned long data)
 	TCP_CHECK_TIMER(sk);
 
 out:
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -514,7 +514,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 resched:
 	inet_csk_reset_keepalive_timer (sk, elapsed);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e466e00b9a9..b9219649502 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1109,7 +1109,7 @@ SCTP_STATIC __init int sctp_init(void)
 	sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
 	sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
 
-	sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM;
+	sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
 	sysctl_sctp_wmem[1] = 16*1024;
 	sysctl_sctp_wmem[2] = max(64*1024, max_share);
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 511d8c9a171..b1267519183 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5844,7 +5844,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	/*
 	 * Also try to renege to limit our memory usage in the event that
 	 * we are under memory pressure
-	 * If we can't renege, don't worry about it, the sk_stream_rmem_schedule
+	 * If we can't renege, don't worry about it, the sk_rmem_schedule
 	 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
 	 * memory usage too much
 	 */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7a8650f01d0..710df67a678 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -174,7 +174,8 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 				sizeof(struct sctp_chunk);
 
 	atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
-	sk_charge_skb(sk, chunk->skb);
+	sk->sk_wmem_queued += chunk->skb->truesize;
+	sk_mem_charge(sk, chunk->skb->truesize);
 }
 
 /* Verify that this is a valid address. */
@@ -6035,10 +6036,10 @@ static void sctp_wfree(struct sk_buff *skb)
 	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 
 	/*
-	 * This undoes what is done via sk_charge_skb
+	 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
 	 */
 	sk->sk_wmem_queued   -= skb->truesize;
-	sk->sk_forward_alloc += skb->truesize;
+	sk_mem_uncharge(sk, skb->truesize);
 
 	sock_wfree(skb);
 	__sctp_write_space(asoc);
@@ -6059,9 +6060,9 @@ void sctp_sock_rfree(struct sk_buff *skb)
 	atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
 
 	/*
-	 * Mimic the behavior of sk_stream_rfree
+	 * Mimic the behavior of sock_rfree
 	 */
-	sk->sk_forward_alloc += event->rmem_len;
+	sk_mem_uncharge(sk, event->rmem_len);
 }
 
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 307314356e1..047c27df98f 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -700,7 +700,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	if (rx_count >= asoc->base.sk->sk_rcvbuf) {
 
 		if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
-		   (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb)))
+		    (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
 			goto fail;
 	}
 
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 1733fa29a50..c25caefa3bc 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1046,7 +1046,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
 	}
 
-	sk_stream_mem_reclaim(asoc->base.sk);
+	sk_mem_reclaim(asoc->base.sk);
 	return;
 }
 
-- 
cgit v1.2.3


From 95766fff6b9a78d11fc2d3812dd035381690b55d Mon Sep 17 00:00:00 2001
From: Hideo Aoki <haoki@redhat.com>
Date: Mon, 31 Dec 2007 00:29:24 -0800
Subject: [UDP]: Add memory accounting.

Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Hideo Aoki <haoki@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c         |  5 ++++
 net/ipv4/proc.c            |  3 ++-
 net/ipv4/sysctl_net_ipv4.c | 31 +++++++++++++++++++++++++
 net/ipv4/udp.c             | 57 ++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/udp.c             | 32 ++++++++++++++++++++++----
 5 files changed, 121 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 03633b7b9b4..0e12cf64607 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -139,6 +139,8 @@ void inet_sock_destruct(struct sock *sk)
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_error_queue);
 
+	sk_mem_reclaim(sk);
+
 	if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
 		printk("Attempt to release TCP socket in state %d %p\n",
 		       sk->sk_state, sk);
@@ -1417,6 +1419,9 @@ static int __init inet_init(void)
 	/* Setup TCP slab cache for open requests. */
 	tcp_init();
 
+	/* Setup UDP memory threshold */
+	udp_init();
+
 	/* Add UDP-Lite (RFC 3828) */
 	udplite4_register();
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 41734db677b..53bc010beef 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -56,7 +56,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 		   sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
 		   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
-	seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
+	seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse(&udp_prot),
+		   atomic_read(&udp_memory_allocated));
 	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
 	seq_printf(seq,  "FRAG: inuse %d memory %d\n",
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 844f26fab06..a5a9f8e3bb2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
+#include <net/udp.h>
 #include <net/cipso_ipv4.h>
 #include <net/inet_frag.h>
 
@@ -812,6 +813,36 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "udp_mem",
+		.data		= &sysctl_udp_mem,
+		.maxlen		= sizeof(sysctl_udp_mem),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "udp_rmem_min",
+		.data		= &sysctl_udp_rmem_min,
+		.maxlen		= sizeof(sysctl_udp_rmem_min),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "udp_wmem_min",
+		.data		= &sysctl_udp_wmem_min,
+		.maxlen		= sizeof(sysctl_udp_wmem_min),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1ce6b60b7f9..35328436075 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -82,6 +82,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <linux/bootmem.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/module.h>
@@ -118,6 +119,17 @@ EXPORT_SYMBOL(udp_stats_in6);
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
+int sysctl_udp_mem[3] __read_mostly;
+int sysctl_udp_rmem_min __read_mostly;
+int sysctl_udp_wmem_min __read_mostly;
+
+EXPORT_SYMBOL(sysctl_udp_mem);
+EXPORT_SYMBOL(sysctl_udp_rmem_min);
+EXPORT_SYMBOL(sysctl_udp_wmem_min);
+
+atomic_t udp_memory_allocated;
+EXPORT_SYMBOL(udp_memory_allocated);
+
 static inline int __udp_lib_lport_inuse(__u16 num,
 					const struct hlist_head udptable[])
 {
@@ -901,13 +913,17 @@ try_again:
 		err = ulen;
 
 out_free:
+	lock_sock(sk);
 	skb_free_datagram(sk, skb);
+	release_sock(sk);
 out:
 	return err;
 
 csum_copy_err:
+	lock_sock(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+	release_sock(sk);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1072,7 +1088,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
-				int ret = udp_queue_rcv_skb(sk, skb1);
+				int ret = 0;
+
+				bh_lock_sock_nested(sk);
+				if (!sock_owned_by_user(sk))
+					ret = udp_queue_rcv_skb(sk, skb1);
+				else
+					sk_add_backlog(sk, skb1);
+				bh_unlock_sock(sk);
+
 				if (ret > 0)
 					/* we should probably re-process instead
 					 * of dropping packets here. */
@@ -1165,7 +1189,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 			       inet_iif(skb), udptable);
 
 	if (sk != NULL) {
-		int ret = udp_queue_rcv_skb(sk, skb);
+		int ret = 0;
+		bh_lock_sock_nested(sk);
+		if (!sock_owned_by_user(sk))
+			ret = udp_queue_rcv_skb(sk, skb);
+		else
+			sk_add_backlog(sk, skb);
+		bh_unlock_sock(sk);
 		sock_put(sk);
 
 		/* a return value > 0 means to resubmit the input, but
@@ -1460,6 +1490,10 @@ struct proto udp_prot = {
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
+	.memory_allocated  = &udp_memory_allocated,
+	.sysctl_mem	   = sysctl_udp_mem,
+	.sysctl_wmem	   = &sysctl_udp_wmem_min,
+	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp_sock),
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
@@ -1655,6 +1689,25 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+void __init udp_init(void)
+{
+	unsigned long limit;
+
+	/* Set the pressure threshold up by the same strategy of TCP. It is a
+	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
+	 * toward zero with the amount of memory, with a floor of 128 pages.
+	 */
+	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	limit = max(limit, 128UL);
+	sysctl_udp_mem[0] = limit / 4 * 3;
+	sysctl_udp_mem[1] = limit;
+	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
+
+	sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+}
+
 EXPORT_SYMBOL(udp_disconnect);
 EXPORT_SYMBOL(udp_hash);
 EXPORT_SYMBOL(udp_hash_lock);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c9a97b40551..bf58acab206 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -204,13 +204,17 @@ try_again:
 		err = ulen;
 
 out_free:
+	lock_sock(sk);
 	skb_free_datagram(sk, skb);
+	release_sock(sk);
 out:
 	return err;
 
 csum_copy_err:
+	lock_sock(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+	release_sock(sk);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
@@ -366,10 +370,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
 	while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
 					uh->source, saddr, dif))) {
 		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
-		if (buff)
-			udpv6_queue_rcv_skb(sk2, buff);
+		if (buff) {
+			bh_lock_sock_nested(sk2);
+			if (!sock_owned_by_user(sk2))
+				udpv6_queue_rcv_skb(sk2, buff);
+			else
+				sk_add_backlog(sk2, buff);
+			bh_unlock_sock(sk2);
+		}
 	}
-	udpv6_queue_rcv_skb(sk, skb);
+	bh_lock_sock_nested(sk);
+	if (!sock_owned_by_user(sk))
+		udpv6_queue_rcv_skb(sk, skb);
+	else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
 out:
 	read_unlock(&udp_hash_lock);
 	return 0;
@@ -482,7 +497,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 
 	/* deliver */
 
-	udpv6_queue_rcv_skb(sk, skb);
+	bh_lock_sock_nested(sk);
+	if (!sock_owned_by_user(sk))
+		udpv6_queue_rcv_skb(sk, skb);
+	else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
 	sock_put(sk);
 	return 0;
 
@@ -994,6 +1014,10 @@ struct proto udpv6_prot = {
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v6_get_port,
+	.memory_allocated  = &udp_memory_allocated,
+	.sysctl_mem	   = sysctl_udp_mem,
+	.sysctl_wmem	   = &sysctl_udp_wmem_min,
+	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp6_sock),
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
-- 
cgit v1.2.3


From 3ccd3130b3f681a4aef6392327256786b3b6aa04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 04:43:32 -0800
Subject: [TCP]: Make invariant check complain about invalid sacked_out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Earlier resolution for NewReno's sacked_out should now keep
it small enough for this to become invariant-like check.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 722c9cbb91e..41f4b862319 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2504,11 +2504,8 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 				    (tcp_fackets_out(tp) > tp->reordering));
 	int fast_rexmit = 0;
 
-	/* Some technical things:
-	 * 1. Reno does not count dupacks (sacked_out) automatically. */
-	if (!tp->packets_out)
+	if (WARN_ON(!tp->packets_out && tp->sacked_out))
 		tp->sacked_out = 0;
-
 	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
 		tp->fackets_out = 0;
 
-- 
cgit v1.2.3


From 66f5fe624fa5f1d4574d2dd2bc0c72a17a92079c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 04:43:57 -0800
Subject: [TCP]: Rename update_send_head & include related increment to it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's very little need to have the packets_out incrementing in
a separate function. Also name the combined function
appropriately.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7a4834a2ae8..1ca638b8316 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -61,29 +61,22 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-static inline void tcp_packets_out_inc(struct sock *sk,
-				       const struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int orig = tp->packets_out;
-
-	tp->packets_out += tcp_skb_pcount(skb);
-	if (!orig)
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
-}
-
-static void update_send_head(struct sock *sk, struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int prior_packets = tp->packets_out;
 
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-	tcp_packets_out_inc(sk, skb);
 
 	/* Don't override Nagle indefinately with F-RTO */
 	if (tp->frto_counter == 2)
 		tp->frto_counter = 3;
+
+	tp->packets_out += tcp_skb_pcount(skb);
+	if (!prior_packets)
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -1410,7 +1403,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		/* Decrement cwnd here because we are sending
 		* effectively two packets. */
 		tp->snd_cwnd--;
-		update_send_head(sk, nskb);
+		tcp_event_new_data_sent(sk, nskb);
 
 		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
 		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1494,7 +1487,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		/* Advance the send_head.  This one is sent out.
 		 * This call will increment packets_out.
 		 */
-		update_send_head(sk, skb);
+		tcp_event_new_data_sent(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
@@ -1553,7 +1546,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-			update_send_head(sk, skb);
+			tcp_event_new_data_sent(sk, skb);
 			tcp_cwnd_validate(sk);
 			return;
 		}
@@ -2528,9 +2521,8 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
 			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
-			if (!err) {
-				update_send_head(sk, skb);
-			}
+			if (!err)
+				tcp_event_new_data_sent(sk, skb);
 			return err;
 		} else {
 			if (tp->urg_mode &&
-- 
cgit v1.2.3


From 90840defabbd819180c7528e12d550776848f833 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 04:48:41 -0800
Subject: [TCP]: Introduce tcp_wnd_end() to reduce line lengths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  |  3 +--
 net/ipv4/tcp_output.c | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 41f4b862319..366f63ae4d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2921,8 +2921,7 @@ static void tcp_ack_probe(struct sock *sk)
 
 	/* Was it a usable window open? */
 
-	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
-		   tp->snd_una + tp->snd_wnd)) {
+	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1ca638b8316..821fae27142 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -89,10 +89,10 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
+	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
 		return tp->snd_nxt;
 	else
-		return tp->snd_una+tp->snd_wnd;
+		return tcp_wnd_end(tp);
 }
 
 /* Calculate mss to advertise in SYN segment.
@@ -1024,7 +1024,7 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 needed, window, cwnd_len;
 
-	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 	cwnd_len = mss_now * cwnd;
 
 	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
@@ -1134,7 +1134,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
 	if (skb->len > cur_mss)
 		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
 
-	return !after(end_seq, tp->snd_una + tp->snd_wnd);
+	return !after(end_seq, tcp_wnd_end(tp));
 }
 
 /* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
@@ -1251,7 +1251,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	BUG_ON(tcp_skb_pcount(skb) <= 1 ||
 	       (tp->snd_cwnd <= in_flight));
 
-	send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+	send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 	/* From in_flight test above, we know that cwnd > in_flight.  */
 	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
@@ -1332,7 +1332,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	if (tp->snd_wnd < size_needed)
 		return -1;
-	if (after(tp->snd_nxt + size_needed, tp->snd_una + tp->snd_wnd))
+	if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
 		return 0;
 
 	/* Do we need to wait to drain cwnd? With none in flight, don't stall */
@@ -1687,7 +1687,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 			return;
 
 		/* Next skb is out of window. */
-		if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
+		if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
 			return;
 
 		/* Punt if not enough space exists in the first SKB for
@@ -1831,7 +1831,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * case, when window is shrunk to zero. In this case
 	 * our retransmit serves as a zero window probe.
 	 */
-	if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
+	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
 	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
 		return -EAGAIN;
 
@@ -2497,10 +2497,10 @@ int tcp_write_wakeup(struct sock *sk)
 		struct sk_buff *skb;
 
 		if ((skb = tcp_send_head(sk)) != NULL &&
-		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
+		    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
 			int err;
 			unsigned int mss = tcp_current_mss(sk, 0);
-			unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
+			unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 			if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
 				tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
-- 
cgit v1.2.3


From cadbd0313bc897f5917d013174cdf9077edf4aa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 04:49:21 -0800
Subject: [TCP]: Dropped unnecessary skb/sacked accessing in reneging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SACK reneging can be precalculated to a FLAG in clean_rtx_queue
which has the right skb looked up. This will help a bit in
future because skb->sacked access will be changed eventually,
changing it already won't hurt any.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 366f63ae4d5..7bac1fac065 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,6 +105,7 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
 #define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
+#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -1918,18 +1919,15 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->frto_counter = 0;
 }
 
-static int tcp_check_sack_reneging(struct sock *sk)
+/* If ACK arrived pointing to a remembered SACK, it means that our
+ * remembered SACKs do not reflect real state of receiver i.e.
+ * receiver _host_ is heavily congested (or buggy).
+ *
+ * Do processing similar to RTO timeout.
+ */
+static int tcp_check_sack_reneging(struct sock *sk, int flag)
 {
-	struct sk_buff *skb;
-
-	/* If ACK arrived pointing to a remembered SACK,
-	 * it means that our remembered SACKs do not reflect
-	 * real state of receiver i.e.
-	 * receiver _host_ is heavily congested (or buggy).
-	 * Do processing similar to RTO timeout.
-	 */
-	if ((skb = tcp_write_queue_head(sk)) != NULL &&
-	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+	if (flag & FLAG_SACK_RENEGING) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
 		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
 
@@ -2515,7 +2513,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		tp->prior_ssthresh = 0;
 
 	/* B. In all the states check for reneging SACKs. */
-	if (tp->sacked_out && tcp_check_sack_reneging(sk))
+	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
 	/* C. Process data loss notification, provided it is valid. */
@@ -2852,6 +2850,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 		tcp_clear_all_retrans_hints(tp);
 	}
 
+	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+		flag |= FLAG_SACK_RENEGING;
+
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
-- 
cgit v1.2.3


From 4828e7f49a402930e8b3e72de695c8d37e0f98ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 04:50:19 -0800
Subject: [TCP]: Remove TCPCB_URG & TCPCB_AT_TAIL as unnecessary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The snd_up check should be enough. I suspect this has been
there to provide a minor optimization in clean_rtx_queue which
used to have a small if (!->sacked) block which could skip
snd_up check among the other work.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c        | 1 -
 net/ipv4/tcp_input.c  | 3 +--
 net/ipv4/tcp_output.c | 7 +++----
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2cbfa6df797..34085e3a409 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -497,7 +497,6 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 	if (flags & MSG_OOB) {
 		tp->urg_mode = 1;
 		tp->snd_up = tp->write_seq;
-		TCP_SKB_CB(skb)->sacked |= TCPCB_URG;
 	}
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7bac1fac065..1e7fd811366 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2821,8 +2821,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
 
-		if (unlikely((sacked & TCPCB_URG) && tp->urg_mode &&
-			     !before(end_seq, tp->snd_up)))
+		if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
 			tp->urg_mode = 0;
 
 		tp->packets_out -= acked_pcount;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 821fae27142..cd21528665f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -711,7 +711,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
-	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
 	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
@@ -1726,7 +1725,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		/* All done, get rid of second SKB and account for it so
 		 * packet counting does not break.
 		 */
-		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
+		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
 		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
 			tp->retrans_out -= tcp_skb_pcount(next_skb);
 		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
@@ -2475,7 +2474,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	skb_reserve(skb, MAX_TCP_HEADER);
 	skb->csum = 0;
 	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(skb)->sacked = urgent;
+	TCP_SKB_CB(skb)->sacked = 0;
 	skb_shinfo(skb)->gso_segs = 1;
 	skb_shinfo(skb)->gso_size = 0;
 	skb_shinfo(skb)->gso_type = 0;
@@ -2527,7 +2526,7 @@ int tcp_write_wakeup(struct sock *sk)
 		} else {
 			if (tp->urg_mode &&
 			    between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
-				tcp_xmit_probe_skb(sk, TCPCB_URG);
+				tcp_xmit_probe_skb(sk, 1);
 			return tcp_xmit_probe_skb(sk, 0);
 		}
 	}
-- 
cgit v1.2.3


From 058dc3342b71ffb3531c4f9df7c35f943f392b8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 04:51:11 -0800
Subject: [TCP]: reduce tcp_output's indentation levels a bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 239 +++++++++++++++++++++++++-------------------------
 1 file changed, 121 insertions(+), 118 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cd21528665f..454cf84b615 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1673,75 +1673,77 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+	int skb_size, next_skb_size;
+	u16 flags;
 
 	/* The first test we must make is that neither of these two
 	 * SKB's are still referenced by someone else.
 	 */
-	if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
-		int skb_size = skb->len, next_skb_size = next_skb->len;
-		u16 flags = TCP_SKB_CB(skb)->flags;
+	if (skb_cloned(skb) || skb_cloned(next_skb))
+		return;
 
-		/* Also punt if next skb has been SACK'd. */
-		if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
-			return;
+	skb_size = skb->len;
+	next_skb_size = next_skb->len;
+	flags = TCP_SKB_CB(skb)->flags;
 
-		/* Next skb is out of window. */
-		if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
-			return;
+	/* Also punt if next skb has been SACK'd. */
+	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+		return;
 
-		/* Punt if not enough space exists in the first SKB for
-		 * the data in the second, or the total combined payload
-		 * would exceed the MSS.
-		 */
-		if ((next_skb_size > skb_tailroom(skb)) ||
-		    ((skb_size + next_skb_size) > mss_now))
-			return;
+	/* Next skb is out of window. */
+	if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
+		return;
 
-		BUG_ON(tcp_skb_pcount(skb) != 1 ||
-		       tcp_skb_pcount(next_skb) != 1);
+	/* Punt if not enough space exists in the first SKB for
+	 * the data in the second, or the total combined payload
+	 * would exceed the MSS.
+	 */
+	if ((next_skb_size > skb_tailroom(skb)) ||
+	    ((skb_size + next_skb_size) > mss_now))
+		return;
 
-		tcp_highest_sack_combine(sk, next_skb, skb);
+	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
-		/* Ok.	We will be able to collapse the packet. */
-		tcp_unlink_write_queue(next_skb, sk);
+	tcp_highest_sack_combine(sk, next_skb, skb);
 
-		skb_copy_from_linear_data(next_skb,
-					  skb_put(skb, next_skb_size),
-					  next_skb_size);
+	/* Ok.	We will be able to collapse the packet. */
+	tcp_unlink_write_queue(next_skb, sk);
 
-		if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-			skb->ip_summed = CHECKSUM_PARTIAL;
+	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
+				  next_skb_size);
 
-		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
+	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
+		skb->ip_summed = CHECKSUM_PARTIAL;
 
-		/* Update sequence range on original skb. */
-		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
 
-		/* Merge over control information. */
-		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
-		TCP_SKB_CB(skb)->flags = flags;
+	/* Update sequence range on original skb. */
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
-		/* All done, get rid of second SKB and account for it so
-		 * packet counting does not break.
-		 */
-		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
-		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
-			tp->retrans_out -= tcp_skb_pcount(next_skb);
-		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
-			tp->lost_out -= tcp_skb_pcount(next_skb);
-		/* Reno case is special. Sigh... */
-		if (tcp_is_reno(tp) && tp->sacked_out)
-			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
-
-		tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
-		tp->packets_out -= tcp_skb_pcount(next_skb);
-
-		/* changed transmit queue under us so clear hints */
-		tcp_clear_retrans_hints_partial(tp);
-
-		sk_wmem_free_skb(sk, next_skb);
-	}
+	/* Merge over control information. */
+	flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
+	TCP_SKB_CB(skb)->flags = flags;
+
+	/* All done, get rid of second SKB and account for it so
+	 * packet counting does not break.
+	 */
+	TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
+	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
+		tp->retrans_out -= tcp_skb_pcount(next_skb);
+	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
+		tp->lost_out -= tcp_skb_pcount(next_skb);
+	/* Reno case is special. Sigh... */
+	if (tcp_is_reno(tp) && tp->sacked_out)
+		tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
+
+	tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
+	tp->packets_out -= tcp_skb_pcount(next_skb);
+
+	/* changed transmit queue under us so clear hints */
+	tcp_clear_retrans_hints_partial(tp);
+
+	sk_wmem_free_skb(sk, next_skb);
 }
 
 /* Do a simple retransmit without using the backoff mechanisms in
@@ -2416,37 +2418,38 @@ void tcp_send_delayed_ack(struct sock *sk)
 /* This routine sends an ack and also updates the window. */
 void tcp_send_ack(struct sock *sk)
 {
-	/* If we have been reset, we may not send again. */
-	if (sk->sk_state != TCP_CLOSE) {
-		struct sk_buff *buff;
+	struct sk_buff *buff;
 
-		/* We are not putting this on the write queue, so
-		 * tcp_transmit_skb() will set the ownership to this
-		 * sock.
-		 */
-		buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
-		if (buff == NULL) {
-			inet_csk_schedule_ack(sk);
-			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  TCP_DELACK_MAX, TCP_RTO_MAX);
-			return;
-		}
+	/* If we have been reset, we may not send again. */
+	if (sk->sk_state == TCP_CLOSE)
+		return;
 
-		/* Reserve space for headers and prepare control bits. */
-		skb_reserve(buff, MAX_TCP_HEADER);
-		buff->csum = 0;
-		TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
-		TCP_SKB_CB(buff)->sacked = 0;
-		skb_shinfo(buff)->gso_segs = 1;
-		skb_shinfo(buff)->gso_size = 0;
-		skb_shinfo(buff)->gso_type = 0;
-
-		/* Send it off, this clears delayed acks for us. */
-		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
-		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
+	/* We are not putting this on the write queue, so
+	 * tcp_transmit_skb() will set the ownership to this
+	 * sock.
+	 */
+	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+	if (buff == NULL) {
+		inet_csk_schedule_ack(sk);
+		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  TCP_DELACK_MAX, TCP_RTO_MAX);
+		return;
 	}
+
+	/* Reserve space for headers and prepare control bits. */
+	skb_reserve(buff, MAX_TCP_HEADER);
+	buff->csum = 0;
+	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
+	TCP_SKB_CB(buff)->sacked = 0;
+	skb_shinfo(buff)->gso_segs = 1;
+	skb_shinfo(buff)->gso_size = 0;
+	skb_shinfo(buff)->gso_type = 0;
+
+	/* Send it off, this clears delayed acks for us. */
+	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
+	TCP_SKB_CB(buff)->when = tcp_time_stamp;
+	tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 }
 
 /* This routine sends a packet with an out of date sequence
@@ -2491,46 +2494,46 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 
 int tcp_write_wakeup(struct sock *sk)
 {
-	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct sk_buff *skb;
-
-		if ((skb = tcp_send_head(sk)) != NULL &&
-		    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
-			int err;
-			unsigned int mss = tcp_current_mss(sk, 0);
-			unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
-
-			if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
-				tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
-
-			/* We are probing the opening of a window
-			 * but the window size is != 0
-			 * must have been a result SWS avoidance ( sender )
-			 */
-			if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
-			    skb->len > mss) {
-				seg_size = min(seg_size, mss);
-				TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-				if (tcp_fragment(sk, skb, seg_size, mss))
-					return -1;
-			} else if (!tcp_skb_pcount(skb))
-				tcp_set_skb_tso_segs(sk, skb, mss);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
 
+	if (sk->sk_state == TCP_CLOSE)
+		return -1;
+
+	if ((skb = tcp_send_head(sk)) != NULL &&
+	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
+		int err;
+		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
+
+		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
+			tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
+
+		/* We are probing the opening of a window
+		 * but the window size is != 0
+		 * must have been a result SWS avoidance ( sender )
+		 */
+		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
+		    skb->len > mss) {
+			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
-			if (!err)
-				tcp_event_new_data_sent(sk, skb);
-			return err;
-		} else {
-			if (tp->urg_mode &&
-			    between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
-				tcp_xmit_probe_skb(sk, 1);
-			return tcp_xmit_probe_skb(sk, 0);
-		}
+			if (tcp_fragment(sk, skb, seg_size, mss))
+				return -1;
+		} else if (!tcp_skb_pcount(skb))
+			tcp_set_skb_tso_segs(sk, skb, mss);
+
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		if (!err)
+			tcp_event_new_data_sent(sk, skb);
+		return err;
+	} else {
+		if (tp->urg_mode &&
+		    between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
+			tcp_xmit_probe_skb(sk, 1);
+		return tcp_xmit_probe_skb(sk, 0);
 	}
-	return -1;
 }
 
 /* A window probe timeout has occurred.  If window is not closed send
-- 
cgit v1.2.3


From 056834d9f6f6eaf4cc7268569e53acab957aac27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 14:57:14 -0800
Subject: [TCP]: cleanup tcp_{in,out}put.c style
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These were manually selected from indent's results which as is
are too noisy to be of any use without human reason. In addition,
some extra newlines between function and its comment were removed
too.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 436 ++++++++++++++++++++++++++------------------------
 net/ipv4/tcp_output.c | 146 ++++++++---------
 2 files changed, 300 insertions(+), 282 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1e7fd811366..18e099c6fa6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -121,8 +121,7 @@ int sysctl_tcp_abc __read_mostly;
 /* Adapt the MSS value used to make delayed ack decision to the
  * real world.
  */
-static void tcp_measure_rcv_mss(struct sock *sk,
-				const struct sk_buff *skb)
+static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -133,7 +132,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 	/* skb->len may jitter because of SACKs, even if peer
 	 * sends good full-sized frames.
 	 */
-	len = skb_shinfo(skb)->gso_size ?: skb->len;
+	len = skb_shinfo(skb)->gso_size ? : skb->len;
 	if (len >= icsk->icsk_ack.rcv_mss) {
 		icsk->icsk_ack.rcv_mss = len;
 	} else {
@@ -173,8 +172,8 @@ static void tcp_incr_quickack(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
 
-	if (quickacks==0)
-		quickacks=2;
+	if (quickacks == 0)
+		quickacks = 2;
 	if (quickacks > icsk->icsk_ack.quick)
 		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
@@ -199,7 +198,7 @@ static inline int tcp_in_quickack_mode(const struct sock *sk)
 
 static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
 {
-	if (tp->ecn_flags&TCP_ECN_OK)
+	if (tp->ecn_flags & TCP_ECN_OK)
 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
@@ -216,7 +215,7 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
 
 static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if (tp->ecn_flags&TCP_ECN_OK) {
+	if (tp->ecn_flags & TCP_ECN_OK) {
 		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		/* Funny extension: if ECT is not set on a segment,
@@ -229,19 +228,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
 
 static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
 {
-	if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr))
+	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
 static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
 {
-	if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr))
+	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
 static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 {
-	if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK))
+	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
 		return 1;
 	return 0;
 }
@@ -303,8 +302,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk,
-			    struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -318,12 +316,13 @@ static void tcp_grow_window(struct sock *sk,
 		 * will fit to rcvbuf in future.
 		 */
 		if (tcp_win_from_space(skb->truesize) <= skb->len)
-			incr = 2*tp->advmss;
+			incr = 2 * tp->advmss;
 		else
 			incr = __tcp_grow_window(sk, skb);
 
 		if (incr) {
-			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
+					       tp->window_clamp);
 			inet_csk(sk)->icsk_ack.quick |= 1;
 		}
 	}
@@ -398,10 +397,9 @@ static void tcp_clamp_window(struct sock *sk)
 				    sysctl_tcp_rmem[2]);
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
-		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
+		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
 }
 
-
 /* Initialize RCV_MSS value.
  * RCV_MSS is an our guess about MSS used by the peer.
  * We haven't any direct information about the MSS.
@@ -414,7 +412,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
-	hint = min(hint, tp->rcv_wnd/2);
+	hint = min(hint, tp->rcv_wnd / 2);
 	hint = min(hint, TCP_MIN_RCVMSS);
 	hint = max(hint, TCP_MIN_MSS);
 
@@ -471,16 +469,15 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 		goto new_measure;
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
-	tcp_rcv_rtt_update(tp,
-			   jiffies - tp->rcv_rtt_est.time,
-			   1);
+	tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
 
 new_measure:
 	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
 	tp->rcv_rtt_est.time = tcp_time_stamp;
 }
 
-static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb)
+static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
+					  const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	if (tp->rx_opt.rcv_tsecr &&
@@ -503,8 +500,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		goto new_measure;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
-	if (time < (tp->rcv_rtt_est.rtt >> 3) ||
-	    tp->rcv_rtt_est.rtt == 0)
+	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
 		return;
 
 	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
@@ -580,7 +576,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 	} else {
 		int m = now - icsk->icsk_ack.lrcvtime;
 
-		if (m <= TCP_ATO_MIN/2) {
+		if (m <= TCP_ATO_MIN / 2) {
 			/* The fastest case is the first. */
 			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
 		} else if (m < icsk->icsk_ack.ato) {
@@ -609,7 +605,7 @@ static u32 tcp_rto_min(struct sock *sk)
 	u32 rto_min = TCP_RTO_MIN;
 
 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
-		rto_min = dst->metrics[RTAX_RTO_MIN-1];
+		rto_min = dst->metrics[RTAX_RTO_MIN - 1];
 	return rto_min;
 }
 
@@ -672,14 +668,14 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 		if (after(tp->snd_una, tp->rtt_seq)) {
 			if (tp->mdev_max < tp->rttvar)
-				tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
 			tp->rtt_seq = tp->snd_nxt;
 			tp->mdev_max = tcp_rto_min(sk);
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m<<3;	/* take the measured time to be rtt */
-		tp->mdev = m<<1;	/* make sure rto = 3*rtt */
+		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
@@ -733,7 +729,7 @@ void tcp_update_metrics(struct sock *sk)
 
 	dst_confirm(dst);
 
-	if (dst && (dst->flags&DST_HOST)) {
+	if (dst && (dst->flags & DST_HOST)) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		int m;
 
@@ -743,7 +739,7 @@ void tcp_update_metrics(struct sock *sk)
 			 * Reset our results.
 			 */
 			if (!(dst_metric_locked(dst, RTAX_RTT)))
-				dst->metrics[RTAX_RTT-1] = 0;
+				dst->metrics[RTAX_RTT - 1] = 0;
 			return;
 		}
 
@@ -755,9 +751,9 @@ void tcp_update_metrics(struct sock *sk)
 		 */
 		if (!(dst_metric_locked(dst, RTAX_RTT))) {
 			if (m <= 0)
-				dst->metrics[RTAX_RTT-1] = tp->srtt;
+				dst->metrics[RTAX_RTT - 1] = tp->srtt;
 			else
-				dst->metrics[RTAX_RTT-1] -= (m>>3);
+				dst->metrics[RTAX_RTT - 1] -= (m >> 3);
 		}
 
 		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
@@ -770,7 +766,7 @@ void tcp_update_metrics(struct sock *sk)
 				m = tp->mdev;
 
 			if (m >= dst_metric(dst, RTAX_RTTVAR))
-				dst->metrics[RTAX_RTTVAR-1] = m;
+				dst->metrics[RTAX_RTTVAR - 1] = m;
 			else
 				dst->metrics[RTAX_RTTVAR-1] -=
 					(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
@@ -784,7 +780,7 @@ void tcp_update_metrics(struct sock *sk)
 				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
 			if (!dst_metric_locked(dst, RTAX_CWND) &&
 			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
+				dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
 		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
 			   icsk->icsk_ca_state == TCP_CA_Open) {
 			/* Cong. avoidance phase, cwnd is reliable. */
@@ -1353,12 +1349,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 		}
 
 		if (in_sack <= 0)
-			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
+			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
+							end_seq);
 		if (unlikely(in_sack < 0))
 			break;
 
 		if (in_sack)
-			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count);
+			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
+						 *fack_count);
 
 		*fack_count += tcp_skb_pcount(skb);
 	}
@@ -1407,7 +1405,8 @@ static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
 }
 
 static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
+tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+			u32 prior_snd_una)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1417,7 +1416,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	struct tcp_sack_block sp[4];
 	struct tcp_sack_block *cache;
 	struct sk_buff *skb;
-	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
 	int used_sacks;
 	int reord = tp->packets_out;
 	int flag = 0;
@@ -1484,17 +1483,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 	/* order SACK blocks to allow in order walk of the retrans queue */
 	for (i = used_sacks - 1; i > 0; i--) {
-		for (j = 0; j < i; j++){
-			if (after(sp[j].start_seq, sp[j+1].start_seq)) {
+		for (j = 0; j < i; j++) {
+			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
 				struct tcp_sack_block tmp;
 
 				tmp = sp[j];
-				sp[j] = sp[j+1];
-				sp[j+1] = tmp;
+				sp[j] = sp[j + 1];
+				sp[j + 1] = tmp;
 
 				/* Track where the first SACK block goes to */
 				if (j == first_sack_index)
-					first_sack_index = j+1;
+					first_sack_index = j + 1;
 			}
 		}
 	}
@@ -1539,17 +1538,21 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			/* Head todo? */
 			if (before(start_seq, cache->start_seq)) {
 				skb = tcp_sacktag_skip(skb, sk, start_seq);
-				skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq,
-						       cache->start_seq, dup_sack,
-						       &fack_count, &reord, &flag);
+				skb = tcp_sacktag_walk(skb, sk, next_dup,
+						       start_seq,
+						       cache->start_seq,
+						       dup_sack, &fack_count,
+						       &reord, &flag);
 			}
 
 			/* Rest of the block already fully processed? */
 			if (!after(end_seq, cache->end_seq))
 				goto advance_sp;
 
-			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
-						       &fack_count, &reord, &flag);
+			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
+						       cache->end_seq,
+						       &fack_count, &reord,
+						       &flag);
 
 			/* ...tail remains todo... */
 			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
@@ -1654,10 +1657,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
 
 	if (acked > 0) {
 		/* One ACK acked hole. The rest eat duplicate ACKs. */
-		if (acked-1 >= tp->sacked_out)
+		if (acked - 1 >= tp->sacked_out)
 			tp->sacked_out = 0;
 		else
-			tp->sacked_out -= acked-1;
+			tp->sacked_out -= acked - 1;
 	}
 	tcp_check_reno_reordering(sk, acked);
 	tcp_verify_left_out(tp);
@@ -1691,10 +1694,10 @@ int tcp_use_frto(struct sock *sk)
 	tcp_for_write_queue_from(skb, sk) {
 		if (skb == tcp_send_head(sk))
 			break;
-		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 			return 0;
 		/* Short-circuit when first non-SACKed skb has been checked */
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 			break;
 	}
 	return 1;
@@ -1804,7 +1807,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 		 * Count the retransmission made on RTO correctly (only when
 		 * waiting for the first ACK and did not get it)...
 		 */
-		if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
 			/* For some reason this R-bit might get cleared? */
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out += tcp_skb_pcount(skb);
@@ -1817,7 +1820,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 		}
 
 		/* Don't lost mark skbs that were fwd transmitted after RTO */
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) &&
+		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
 		    !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
@@ -1832,7 +1835,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->bytes_acked = 0;
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
-					     sysctl_tcp_reordering);
+			       sysctl_tcp_reordering);
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->frto_highmark;
 	TCP_ECN_queue_cwr(tp);
@@ -1899,7 +1902,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 			tp->undo_marker = 0;
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
@@ -1911,7 +1914,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_verify_left_out(tp);
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
-					     sysctl_tcp_reordering);
+			       sysctl_tcp_reordering);
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
@@ -1943,7 +1946,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
 
 static inline int tcp_fackets_out(struct tcp_sock *tp)
 {
-	return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
 }
 
 /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
@@ -2116,12 +2119,11 @@ static int tcp_time_to_recover(struct sock *sk)
  * retransmitted past LOST markings in the first place? I'm not fully sure
  * about undo and end of connection cases, which can cause R without L?
  */
-static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
-				       struct sk_buff *skb)
+static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	if ((tp->retransmit_skb_hint != NULL) &&
 	    before(TCP_SKB_CB(skb)->seq,
-	    TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 		tp->retransmit_skb_hint = NULL;
 }
 
@@ -2156,7 +2158,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
 			cnt += tcp_skb_pcount(skb);
 
 		if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
-		     after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		    after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2223,7 +2225,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd,
-			   tcp_packets_in_flight(tp)+tcp_max_burst(tp));
+			   tcp_packets_in_flight(tp) + tcp_max_burst(tp));
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2243,15 +2245,15 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int decr = tp->snd_cwnd_cnt + 1;
 
-	if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr&1;
+	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
+	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
+		tp->snd_cwnd_cnt = decr & 1;
 		decr >>= 1;
 
 		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
 			tp->snd_cwnd -= decr;
 
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
+		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 }
@@ -2295,7 +2297,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 		if (icsk->icsk_ca_ops->undo_cwnd)
 			tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
 		else
-			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
+			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
 		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2314,8 +2316,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
 {
-	return tp->undo_marker &&
-		(!tp->undo_retrans || tcp_packet_delayed(tp));
+	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
 }
 
 /* People celebrate: "We love our President!" */
@@ -2434,7 +2435,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 	if (tp->retrans_out == 0)
 		tp->retrans_stamp = 0;
 
-	if (flag&FLAG_ECE)
+	if (flag & FLAG_ECE)
 		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
@@ -2480,7 +2481,6 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 }
 
-
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2492,13 +2492,12 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void
-tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
-	int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
+	int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
 	int fast_rexmit = 0;
 
@@ -2509,7 +2508,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
-	if (flag&FLAG_ECE)
+	if (flag & FLAG_ECE)
 		tp->prior_ssthresh = 0;
 
 	/* B. In all the states check for reneging SACKs. */
@@ -2521,7 +2520,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, 0);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -2581,7 +2580,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 			do_lost = tcp_try_undo_partial(sk, pkts_acked);
 		break;
 	case TCP_CA_Loss:
-		if (flag&FLAG_DATA_ACKED)
+		if (flag & FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
 		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
@@ -2631,7 +2630,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		tp->undo_retrans = tp->retrans_out;
 
 		if (icsk->icsk_ca_state < TCP_CA_CWR) {
-			if (!(flag&FLAG_ECE))
+			if (!(flag & FLAG_ECE))
 				tp->prior_ssthresh = tcp_current_ssthresh(sk);
 			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 			TCP_ECN_queue_cwr(tp);
@@ -2725,7 +2724,8 @@ static void tcp_rearm_rto(struct sock *sk)
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 	}
 }
 
@@ -2803,8 +2803,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 			flag |= FLAG_RETRANS_DATA_ACKED;
 			ca_seq_rtt = -1;
 			seq_rtt = -1;
-			if ((flag & FLAG_DATA_ACKED) ||
-			    (acked_pcount > 1))
+			if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
 				flag |= FLAG_NONHEAD_RETRANS_ACKED;
 		} else {
 			ca_seq_rtt = now - scb->when;
@@ -2950,8 +2949,9 @@ static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 /* Check that window update is acceptable.
  * The function assumes that snd_una<=ack<=snd_next.
  */
-static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
-					const u32 ack_seq, const u32 nwin)
+static inline int tcp_may_update_window(const struct tcp_sock *tp,
+					const u32 ack, const u32 ack_seq,
+					const u32 nwin)
 {
 	return (after(ack, tp->snd_una) ||
 		after(ack_seq, tp->snd_wl1) ||
@@ -3020,7 +3020,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
-	if (flag&FLAG_ECE)
+	if (flag & FLAG_ECE)
 		tcp_ratehalving_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, 1);
@@ -3063,7 +3063,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 	tcp_verify_left_out(tp);
 
 	/* Duplicate the behavior from Loss state (fastretrans_alert) */
-	if (flag&FLAG_DATA_ACKED)
+	if (flag & FLAG_DATA_ACKED)
 		inet_csk(sk)->icsk_retransmits = 0;
 
 	if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
@@ -3080,16 +3080,16 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		 * ACK isn't duplicate nor advances window, e.g., opposite dir
 		 * data, winupdate
 		 */
-		if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP))
+		if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
 			return 1;
 
-		if (!(flag&FLAG_DATA_ACKED)) {
+		if (!(flag & FLAG_DATA_ACKED)) {
 			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
 					    flag);
 			return 1;
 		}
 	} else {
-		if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
 			/* Prevent sending of new data. */
 			tp->snd_cwnd = min(tp->snd_cwnd,
 					   tcp_packets_in_flight(tp));
@@ -3097,10 +3097,12 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		}
 
 		if ((tp->frto_counter >= 2) &&
-		    (!(flag&FLAG_FORWARD_PROGRESS) ||
-		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+		    (!(flag & FLAG_FORWARD_PROGRESS) ||
+		     ((flag & FLAG_DATA_SACKED) &&
+		      !(flag & FLAG_ONLY_ORIG_SACKED)))) {
 			/* RFC4138 shortcoming (see comment above) */
-			if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+			if (!(flag & FLAG_FORWARD_PROGRESS) &&
+			    (flag & FLAG_NOT_DUP))
 				return 1;
 
 			tcp_enter_frto_loss(sk, 3, flag);
@@ -3166,13 +3168,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 			tp->bytes_acked += ack - prior_snd_una;
 		else if (icsk->icsk_ca_state == TCP_CA_Loss)
 			/* we assume just one segment left network */
-			tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+			tp->bytes_acked += min(ack - prior_snd_una,
+					       tp->mss_cache);
 	}
 
 	prior_fackets = tp->fackets_out;
 	prior_in_flight = tcp_packets_in_flight(tp);
 
-	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 		/* Window is constant, pure forward advance.
 		 * No more checks are required.
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3224,13 +3227,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
-		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag);
+		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
+				      flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 
-	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
+	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
 		dst_confirm(sk->sk_dst_cache);
 
 	return 1;
@@ -3255,22 +3259,22 @@ uninteresting_ack:
 	return 0;
 }
 
-
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
+void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
+		       int estab)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
-	int length=(th->doff*4)-sizeof(struct tcphdr);
+	int length = (th->doff * 4) - sizeof(struct tcphdr);
 
 	ptr = (unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
 	while (length > 0) {
-		int opcode=*ptr++;
+		int opcode = *ptr++;
 		int opsize;
 
 		switch (opcode) {
@@ -3359,7 +3363,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 				  struct tcp_sock *tp)
 {
-	if (th->doff == sizeof(struct tcphdr)>>2) {
+	if (th->doff == sizeof(struct tcphdr) >> 2) {
 		tp->rx_opt.saw_tstamp = 0;
 		return 0;
 	} else if (tp->rx_opt.tstamp_ok &&
@@ -3444,7 +3448,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
 }
 
-static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb)
+static inline int tcp_paws_discard(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
@@ -3476,16 +3481,16 @@ static void tcp_reset(struct sock *sk)
 {
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
-		case TCP_SYN_SENT:
-			sk->sk_err = ECONNREFUSED;
-			break;
-		case TCP_CLOSE_WAIT:
-			sk->sk_err = EPIPE;
-			break;
-		case TCP_CLOSE:
-			return;
-		default:
-			sk->sk_err = ECONNRESET;
+	case TCP_SYN_SENT:
+		sk->sk_err = ECONNREFUSED;
+		break;
+	case TCP_CLOSE_WAIT:
+		sk->sk_err = EPIPE;
+		break;
+	case TCP_CLOSE:
+		return;
+	default:
+		sk->sk_err = ECONNRESET;
 	}
 
 	if (!sock_flag(sk, SOCK_DEAD))
@@ -3518,43 +3523,43 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	sock_set_flag(sk, SOCK_DONE);
 
 	switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-		case TCP_ESTABLISHED:
-			/* Move to CLOSE_WAIT */
-			tcp_set_state(sk, TCP_CLOSE_WAIT);
-			inet_csk(sk)->icsk_ack.pingpong = 1;
-			break;
+	case TCP_SYN_RECV:
+	case TCP_ESTABLISHED:
+		/* Move to CLOSE_WAIT */
+		tcp_set_state(sk, TCP_CLOSE_WAIT);
+		inet_csk(sk)->icsk_ack.pingpong = 1;
+		break;
 
-		case TCP_CLOSE_WAIT:
-		case TCP_CLOSING:
-			/* Received a retransmission of the FIN, do
-			 * nothing.
-			 */
-			break;
-		case TCP_LAST_ACK:
-			/* RFC793: Remain in the LAST-ACK state. */
-			break;
+	case TCP_CLOSE_WAIT:
+	case TCP_CLOSING:
+		/* Received a retransmission of the FIN, do
+		 * nothing.
+		 */
+		break;
+	case TCP_LAST_ACK:
+		/* RFC793: Remain in the LAST-ACK state. */
+		break;
 
-		case TCP_FIN_WAIT1:
-			/* This case occurs when a simultaneous close
-			 * happens, we must ack the received FIN and
-			 * enter the CLOSING state.
-			 */
-			tcp_send_ack(sk);
-			tcp_set_state(sk, TCP_CLOSING);
-			break;
-		case TCP_FIN_WAIT2:
-			/* Received a FIN -- send ACK and enter TIME_WAIT. */
-			tcp_send_ack(sk);
-			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-			break;
-		default:
-			/* Only TCP_LISTEN and TCP_CLOSE are left, in these
-			 * cases we should never reach this piece of code.
-			 */
-			printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
-			       __FUNCTION__, sk->sk_state);
-			break;
+	case TCP_FIN_WAIT1:
+		/* This case occurs when a simultaneous close
+		 * happens, we must ack the received FIN and
+		 * enter the CLOSING state.
+		 */
+		tcp_send_ack(sk);
+		tcp_set_state(sk, TCP_CLOSING);
+		break;
+	case TCP_FIN_WAIT2:
+		/* Received a FIN -- send ACK and enter TIME_WAIT. */
+		tcp_send_ack(sk);
+		tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+		break;
+	default:
+		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
+		 * cases we should never reach this piece of code.
+		 */
+		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		       __FUNCTION__, sk->sk_state);
+		break;
 	}
 
 	/* It _is_ possible, that we have something out-of-order _after_ FIN.
@@ -3577,7 +3582,8 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	}
 }
 
-static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
+				  u32 end_seq)
 {
 	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
 		if (before(seq, sp->start_seq))
@@ -3600,7 +3606,8 @@ static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
 		tp->duplicate_sack[0].end_seq = end_seq;
-		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
+					   4 - tp->rx_opt.tstamp_ok);
 	}
 }
 
@@ -3640,12 +3647,12 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 {
 	int this_sack;
 	struct tcp_sack_block *sp = &tp->selective_acks[0];
-	struct tcp_sack_block *swalk = sp+1;
+	struct tcp_sack_block *swalk = sp + 1;
 
 	/* See if the recent change to the first SACK eats into
 	 * or hits the sequence space of other SACK blocks, if so coalesce.
 	 */
-	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
+	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
 		if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
 			int i;
 
@@ -3653,16 +3660,19 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 * Decrease num_sacks.
 			 */
 			tp->rx_opt.num_sacks--;
-			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
-			for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
-				sp[i] = sp[i+1];
+			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
+						   tp->rx_opt.dsack,
+						   4 - tp->rx_opt.tstamp_ok);
+			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
+				sp[i] = sp[i + 1];
 			continue;
 		}
 		this_sack++, swalk++;
 	}
 }
 
-static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
+				 struct tcp_sack_block *sack2)
 {
 	__u32 tmp;
 
@@ -3685,11 +3695,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	if (!cur_sacks)
 		goto new_sack;
 
-	for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
+	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
 		if (tcp_sack_extend(sp, seq, end_seq)) {
 			/* Rotate this_sack to the first one. */
-			for (; this_sack>0; this_sack--, sp--)
-				tcp_sack_swap(sp, sp-1);
+			for (; this_sack > 0; this_sack--, sp--)
+				tcp_sack_swap(sp, sp - 1);
 			if (cur_sacks > 1)
 				tcp_sack_maybe_coalesce(tp);
 			return;
@@ -3708,14 +3718,15 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		sp--;
 	}
 	for (; this_sack > 0; this_sack--, sp--)
-		*sp = *(sp-1);
+		*sp = *(sp - 1);
 
 new_sack:
 	/* Build the new head SACK, and we're done. */
 	sp->start_seq = seq;
 	sp->end_seq = end_seq;
 	tp->rx_opt.num_sacks++;
-	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
+				   4 - tp->rx_opt.tstamp_ok);
 }
 
 /* RCV.NXT advances, some SACKs should be eaten. */
@@ -3733,7 +3744,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		return;
 	}
 
-	for (this_sack = 0; this_sack < num_sacks; ) {
+	for (this_sack = 0; this_sack < num_sacks;) {
 		/* Check if the start of the sack is covered by RCV.NXT. */
 		if (!before(tp->rcv_nxt, sp->start_seq)) {
 			int i;
@@ -3752,7 +3763,9 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	}
 	if (num_sacks != tp->rx_opt.num_sacks) {
 		tp->rx_opt.num_sacks = num_sacks;
-		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
+					   tp->rx_opt.dsack,
+					   4 - tp->rx_opt.tstamp_ok);
 	}
 }
 
@@ -3805,14 +3818,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
 
-	__skb_pull(skb, th->doff*4);
+	__skb_pull(skb, th->doff * 4);
 
 	TCP_ECN_accept_cwr(tp, skb);
 
 	if (tp->rx_opt.dsack) {
 		tp->rx_opt.dsack = 0;
 		tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
-						    4 - tp->rx_opt.tstamp_ok);
+					     4 - tp->rx_opt.tstamp_ok);
 	}
 
 	/*  Queue data for delivery to the user.
@@ -3828,7 +3841,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
 		    sock_owned_by_user(sk) && !tp->urg_data) {
 			int chunk = min_t(unsigned int, skb->len,
-							tp->ucopy.len);
+					  tp->ucopy.len);
 
 			__set_current_state(TASK_RUNNING);
 
@@ -3945,7 +3958,7 @@ drop:
 			tp->selective_acks[0].end_seq =
 						TCP_SKB_CB(skb)->end_seq;
 		}
-		__skb_queue_head(&tp->out_of_order_queue,skb);
+		__skb_queue_head(&tp->out_of_order_queue, skb);
 	} else {
 		struct sk_buff *skb1 = tp->out_of_order_queue.prev;
 		u32 seq = TCP_SKB_CB(skb)->seq;
@@ -3968,10 +3981,10 @@ drop:
 			if (!after(TCP_SKB_CB(skb1)->seq, seq))
 				break;
 		} while ((skb1 = skb1->prev) !=
-			 (struct sk_buff*)&tp->out_of_order_queue);
+			 (struct sk_buff *)&tp->out_of_order_queue);
 
 		/* Do skb overlap to previous one? */
-		if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
+		if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
 		    before(seq, TCP_SKB_CB(skb1)->end_seq)) {
 			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 				/* All the bits are present. Drop. */
@@ -3981,7 +3994,8 @@ drop:
 			}
 			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
 				/* Partial overlap. */
-				tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
+				tcp_dsack_set(tp, seq,
+					      TCP_SKB_CB(skb1)->end_seq);
 			} else {
 				skb1 = skb1->prev;
 			}
@@ -3990,15 +4004,17 @@ drop:
 
 		/* And clean segments covered by new one as whole. */
 		while ((skb1 = skb->next) !=
-		       (struct sk_buff*)&tp->out_of_order_queue &&
+		       (struct sk_buff *)&tp->out_of_order_queue &&
 		       after(end_seq, TCP_SKB_CB(skb1)->seq)) {
-		       if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-			       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
-			       break;
-		       }
-		       __skb_unlink(skb1, &tp->out_of_order_queue);
-		       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
-		       __kfree_skb(skb1);
+			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+				tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+						 end_seq);
+				break;
+			}
+			__skb_unlink(skb1, &tp->out_of_order_queue);
+			tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+					 TCP_SKB_CB(skb1)->end_seq);
+			__kfree_skb(skb1);
 		}
 
 add_sack:
@@ -4021,7 +4037,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 
 	/* First, check that queue is collapsible and find
 	 * the point where collapsing can be useful. */
-	for (skb = head; skb != tail; ) {
+	for (skb = head; skb != tail;) {
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 			struct sk_buff *next = skb->next;
@@ -4059,9 +4075,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		/* Too big header? This can happen with IPv6. */
 		if (copy < 0)
 			return;
-		if (end-start < copy)
-			copy = end-start;
-		nskb = alloc_skb(copy+header, GFP_ATOMIC);
+		if (end - start < copy)
+			copy = end - start;
+		nskb = alloc_skb(copy + header, GFP_ATOMIC);
 		if (!nskb)
 			return;
 
@@ -4171,7 +4187,7 @@ static int tcp_prune_queue(struct sock *sk)
 	tcp_collapse_ofo_queue(sk);
 	tcp_collapse(sk, &sk->sk_receive_queue,
 		     sk->sk_receive_queue.next,
-		     (struct sk_buff*)&sk->sk_receive_queue,
+		     (struct sk_buff *)&sk->sk_receive_queue,
 		     tp->copied_seq, tp->rcv_nxt);
 	sk_mem_reclaim(sk);
 
@@ -4210,7 +4226,6 @@ static int tcp_prune_queue(struct sock *sk)
 	return -1;
 }
 
-
 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
  * As additional protections, we do not touch cwnd in retransmission phases,
  * and if application hit its sndbuf limit recently.
@@ -4272,8 +4287,8 @@ static void tcp_new_space(struct sock *sk)
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
-						   tp->reordering + 1);
-		sndmem *= 2*demanded;
+				     tp->reordering + 1);
+		sndmem *= 2 * demanded;
 		if (sndmem > sk->sk_sndbuf)
 			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -4314,8 +4329,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    /* We ACK each frame or... */
 	    tcp_in_quickack_mode(sk) ||
 	    /* We have out of order data. */
-	    (ofo_possible &&
-	     skb_peek(&tp->out_of_order_queue))) {
+	    (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
 		/* Then ack it now */
 		tcp_send_ack(sk);
 	} else {
@@ -4343,7 +4357,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  *	either form (or just set the sysctl tcp_stdurg).
  */
 
-static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
+static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
@@ -4392,8 +4406,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 	 * buggy users.
 	 */
 	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
-	    !sock_flag(sk, SOCK_URGINLINE) &&
-	    tp->copied_seq != tp->rcv_nxt) {
+	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 		tp->copied_seq++;
 		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
@@ -4402,8 +4415,8 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 		}
 	}
 
-	tp->urg_data   = TCP_URG_NOTYET;
-	tp->urg_seq    = ptr;
+	tp->urg_data = TCP_URG_NOTYET;
+	tp->urg_seq = ptr;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4416,7 +4429,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 
 	/* Check if we get a new urgent pointer - normally not. */
 	if (th->urg)
-		tcp_check_urg(sk,th);
+		tcp_check_urg(sk, th);
 
 	/* Do we wait for any urgent data? - normally not... */
 	if (tp->urg_data == TCP_URG_NOTYET) {
@@ -4458,7 +4471,8 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 	return err;
 }
 
-static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static __sum16 __tcp_checksum_complete_user(struct sock *sk,
+					    struct sk_buff *skb)
 {
 	__sum16 result;
 
@@ -4472,14 +4486,16 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
 	return result;
 }
 
-static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_checksum_complete_user(struct sock *sk,
+					     struct sk_buff *skb)
 {
 	return !skb_csum_unnecessary(skb) &&
-		__tcp_checksum_complete_user(sk, skb);
+	       __tcp_checksum_complete_user(sk, skb);
 }
 
 #ifdef CONFIG_NET_DMA
-static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
+static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
+				  int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int chunk = skb->len - hlen;
@@ -4495,7 +4511,9 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
 
 		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
-			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
+							 skb, hlen,
+							 tp->ucopy.iov, chunk,
+							 tp->ucopy.pinned_list);
 
 		if (dma_cookie < 0)
 			goto out;
@@ -4577,7 +4595,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	 */
 
 	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
-		TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
 		int tcp_header_len = tp->tcp_header_len;
 
 		/* Timestamp header prediction: tcp_header_len
@@ -4646,7 +4664,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					eaten = 1;
 				}
 #endif
-				if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
+				if (tp->ucopy.task == current &&
+				    sock_owned_by_user(sk) && !copied_early) {
 					__set_current_state(TASK_RUNNING);
 
 					if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
@@ -4693,7 +4712,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
 
 				/* Bulk data transfer: receiver */
-				__skb_pull(skb,tcp_header_len);
+				__skb_pull(skb, tcp_header_len);
 				__skb_queue_tail(&sk->sk_receive_queue, skb);
 				skb_set_owner_r(skb, sk);
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -4725,7 +4744,7 @@ no_ack:
 	}
 
 slow_path:
-	if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
+	if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
 		goto csum_error;
 
 	/*
@@ -4975,7 +4994,8 @@ discard:
 	}
 
 	/* PAWS check. */
-	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
+	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
+	    tcp_paws_check(&tp->rx_opt, 0))
 		goto discard_and_undo;
 
 	if (th->syn) {
@@ -5010,7 +5030,6 @@ discard:
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
-
 		tcp_send_synack(sk);
 #if 0
 		/* Note, we could accept data and URG from this segment.
@@ -5042,7 +5061,6 @@ reset_and_undo:
 	return 1;
 }
 
-
 /*
  *	This function implements the receiving procedure of RFC 793 for
  *	all states except ESTABLISHED and TIME_WAIT.
@@ -5164,7 +5182,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 */
 				if (sk->sk_socket)
 					sk_wake_async(sk,
-							SOCK_WAKE_IO, POLL_OUT);
+						      SOCK_WAKE_IO, POLL_OUT);
 
 				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 				tp->snd_wnd = ntohs(th->window) <<
@@ -5176,8 +5194,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 * and does not calculate rtt.
 				 * Fix it at least with timestamps.
 				 */
-				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-				    !tp->srtt)
+				if (tp->rx_opt.saw_tstamp &&
+				    tp->rx_opt.rcv_tsecr && !tp->srtt)
 					tcp_ack_saw_tstamp(sk, 0);
 
 				if (tp->rx_opt.tstamp_ok)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 454cf84b615..bb7e80a284e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -221,14 +221,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	 * following RFC2414. Senders, not following this RFC,
 	 * will be satisfied with 2.
 	 */
-	if (mss > (1<<*rcv_wscale)) {
+	if (mss > (1 << *rcv_wscale)) {
 		int init_cwnd = 4;
-		if (mss > 1460*3)
+		if (mss > 1460 * 3)
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd*mss)
-			*rcv_wnd = init_cwnd*mss;
+		if (*rcv_wnd > init_cwnd * mss)
+			*rcv_wnd = init_cwnd * mss;
 	}
 
 	/* Set the clamp no higher than max representable value */
@@ -278,11 +278,10 @@ static u16 tcp_select_window(struct sock *sk)
 	return new_win;
 }
 
-static inline void TCP_ECN_send_synack(struct tcp_sock *tp,
-				       struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
-	if (!(tp->ecn_flags&TCP_ECN_OK))
+	if (!(tp->ecn_flags & TCP_ECN_OK))
 		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
 }
 
@@ -292,7 +291,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
 	}
 }
@@ -314,7 +313,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
 		if (skb->len != tcp_header_len &&
 		    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
 			INET_ECN_xmit(sk);
-			if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
+			if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
 				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
 				tcp_hdr(skb)->cwr = 1;
 				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
@@ -431,7 +430,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *) ptr;
+		*md5_hash = (__u8 *)ptr;
 	}
 #endif
 }
@@ -447,7 +446,8 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+			    gfp_t gfp_mask)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet;
@@ -551,8 +551,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr		= 0;
 
 	if (unlikely(tp->urg_mode &&
-		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
-		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
 		th->urg			= 1;
 	}
 
@@ -616,7 +616,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 #undef SYSCTL_FLAG_SACK
 }
 
-
 /* This routine just queue's the buffer
  *
  * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -634,7 +633,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	sk_mem_charge(sk, skb->truesize);
 }
 
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
+				 unsigned int mss_now)
 {
 	if (skb->len <= mss_now || !sk_can_gso(sk)) {
 		/* Avoid the costly divide in the normal
@@ -670,7 +670,8 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
  * packet to the list.  This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+		 unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
@@ -708,13 +709,14 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 
 	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
-		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
+		buff->csum = csum_partial_copy_nocheck(skb->data + len,
+						       skb_put(buff, nsize),
 						       nsize, 0);
 
 		skb_trim(skb, len);
@@ -781,7 +783,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 
 	eat = len;
 	k = 0;
-	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		if (skb_shinfo(skb)->frags[i].size <= eat) {
 			put_page(skb_shinfo(skb)->frags[i].page);
 			eat -= skb_shinfo(skb)->frags[i].size;
@@ -804,8 +806,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 
 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		return -ENOMEM;
 
 	/* If len == headlen, we avoid __skb_pull to preserve alignment. */
@@ -909,7 +910,6 @@ void tcp_mtup_init(struct sock *sk)
    NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
    are READ ONLY outside this function.		--ANK (980731)
  */
-
 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -922,8 +922,8 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 	mss_now = tcp_mtu_to_mss(sk, pmtu);
 
 	/* Bound mss with half of window */
-	if (tp->max_window && mss_now > (tp->max_window>>1))
-		mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
+	if (tp->max_window && mss_now > (tp->max_window >> 1))
+		mss_now = max((tp->max_window >> 1), 68U - tp->tcp_header_len);
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
@@ -977,8 +977,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 				  inet_csk(sk)->icsk_ext_hdr_len -
 				  tp->tcp_header_len);
 
-		if (tp->max_window &&
-		    (xmit_size_goal > (tp->max_window >> 1)))
+		if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1)))
 			xmit_size_goal = max((tp->max_window >> 1),
 					     68U - tp->tcp_header_len);
 
@@ -990,7 +989,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 }
 
 /* Congestion window validation. (RFC2861) */
-
 static void tcp_cwnd_validate(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1017,8 +1015,7 @@ static void tcp_cwnd_validate(struct sock *sk)
  * per input skb which could be mostly avoided here (if desired).
  */
 static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
-					unsigned int mss_now,
-					unsigned int cwnd)
+					unsigned int mss_now, unsigned int cwnd)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 needed, window, cwnd_len;
@@ -1039,7 +1036,8 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
 /* Can at least one segment of SKB be sent right now, according to the
  * congestion window rules?  If so, return how many segments are allowed.
  */
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
+					 struct sk_buff *skb)
 {
 	u32 in_flight, cwnd;
 
@@ -1059,13 +1057,12 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
 /* This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
+			     unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
 
-	if (!tso_segs ||
-	    (tso_segs > 1 &&
-	     tcp_skb_mss(skb) != mss_now)) {
+	if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
 		tcp_set_skb_tso_segs(sk, skb, mss_now);
 		tso_segs = tcp_skb_pcount(skb);
 	}
@@ -1085,16 +1082,13 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
  * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
  *    With Minshall's modification: all sent small packets are ACKed.
  */
-
 static inline int tcp_nagle_check(const struct tcp_sock *tp,
 				  const struct sk_buff *skb,
 				  unsigned mss_now, int nonagle)
 {
 	return (skb->len < mss_now &&
-		((nonagle&TCP_NAGLE_CORK) ||
-		 (!nonagle &&
-		  tp->packets_out &&
-		  tcp_minshall_check(tp))));
+		((nonagle & TCP_NAGLE_CORK) ||
+		 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
 }
 
 /* Return non-zero if the Nagle test allows this packet to be
@@ -1126,7 +1120,8 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 }
 
 /* Does at least the first segment of SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
+				   unsigned int cur_mss)
 {
 	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
@@ -1152,8 +1147,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 		return 0;
 
 	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota &&
-	    !tcp_snd_wnd_test(tp, skb, cur_mss))
+	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
 		cwnd_quota = 0;
 
 	return cwnd_quota;
@@ -1177,7 +1171,8 @@ int tcp_may_send_now(struct sock *sk)
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+			unsigned int mss_now)
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
@@ -1203,7 +1198,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 
 	/* This packet was never sent out yet, so no SACK bits. */
@@ -1247,8 +1242,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 
 	in_flight = tcp_packets_in_flight(tp);
 
-	BUG_ON(tcp_skb_pcount(skb) <= 1 ||
-	       (tp->snd_cwnd <= in_flight));
+	BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
 
 	send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
@@ -1281,7 +1275,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Ok, it looks like it is advisable to defer.  */
-	tp->tso_deferred = 1 | (jiffies<<1);
+	tp->tso_deferred = 1 | (jiffies << 1);
 
 	return 1;
 
@@ -1293,7 +1287,8 @@ send_now:
 /* Create a new MTU probe if we are ready.
  * Returns 0 if we should wait to probe (no cwnd available),
  *         1 if a probe was sent,
- *         -1 otherwise */
+ *         -1 otherwise
+ */
 static int tcp_mtu_probe(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1318,7 +1313,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	/* Very simple search strategy: just double the MSS. */
 	mss_now = tcp_current_mss(sk, 0);
-	probe_size = 2*tp->mss_cache;
+	probe_size = 2 * tp->mss_cache;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
 		/* TODO: set timer for probe_converge_event */
@@ -1366,7 +1361,8 @@ static int tcp_mtu_probe(struct sock *sk)
 			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 		else
 			nskb->csum = skb_copy_and_csum_bits(skb, 0,
-					 skb_put(nskb, copy), copy, nskb->csum);
+							    skb_put(nskb, copy),
+							    copy, nskb->csum);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
@@ -1380,7 +1376,8 @@ static int tcp_mtu_probe(struct sock *sk)
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
 				if (skb->ip_summed != CHECKSUM_PARTIAL)
-					skb->csum = csum_partial(skb->data, skb->len, 0);
+					skb->csum = csum_partial(skb->data,
+								 skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
 				tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1400,7 +1397,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->when = tcp_time_stamp;
 	if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
 		/* Decrement cwnd here because we are sending
-		* effectively two packets. */
+		 * effectively two packets. */
 		tp->snd_cwnd--;
 		tcp_event_new_data_sent(sk, nskb);
 
@@ -1414,7 +1411,6 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }
 
-
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
@@ -1626,7 +1622,8 @@ u32 __tcp_select_window(struct sock *sk)
 		icsk->icsk_ack.quick = 0;
 
 		if (tcp_memory_pressure)
-			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
+					       4U * tp->advmss);
 
 		if (free_space < mss)
 			return 0;
@@ -1659,7 +1656,7 @@ u32 __tcp_select_window(struct sock *sk)
 		 * is too small.
 		 */
 		if (window <= free_space - mss || window > free_space)
-			window = (free_space/mss)*mss;
+			window = (free_space / mss) * mss;
 		else if (mss == full_space &&
 			 free_space > window + (full_space >> 1))
 			window = free_space;
@@ -1669,7 +1666,8 @@ u32 __tcp_select_window(struct sock *sk)
 }
 
 /* Attempt to collapse two adjacent SKB's during retransmission. */
-static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
+static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
+				     int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
@@ -1762,12 +1760,12 @@ void tcp_simple_retransmit(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 		if (skb->len > mss &&
-		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
-			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 			}
-			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
+			if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				tp->lost_out += tcp_skb_pcount(skb);
 				lost = 1;
@@ -1846,8 +1844,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    (skb->len < (cur_mss >> 1)) &&
 	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
 	    (!tcp_skb_is_last(sk, skb)) &&
-	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+	    (skb_shinfo(skb)->nr_frags == 0 &&
+	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	    (tcp_skb_pcount(skb) == 1 &&
+	     tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
 	    (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
@@ -1885,7 +1885,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		tp->total_retrans++;
 
 #if FASTRETRANS_DEBUG > 0
-		if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "retrans_out leaked.\n");
 		}
@@ -1927,7 +1927,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	if (tp->retransmit_skb_hint) {
 		skb = tp->retransmit_skb_hint;
 		packet_cnt = tp->retransmit_cnt_hint;
-	}else{
+	} else {
 		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
@@ -1954,7 +1954,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 				return;
 
 			if (sacked & TCPCB_LOST) {
-				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+				if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
 					if (tcp_retransmit_skb(sk, skb)) {
 						tp->retransmit_skb_hint = NULL;
 						return;
@@ -2036,7 +2036,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	}
 }
 
-
 /* Send a fin.  The caller locks the socket for us.  This cannot be
  * allowed to fail queueing a FIN frame under any circumstances.
  */
@@ -2122,14 +2121,14 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
  */
 int tcp_send_synack(struct sock *sk)
 {
-	struct sk_buff* skb;
+	struct sk_buff *skb;
 
 	skb = tcp_write_queue_head(sk);
-	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
+	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
 	}
-	if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
+	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
 		if (skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
@@ -2153,8 +2152,8 @@ int tcp_send_synack(struct sock *sk)
 /*
  * Prepare a SYN-ACK.
  */
-struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
-				 struct request_sock *req)
+struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+				struct request_sock *req)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2372,9 +2371,10 @@ void tcp_send_delayed_ack(struct sock *sk)
 
 	if (ato > TCP_DELACK_MIN) {
 		const struct tcp_sock *tp = tcp_sk(sk);
-		int max_ato = HZ/2;
+		int max_ato = HZ / 2;
 
-		if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
+		if (icsk->icsk_ack.pingpong ||
+		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
 			max_ato = TCP_DELACK_MAX;
 
 		/* Slow path, intersegment interval is "high". */
@@ -2384,7 +2384,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 		 * directly.
 		 */
 		if (tp->srtt) {
-			int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
+			int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
 
 			if (rtt < max_ato)
 				max_ato = rtt;
-- 
cgit v1.2.3


From 409d22b470532cb92b91b9aeb7257357a176b849 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 14:57:40 -0800
Subject: [TCP]: Code duplication removal, added tcp_bound_to_half_wnd()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bb7e80a284e..b3110fc1570 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -888,6 +888,15 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
+/* Bound MSS / TSO packet size with the half of the window */
+static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+{
+	if (tp->max_window && pktsize > (tp->max_window >> 1))
+		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	else
+		return pktsize;
+}
+
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -920,10 +929,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 		icsk->icsk_mtup.search_high = pmtu;
 
 	mss_now = tcp_mtu_to_mss(sk, pmtu);
-
-	/* Bound mss with half of window */
-	if (tp->max_window && mss_now > (tp->max_window >> 1))
-		mss_now = max((tp->max_window >> 1), 68U - tp->tcp_header_len);
+	mss_now = tcp_bound_to_half_wnd(tp, mss_now);
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
@@ -977,10 +983,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 				  inet_csk(sk)->icsk_ext_hdr_len -
 				  tp->tcp_header_len);
 
-		if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1)))
-			xmit_size_goal = max((tp->max_window >> 1),
-					     68U - tp->tcp_header_len);
-
+		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
 		xmit_size_goal -= (xmit_size_goal % mss_now);
 	}
 	tp->xmit_size_goal = xmit_size_goal;
-- 
cgit v1.2.3


From d436d68630a74ba3c898ff1b53591ddc4eb7f2bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 31 Dec 2007 14:58:00 -0800
Subject: [TCP]: Remove unnecessary local variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b3110fc1570..f6d279a95f4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -995,9 +995,8 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 static void tcp_cwnd_validate(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	__u32 packets_out = tp->packets_out;
 
-	if (packets_out >= tp->snd_cwnd) {
+	if (tp->packets_out >= tp->snd_cwnd) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
-- 
cgit v1.2.3


From 680a5a5086443b9547b32b04f40af8f9d717f711 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 31 Dec 2007 15:00:50 -0800
Subject: [PATCH] use SK_MEM_QUANTUM_SHIFT in __sk_mem_reclaim()

Avoid an expensive divide (as done in commit
18030477e70a826b91608aee40a987bbd368fec6 but lost in commit
23821d2653111d20e75472c8c5003df1a55309a8)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 8c184c4a381..3804e7df626 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1476,7 +1476,7 @@ void __sk_mem_reclaim(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
-	atomic_sub(sk->sk_forward_alloc / SK_MEM_QUANTUM,
+	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
 		   prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
-- 
cgit v1.2.3


From 9a429c4983deae020f1e757ecc8f547b6d4e2f2b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 1 Jan 2008 21:58:02 -0800
Subject: [NET]: Add some acquires/releases sparse annotations.

Add __acquires() and __releases() annotations to suppress some sparse
warnings.

example of warnings :

net/ipv4/udp.c:1555:14: warning: context imbalance in 'udp_seq_start' - wrong
count at exit
net/ipv4/udp.c:1571:13: warning: context imbalance in 'udp_seq_stop' -
unexpected unlock

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c             | 2 ++
 net/core/dev_mcast.c       | 2 ++
 net/core/gen_stats.c       | 1 +
 net/core/neighbour.c       | 2 ++
 net/core/sock.c            | 2 ++
 net/ipv4/fib_hash.c        | 2 ++
 net/ipv4/inet_hashtables.c | 1 +
 net/ipv4/udp.c             | 2 ++
 net/ipv6/anycast.c         | 2 ++
 net/ipv6/ip6_flowlabel.c   | 2 ++
 net/ipv6/mcast.c           | 4 ++++
 net/netlink/af_netlink.c   | 4 ++++
 net/sched/sch_generic.c    | 4 ++++
 net/sunrpc/cache.c         | 2 ++
 net/unix/af_unix.c         | 2 ++
 net/xfrm/xfrm_state.c      | 2 ++
 16 files changed, 36 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7153e94f50a..eee77424309 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2368,6 +2368,7 @@ static int dev_ifconf(struct net *net, char __user *arg)
  *	in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	struct net *net = seq_file_net(seq);
 	loff_t off;
@@ -2394,6 +2395,7 @@ void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	read_unlock(&dev_base_lock);
 }
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 63f0b33d7ce..cadbfbf7e7f 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -186,6 +186,7 @@ EXPORT_SYMBOL(dev_mc_unsync);
 
 #ifdef CONFIG_PROC_FS
 static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	struct net *net = seq_file_net(seq);
 	struct net_device *dev;
@@ -206,6 +207,7 @@ static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void dev_mc_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	read_unlock(&dev_base_lock);
 }
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index bcc25591d8a..8073561f7c6 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -55,6 +55,7 @@ rtattr_failure:
 int
 gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 	int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+	__acquires(lock)
 {
 	memset(d, 0, sizeof(*d));
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bd899d55773..802493327a8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2313,6 +2313,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
 }
 
 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+	__acquires(tbl->lock)
 {
 	struct neigh_seq_state *state = seq->private;
 	loff_t pos_minus_one;
@@ -2356,6 +2357,7 @@ out:
 EXPORT_SYMBOL(neigh_seq_next);
 
 void neigh_seq_stop(struct seq_file *seq, void *v)
+	__releases(tbl->lock)
 {
 	struct neigh_seq_state *state = seq->private;
 	struct neigh_table *tbl = state->tbl;
diff --git a/net/core/sock.c b/net/core/sock.c
index 3804e7df626..3d7757ee2fc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2025,6 +2025,7 @@ EXPORT_SYMBOL(proto_unregister);
 
 #ifdef CONFIG_PROC_FS
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(proto_list_lock)
 {
 	read_lock(&proto_list_lock);
 	return seq_list_start_head(&proto_list, *pos);
@@ -2036,6 +2037,7 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void proto_seq_stop(struct seq_file *seq, void *v)
+	__releases(proto_list_lock)
 {
 	read_unlock(&proto_list_lock);
 }
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 9d540415847..ee1ffdb3044 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -936,6 +936,7 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(fib_hash_lock)
 {
 	void *v = NULL;
 
@@ -952,6 +953,7 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void fib_seq_stop(struct seq_file *seq, void *v)
+	__releases(fib_hash_lock)
 {
 	read_unlock(&fib_hash_lock);
 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2e5814a8436..88a059e04e3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -96,6 +96,7 @@ EXPORT_SYMBOL(inet_put_port);
  * exclusive lock release). It should be ifdefed really.
  */
 void inet_listen_wlock(struct inet_hashinfo *hashinfo)
+	__acquires(hashinfo->lhash_lock)
 {
 	write_lock(&hashinfo->lhash_lock);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 35328436075..02fcccd0486 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1550,6 +1550,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(udp_hash_lock)
 {
 	read_lock(&udp_hash_lock);
 	return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
@@ -1569,6 +1570,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void udp_seq_stop(struct seq_file *seq, void *v)
+	__releases(udp_hash_lock)
 {
 	read_unlock(&udp_hash_lock);
 }
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f915c4df982..5c4190060e7 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -504,6 +504,7 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	read_lock(&dev_base_lock);
 	return ac6_get_idx(seq, *pos);
@@ -518,6 +519,7 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ac6_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	struct ac6_iter_state *state = ac6_seq_private(seq);
 	if (likely(state->idev != NULL)) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index d0babea8981..2b7d9ee9883 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -629,6 +629,7 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(ip6_fl_lock)
 {
 	read_lock_bh(&ip6_fl_lock);
 	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -647,6 +648,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ip6fl_seq_stop(struct seq_file *seq, void *v)
+	__releases(ip6_fl_lock)
 {
 	read_unlock_bh(&ip6_fl_lock);
 }
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 82b12940c2a..ab228d1ea11 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2373,6 +2373,7 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	read_lock(&dev_base_lock);
 	return igmp6_mc_get_idx(seq, *pos);
@@ -2387,6 +2388,7 @@ static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 	if (likely(state->idev != NULL)) {
@@ -2516,6 +2518,7 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	read_lock(&dev_base_lock);
 	return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2533,6 +2536,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 	if (likely(state->im != NULL)) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index dbd7cad1c9a..be07f1b45ee 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -171,6 +171,7 @@ static void netlink_sock_destruct(struct sock *sk)
  */
 
 static void netlink_table_grab(void)
+	__acquires(nl_table_lock)
 {
 	write_lock_irq(&nl_table_lock);
 
@@ -193,6 +194,7 @@ static void netlink_table_grab(void)
 }
 
 static inline void netlink_table_ungrab(void)
+	__releases(nl_table_lock)
 {
 	write_unlock_irq(&nl_table_lock);
 	wake_up(&nl_table_wait);
@@ -1728,6 +1730,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(nl_table_lock)
 {
 	read_lock(&nl_table_lock);
 	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -1776,6 +1779,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void netlink_seq_stop(struct seq_file *seq, void *v)
+	__releases(nl_table_lock)
 {
 	read_unlock(&nl_table_lock);
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9be2f152455..ea5a05b172c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -40,12 +40,16 @@
  */
 
 void qdisc_lock_tree(struct net_device *dev)
+	__acquires(dev->queue_lock)
+	__acquires(dev->ingress_lock)
 {
 	spin_lock_bh(&dev->queue_lock);
 	spin_lock(&dev->ingress_lock);
 }
 
 void qdisc_unlock_tree(struct net_device *dev)
+	__releases(dev->ingress_lock)
+	__releases(dev->queue_lock)
 {
 	spin_unlock(&dev->ingress_lock);
 	spin_unlock_bh(&dev->queue_lock);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8e05557414c..73f053d0cc7 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1127,6 +1127,7 @@ struct handle {
 };
 
 static void *c_start(struct seq_file *m, loff_t *pos)
+	__acquires(cd->hash_lock)
 {
 	loff_t n = *pos;
 	unsigned hash, entry;
@@ -1183,6 +1184,7 @@ static void *c_next(struct seq_file *m, void *p, loff_t *pos)
 }
 
 static void c_stop(struct seq_file *m, void *p)
+	__releases(cd->hash_lock)
 {
 	struct cache_detail *cd = ((struct handle*)m->private)->cd;
 	read_unlock(&cd->hash_lock);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 63a9239571a..eea75888805 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2033,6 +2033,7 @@ static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
 
 
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(unix_table_lock)
 {
 	struct unix_iter_state *iter = seq->private;
 	spin_lock(&unix_table_lock);
@@ -2055,6 +2056,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
+	__releases(unix_table_lock)
 {
 	spin_unlock(&unix_table_lock);
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 6bf876c866d..65f5ea4ae4c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -211,6 +211,7 @@ static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
 }
 
 static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
+	__releases(xfrm_state_afinfo_lock)
 {
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 }
@@ -1909,6 +1910,7 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 }
 
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+	__releases(xfrm_state_afinfo_lock)
 {
 	read_unlock(&xfrm_state_afinfo_lock);
 }
-- 
cgit v1.2.3


From dbb1db8b59fb84d620ab5266366b52c8052ee75c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 1 Jan 2008 23:52:59 -0800
Subject: [IPSEC]: Return EOVERFLOW when output sequence number overflows

Previously we made it an error on the output path if the sequence number
overflowed.  However we did not set the err variable accordingly.  This
patch sets err to -EOVERFLOW in that case.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 81ad8eb2b28..f4a1047a557 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,6 +66,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 			if (unlikely(x->replay.oseq == 0)) {
 				x->replay.oseq--;
 				xfrm_audit_state_replay_overflow(x, skb);
+				err = -EOVERFLOW;
 				goto error;
 			}
 			if (xfrm_aevent_is_on())
-- 
cgit v1.2.3


From f038ac8f9b9735358ef410d45f4cff1da810c1cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 3 Jan 2008 20:36:55 -0800
Subject: [TCP]: cleanup tcp_parse_options deep indented switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removed case indentation level & combined some nested ifs, mostly
within 80 lines now. This is a leftover from indent patch, it
just had to be done manually to avoid messing it up completely.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 135 +++++++++++++++++++++++++--------------------------
 1 file changed, 67 insertions(+), 68 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 18e099c6fa6..fa2c85ca5bc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3278,81 +3278,80 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 		int opsize;
 
 		switch (opcode) {
-			case TCPOPT_EOL:
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
 				return;
-			case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-				length--;
-				continue;
-			default:
-				opsize=*ptr++;
-				if (opsize < 2) /* "silly options" */
-					return;
-				if (opsize > length)
-					return;	/* don't parse partial options */
-				switch (opcode) {
-				case TCPOPT_MSS:
-					if (opsize==TCPOLEN_MSS && th->syn && !estab) {
-						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
-						if (in_mss) {
-							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
-								in_mss = opt_rx->user_mss;
-							opt_rx->mss_clamp = in_mss;
-						}
-					}
-					break;
-				case TCPOPT_WINDOW:
-					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
-						if (sysctl_tcp_window_scaling) {
-							__u8 snd_wscale = *(__u8 *) ptr;
-							opt_rx->wscale_ok = 1;
-							if (snd_wscale > 14) {
-								if (net_ratelimit())
-									printk(KERN_INFO "tcp_parse_options: Illegal window "
-									       "scaling value %d >14 received.\n",
-									       snd_wscale);
-								snd_wscale = 14;
-							}
-							opt_rx->snd_wscale = snd_wscale;
-						}
-					break;
-				case TCPOPT_TIMESTAMP:
-					if (opsize==TCPOLEN_TIMESTAMP) {
-						if ((estab && opt_rx->tstamp_ok) ||
-						    (!estab && sysctl_tcp_timestamps)) {
-							opt_rx->saw_tstamp = 1;
-							opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
-							opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
-						}
+			if (opsize > length)
+				return;	/* don't parse partial options */
+			switch (opcode) {
+			case TCPOPT_MSS:
+				if (opsize == TCPOLEN_MSS && th->syn && !estab) {
+					u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+					if (in_mss) {
+						if (opt_rx->user_mss &&
+						    opt_rx->user_mss < in_mss)
+							in_mss = opt_rx->user_mss;
+						opt_rx->mss_clamp = in_mss;
 					}
-					break;
-				case TCPOPT_SACK_PERM:
-					if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
-						if (sysctl_tcp_sack) {
-							opt_rx->sack_ok = 1;
-							tcp_sack_reset(opt_rx);
-						}
+				}
+				break;
+			case TCPOPT_WINDOW:
+				if (opsize == TCPOLEN_WINDOW && th->syn &&
+				    !estab && sysctl_tcp_window_scaling) {
+					__u8 snd_wscale = *(__u8 *)ptr;
+					opt_rx->wscale_ok = 1;
+					if (snd_wscale > 14) {
+						if (net_ratelimit())
+							printk(KERN_INFO "tcp_parse_options: Illegal window "
+							       "scaling value %d >14 received.\n",
+							       snd_wscale);
+						snd_wscale = 14;
 					}
-					break;
+					opt_rx->snd_wscale = snd_wscale;
+				}
+				break;
+			case TCPOPT_TIMESTAMP:
+				if ((opsize == TCPOLEN_TIMESTAMP) &&
+				    ((estab && opt_rx->tstamp_ok) ||
+				     (!estab && sysctl_tcp_timestamps))) {
+					opt_rx->saw_tstamp = 1;
+					opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+					opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+				}
+				break;
+			case TCPOPT_SACK_PERM:
+				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+				    !estab && sysctl_tcp_sack) {
+					opt_rx->sack_ok = 1;
+					tcp_sack_reset(opt_rx);
+				}
+				break;
 
-				case TCPOPT_SACK:
-					if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
-					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
-					   opt_rx->sack_ok) {
-						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
-					}
-					break;
+			case TCPOPT_SACK:
+				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+				   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+				   opt_rx->sack_ok) {
+					TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+				}
+				break;
 #ifdef CONFIG_TCP_MD5SIG
-				case TCPOPT_MD5SIG:
-					/*
-					 * The MD5 Hash has already been
-					 * checked (see tcp_v{4,6}_do_rcv()).
-					 */
-					break;
+			case TCPOPT_MD5SIG:
+				/*
+				 * The MD5 Hash has already been
+				 * checked (see tcp_v{4,6}_do_rcv()).
+				 */
+				break;
 #endif
-				}
+			}
 
-				ptr+=opsize-2;
-				length-=opsize;
+			ptr += opsize-2;
+			length -= opsize;
 		}
 	}
 }
-- 
cgit v1.2.3


From 19773b4923ed0e21e3289361dba5e69e1ce6e00b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 3 Jan 2008 20:38:05 -0800
Subject: [TCP]: Urgent parameter effect can be simplified.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f6d279a95f4..6c7cd0a9098 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2488,7 +2488,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
+	TCP_SKB_CB(skb)->seq = tp->snd_una - !urgent;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
-- 
cgit v1.2.3


From e870a8efcddaaa3da7e180b6ae21239fb96aa2bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 3 Jan 2008 20:39:01 -0800
Subject: [TCP]: Perform setting of common control fields in one place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case of segments which are purely for control without any
data (SYN/ACK/FIN/RST), many fields are set to common values
in multiple places.

i386 results:

$ gcc --version
gcc (GCC) 4.1.2 20070626 (Red Hat 4.1.2-13)

$ codiff tcp_output.o.old tcp_output.o.new
net/ipv4/tcp_output.c:
  tcp_xmit_probe_skb    |  -48
  tcp_send_ack          |  -56
  tcp_retransmit_skb    |  -79
  tcp_connect           |  -43
  tcp_send_active_reset |  -35
  tcp_make_synack       |  -42
  tcp_send_fin          |  -48
 7 functions changed, 351 bytes removed

net/ipv4/tcp_output.c:
  tcp_init_nondata_skb |  +90
 1 function changed, 90 bytes added

tcp_output.o.mid:
 8 functions changed, 90 bytes added, 351 bytes removed, diff: -261

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 91 ++++++++++++++++++++-------------------------------
 1 file changed, 36 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6c7cd0a9098..89f0188885c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -327,6 +327,26 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
 	}
 }
 
+/* Constructs common control bits of non-data skb. If SYN/FIN is present,
+ * auto increment end seqno.
+ */
+static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
+{
+	skb->csum = 0;
+
+	TCP_SKB_CB(skb)->flags = flags;
+	TCP_SKB_CB(skb)->sacked = 0;
+
+	skb_shinfo(skb)->gso_segs = 1;
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_type = 0;
+
+	TCP_SKB_CB(skb)->seq = seq;
+	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+		seq++;
+	TCP_SKB_CB(skb)->end_seq = seq;
+}
+
 static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 					 __u32 tstamp, __u8 **md5_hash)
 {
@@ -1864,12 +1884,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
-			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
-			skb_shinfo(skb)->gso_segs = 1;
-			skb_shinfo(skb)->gso_size = 0;
-			skb_shinfo(skb)->gso_type = 0;
+			/* Reuse, even though it does some unnecessary work */
+			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
+					     TCP_SKB_CB(skb)->flags);
 			skb->ip_summed = CHECKSUM_NONE;
-			skb->csum = 0;
 		}
 	}
 
@@ -2068,16 +2086,9 @@ void tcp_send_fin(struct sock *sk)
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
-		skb->csum = 0;
-		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
-		TCP_SKB_CB(skb)->sacked = 0;
-		skb_shinfo(skb)->gso_segs = 1;
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
-
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
-		TCP_SKB_CB(skb)->seq = tp->write_seq;
-		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+		tcp_init_nondata_skb(skb, tp->write_seq,
+				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2101,16 +2112,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
+	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
@@ -2198,12 +2202,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	TCP_ECN_make_synack(req, th);
 	th->source = inet_sk(sk)->sport;
 	th->dest = ireq->rmt_port;
-	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
+	/* Setting of flags are superfluous here for callers (and ECE is
+	 * not even correctly set)
+	 */
+	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
+			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2235,7 +2238,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			       NULL)
 			      );
 
-	skb->csum = 0;
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
@@ -2327,16 +2329,9 @@ int tcp_connect(struct sock *sk)
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
 
-	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, buff);
-	TCP_SKB_CB(buff)->sacked = 0;
-	skb_shinfo(buff)->gso_segs = 1;
-	skb_shinfo(buff)->gso_size = 0;
-	skb_shinfo(buff)->gso_type = 0;
-	buff->csum = 0;
 	tp->snd_nxt = tp->write_seq;
-	TCP_SKB_CB(buff)->seq = tp->write_seq++;
-	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2441,15 +2436,9 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(buff, MAX_TCP_HEADER);
-	buff->csum = 0;
-	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(buff)->sacked = 0;
-	skb_shinfo(buff)->gso_segs = 1;
-	skb_shinfo(buff)->gso_size = 0;
-	skb_shinfo(buff)->gso_type = 0;
+	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
 
 	/* Send it off, this clears delayed acks for us. */
-	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 }
@@ -2477,19 +2466,11 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 
 	/* Reserve space for headers and set control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
 	/* Use a previous sequence.  This should cause the other
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	TCP_SKB_CB(skb)->seq = tp->snd_una - !urgent;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
-- 
cgit v1.2.3


From 789675e216617b1331875c42a81f58227a06df91 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 3 Jan 2008 20:40:01 -0800
Subject: [NET]: Avoid divides in net/core/gen_estimator.c

We can void divides (as seen with CONFIG_CC_OPTIMIZE_FOR_SIZE=y on x86)
changing ((HZ<<idx)/4) to ((HZ/4) << idx)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index daadbcc4e8d..86037d16f19 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -135,7 +135,7 @@ skip:
 	}
 
 	if (!list_empty(&elist[idx].list))
-		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 	rcu_read_unlock();
 }
 
@@ -191,7 +191,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	}
 
 	if (list_empty(&elist[idx].list))
-		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	return 0;
-- 
cgit v1.2.3


From 65f7651788e18fadb2fbb7276af935d7871e1803 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 3 Jan 2008 20:46:48 -0800
Subject: [NET]: prot_inuse cleanups and optimizations

1) Cleanups (all functions are prefixed by sock_prot_inuse)

sock_prot_inc_use(prot) -> sock_prot_inuse_add(prot,-1)
sock_prot_dec_use(prot) -> sock_prot_inuse_add(prot,-1)
sock_prot_inuse()       -> sock_prot_inuse_get()

New functions :

sock_prot_inuse_init() and sock_prot_inuse_free() to abstract pcounter use.

2) if CONFIG_PROC_FS=n, we can zap 'inuse' member from "struct proto",
since nobody wants to read the inuse value.

This saves 1372 bytes on i386/SMP and some cpu cycles.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c               | 6 +++---
 net/ipv4/inet_hashtables.c    | 6 +++---
 net/ipv4/inet_timewait_sock.c | 2 +-
 net/ipv4/proc.c               | 9 +++++----
 net/ipv4/raw.c                | 4 ++--
 net/ipv4/udp.c                | 2 +-
 net/ipv6/inet6_hashtables.c   | 4 ++--
 net/ipv6/ipv6_sockglue.c      | 8 ++++----
 net/ipv6/proc.c               | 8 ++++----
 9 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 3d7757ee2fc..1c4b1cd16d6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1913,7 +1913,7 @@ int proto_register(struct proto *prot, int alloc_slab)
 	char *request_sock_slab_name = NULL;
 	char *timewait_sock_slab_name;
 
-	if (pcounter_alloc(&prot->inuse) != 0) {
+	if (sock_prot_inuse_init(prot) != 0) {
 		printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
 		goto out;
 	}
@@ -1984,7 +1984,7 @@ out_free_sock_slab:
 	kmem_cache_destroy(prot->slab);
 	prot->slab = NULL;
 out_free_inuse:
-	pcounter_free(&prot->inuse);
+	sock_prot_inuse_free(prot);
 out:
 	return -ENOBUFS;
 }
@@ -1997,7 +1997,7 @@ void proto_unregister(struct proto *prot)
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 
-	pcounter_free(&prot->inuse);
+	sock_prot_inuse_free(prot);
 
 	if (prot->slab != NULL) {
 		kmem_cache_destroy(prot->slab);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 88a059e04e3..619c63c6948 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -278,7 +278,7 @@ unique:
 	sk->sk_hash = hash;
 	BUG_TRAP(sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 
 	if (twp) {
@@ -321,7 +321,7 @@ void __inet_hash_nolisten(struct inet_hashinfo *hashinfo, struct sock *sk)
 
 	write_lock(lock);
 	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
@@ -342,7 +342,7 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
 
 	inet_listen_wlock(hashinfo);
 	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 	wake_up(&hashinfo->lhash_wait);
 }
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 1b7db420896..876169f3a52 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -91,7 +91,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 
 	/* Step 2: Remove SK from established hash. */
 	if (__sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
+		sock_prot_inuse_add(sk->sk_prot, -1);
 
 	/* Step 3: Hash TW into TIMEWAIT chain. */
 	inet_twsk_add_node(tw, &ehead->twchain);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 53bc010beef..cb3787fbeb9 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -53,13 +53,14 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
-		   sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
+		   sock_prot_inuse_get(&tcp_prot),
+		   atomic_read(&tcp_orphan_count),
 		   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
-	seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse(&udp_prot),
+	seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
 		   atomic_read(&udp_memory_allocated));
-	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
-	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
+	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
+	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
 	seq_printf(seq,  "FRAG: inuse %d memory %d\n",
 			ip_frag_nqueues(), ip_frag_mem());
 	return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5aec5a5e5f1..e811034b1b0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -92,7 +92,7 @@ void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h)
 
 	write_lock_bh(&h->lock);
 	sk_add_node(sk, head);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock_bh(&h->lock);
 }
 EXPORT_SYMBOL_GPL(raw_hash_sk);
@@ -101,7 +101,7 @@ void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h)
 {
 	write_lock_bh(&h->lock);
 	if (sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
+		sock_prot_inuse_add(sk->sk_prot, -1);
 	write_unlock_bh(&h->lock);
 }
 EXPORT_SYMBOL_GPL(raw_unhash_sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 02fcccd0486..cb2411cb87d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -230,7 +230,7 @@ gotit:
 	if (sk_unhashed(sk)) {
 		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
 		sk_add_node(sk, head);
-		sock_prot_inc_use(sk->sk_prot);
+		sock_prot_inuse_add(sk->sk_prot, 1);
 	}
 	error = 0;
 fail:
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 0765d8bd380..a66a7d8e281 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -43,7 +43,7 @@ void __inet6_hash(struct inet_hashinfo *hashinfo,
 	}
 
 	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 }
 EXPORT_SYMBOL(__inet6_hash);
@@ -216,7 +216,7 @@ unique:
 	BUG_TRAP(sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
 	sk->sk_hash = hash;
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 
 	if (twp != NULL) {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 20fece4ad3d..bf2a686aa13 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -268,8 +268,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 				struct inet_connection_sock *icsk = inet_csk(sk);
 
 				local_bh_disable();
-				sock_prot_dec_use(sk->sk_prot);
-				sock_prot_inc_use(&tcp_prot);
+				sock_prot_inuse_add(sk->sk_prot, -1);
+				sock_prot_inuse_add(&tcp_prot, 1);
 				local_bh_enable();
 				sk->sk_prot = &tcp_prot;
 				icsk->icsk_af_ops = &ipv4_specific;
@@ -282,8 +282,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 				if (sk->sk_protocol == IPPROTO_UDPLITE)
 					prot = &udplite_prot;
 				local_bh_disable();
-				sock_prot_dec_use(sk->sk_prot);
-				sock_prot_inc_use(prot);
+				sock_prot_inuse_add(sk->sk_prot, -1);
+				sock_prot_inuse_add(prot, 1);
 				local_bh_enable();
 				sk->sk_prot = prot;
 				sk->sk_socket->ops = &inet_dgram_ops;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 41e9980b3e0..571d95a21c1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -36,13 +36,13 @@ static struct proc_dir_entry *proc_net_devsnmp6;
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "TCP6: inuse %d\n",
-		       sock_prot_inuse(&tcpv6_prot));
+		       sock_prot_inuse_get(&tcpv6_prot));
 	seq_printf(seq, "UDP6: inuse %d\n",
-		       sock_prot_inuse(&udpv6_prot));
+		       sock_prot_inuse_get(&udpv6_prot));
 	seq_printf(seq, "UDPLITE6: inuse %d\n",
-			sock_prot_inuse(&udplitev6_prot));
+			sock_prot_inuse_get(&udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
-		       sock_prot_inuse(&rawv6_prot));
+		       sock_prot_inuse_get(&rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
 		       ip6_frag_nqueues(), ip6_frag_mem());
 	return 0;
-- 
cgit v1.2.3


From 74feb6e84e189707ca1324a92f7206617efbd735 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 24 Jan 2008 00:15:42 -0800
Subject: [ICMP]: Avoid sparse warnings in net/ipv4/icmp.c

   CHECK   net/ipv4/icmp.c
net/ipv4/icmp.c:249:13: warning: context imbalance in 'icmp_xmit_unlock' -
unexpected unlock
net/ipv4/icmp.c:376:13: warning: context imbalance in 'icmp_reply' - different
lock contexts for basic block
net/ipv4/icmp.c:430:6: warning: context imbalance in 'icmp_send' - different
lock contexts for basic block

Solution is to declare both icmp_xmit_lock() and icmp_xmit_unlock() as inline

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ce5b4be559a..1a6024978e2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -232,7 +232,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
 #define icmp_socket	__get_cpu_var(__icmp_socket)
 
-static __inline__ int icmp_xmit_lock(void)
+static inline int icmp_xmit_lock(void)
 {
 	local_bh_disable();
 
@@ -246,7 +246,7 @@ static __inline__ int icmp_xmit_lock(void)
 	return 0;
 }
 
-static void icmp_xmit_unlock(void)
+static inline void icmp_xmit_unlock(void)
 {
 	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
 }
-- 
cgit v1.2.3


From 2a75de0c1de2dde9ef41aeb45a21048681421b8a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sat, 5 Jan 2008 23:08:49 -0800
Subject: [NETNS]: Should build with CONFIG_SYSCTL=n

Previous NETNS patches broke CONFIG_SYSCTL=n case

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44cb252d2f6..03db15b1030 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1542,7 +1542,6 @@ static void devinet_sysctl_unregister(struct in_device *idev)
 	__devinet_sysctl_unregister(&idev->cnf);
 	neigh_sysctl_unregister(idev->arp_parms);
 }
-#endif
 
 static struct ctl_table ctl_forward_entry[] = {
 	{
@@ -1565,18 +1564,20 @@ static __net_initdata struct ctl_path net_ipv4_path[] = {
 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
 	{ },
 };
+#endif
 
 static __net_init int devinet_init_net(struct net *net)
 {
 	int err;
-	struct ctl_table *tbl;
 	struct ipv4_devconf *all, *dflt;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl = ctl_forward_entry;
 	struct ctl_table_header *forw_hdr;
+#endif
 
 	err = -ENOMEM;
 	all = &ipv4_devconf;
 	dflt = &ipv4_devconf_dflt;
-	tbl = ctl_forward_entry;
 
 	if (net != &init_net) {
 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
@@ -1587,6 +1588,7 @@ static __net_init int devinet_init_net(struct net *net)
 		if (dflt == NULL)
 			goto err_alloc_dflt;
 
+#ifdef CONFIG_SYSCTL
 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
 		if (tbl == NULL)
 			goto err_alloc_ctl;
@@ -1594,6 +1596,7 @@ static __net_init int devinet_init_net(struct net *net)
 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
 		tbl[0].extra1 = all;
 		tbl[0].extra2 = net;
+#endif
 	}
 
 #ifdef CONFIG_SYSCTL
@@ -1611,9 +1614,9 @@ static __net_init int devinet_init_net(struct net *net)
 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
 	if (forw_hdr == NULL)
 		goto err_reg_ctl;
+	net->ipv4.forw_hdr = forw_hdr;
 #endif
 
-	net->ipv4.forw_hdr = forw_hdr;
 	net->ipv4.devconf_all = all;
 	net->ipv4.devconf_dflt = dflt;
 	return 0;
@@ -1626,8 +1629,8 @@ err_reg_dflt:
 err_reg_all:
 	if (tbl != ctl_forward_entry)
 		kfree(tbl);
-#endif
 err_alloc_ctl:
+#endif
 	if (dflt != &ipv4_devconf_dflt)
 		kfree(dflt);
 err_alloc_dflt:
@@ -1639,15 +1642,15 @@ err_alloc_all:
 
 static __net_exit void devinet_exit_net(struct net *net)
 {
+#ifdef CONFIG_SYSCTL
 	struct ctl_table *tbl;
 
 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
-#ifdef CONFIG_SYSCTL
 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
-#endif
 	kfree(tbl);
+#endif
 	kfree(net->ipv4.devconf_dflt);
 	kfree(net->ipv4.devconf_all);
 }
-- 
cgit v1.2.3


From bb5cf80e94ad9650c4bd39e92fb917af8e87fa43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 5 Jan 2008 23:11:31 -0800
Subject: [NETFILTER]: Kill some supper dupper bloatry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

/me awards the bloatiest-of-all-net/-.c-code award to
nf_conntrack_netlink.c, congratulations to all the authors :-/!

Hall of (unquestionable) fame (measured per inline, top 10 under
net/):
  -4496 ctnetlink_parse_tuple        netfilter/nf_conntrack_netlink.c
  -2165 ctnetlink_dump_tuples        netfilter/nf_conntrack_netlink.c
  -2115 __ip_vs_get_out_rt           ipv4/ipvs/ip_vs_xmit.c
  -1924 xfrm_audit_helper_pktinfo    xfrm/xfrm_state.c
  -1799 ctnetlink_parse_tuple_proto  netfilter/nf_conntrack_netlink.c
  -1268 ctnetlink_parse_tuple_ip     netfilter/nf_conntrack_netlink.c
  -1093 ctnetlink_exp_dump_expect    netfilter/nf_conntrack_netlink.c
  -1060 void ccid3_update_send_interval  dccp/ccids/ccid3.c
  -983  ctnetlink_dump_tuples_proto  netfilter/nf_conntrack_netlink.c
  -827  ctnetlink_exp_dump_tuple     netfilter/nf_conntrack_netlink.c

  (i386 / gcc (GCC) 4.1.2 20070626 (Red Hat 4.1.2-13) /
   allyesconfig except CONFIG_FORCED_INLINING)

...and I left < 200 byte gains as future work item.

After iterative inline removal, I finally have this:

net/netfilter/nf_conntrack_netlink.c:
  ctnetlink_exp_fill_info   | -1104
  ctnetlink_new_expect      | -1572
  ctnetlink_fill_info       | -1303
  ctnetlink_new_conntrack   | -2230
  ctnetlink_get_expect      | -341
  ctnetlink_del_expect      | -352
  ctnetlink_expect_event    | -1110
  ctnetlink_conntrack_event | -1548
  ctnetlink_del_conntrack   | -729
  ctnetlink_get_conntrack   | -728
 10 functions changed, 11017 bytes removed, diff: -11017

net/netfilter/nf_conntrack_netlink.c:
  ctnetlink_parse_tuple     | +419
  dump_nat_seq_adj          | +183
  ctnetlink_dump_counters   | +166
  ctnetlink_dump_tuples     | +261
  ctnetlink_exp_dump_expect | +633
  ctnetlink_change_status   | +460
 6 functions changed, 2122 bytes added, diff: +2122

net/netfilter/nf_conntrack_netlink.o:
 16 functions changed, 2122 bytes added, 11017 bytes removed, diff: -8895

Without a number of CONFIG.*DEBUGs, I got this:
net/netfilter/nf_conntrack_netlink.o:
 16 functions changed, 2122 bytes added, 11029 bytes removed, diff: -8907

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d93d58d688b..38141f104db 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -95,7 +95,7 @@ nla_put_failure:
 	return -1;
 }
 
-static inline int
+static int
 ctnetlink_dump_tuples(struct sk_buff *skb,
 		      const struct nf_conntrack_tuple *tuple)
 {
@@ -205,7 +205,7 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NF_CT_ACCT
-static inline int
+static int
 ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
 			enum ip_conntrack_dir dir)
 {
@@ -284,7 +284,7 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NF_NAT_NEEDED
-static inline int
+static int
 dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
 {
 	struct nlattr *nest_parms;
@@ -648,7 +648,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
 	return ret;
 }
 
-static inline int
+static int
 ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
 		      enum ctattr_tuple type, u_int8_t l3num)
 {
@@ -888,7 +888,7 @@ out:
 	return err;
 }
 
-static inline int
+static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 {
 	unsigned long d;
@@ -1349,7 +1349,7 @@ nla_put_failure:
 	return -1;
 }
 
-static inline int
+static int
 ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			  const struct nf_conntrack_expect *exp)
 {
-- 
cgit v1.2.3


From ad1b30b1c282bc8c726a9b123a1312a3930bcd42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 5 Jan 2008 23:12:40 -0800
Subject: [IPVS]: Kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/ipv4/ipvs/ip_vs_xmit.c:
  ip_vs_icmp_xmit   | -638
  ip_vs_tunnel_xmit | -674
  ip_vs_nat_xmit    | -716
  ip_vs_dr_xmit     | -682
 4 functions changed, 2710 bytes removed, diff: -2710

net/ipv4/ipvs/ip_vs_xmit.c:
  __ip_vs_get_out_rt | +595
 1 function changed, 595 bytes added, diff: +595

net/ipv4/ipvs/ip_vs_xmit.o:
 5 functions changed, 595 bytes added, 2710 bytes removed, diff: -2115

Without some CONFIG.*DEBUGs:

net/ipv4/ipvs/ip_vs_xmit.o:
 5 functions changed, 383 bytes added, 1513 bytes removed, diff: -1130

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_xmit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 1e96bf82a0b..8436bf81859 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -59,7 +59,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
 	return dst;
 }
 
-static inline struct rtable *
+static struct rtable *
 __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 {
 	struct rtable *rt;			/* Route to the other host */
-- 
cgit v1.2.3


From cf35f43e6e41b160d8dedd80a127210fd3be9ada Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 5 Jan 2008 23:13:20 -0800
Subject: [XFRM]: Kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/xfrm/xfrm_state.c:
  xfrm_audit_state_delete          | -589
  xfrm_replay_check                | -542
  xfrm_audit_state_icvfail         | -520
  xfrm_audit_state_add             | -589
  xfrm_audit_state_replay_overflow | -523
  xfrm_audit_state_notfound_simple | -509
  xfrm_audit_state_notfound        | -521
 7 functions changed, 3793 bytes removed, diff: -3793

net/xfrm/xfrm_state.c:
  xfrm_audit_helper_pktinfo | +522
  xfrm_audit_helper_sainfo  | +598
 2 functions changed, 1120 bytes added, diff: +1120

net/xfrm/xfrm_state.o:
 9 functions changed, 1120 bytes added, 3793 bytes removed, diff: -2673

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 65f5ea4ae4c..2585e4b0d27 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2010,8 +2010,8 @@ void __init xfrm_state_init(void)
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-static inline void xfrm_audit_helper_sainfo(struct xfrm_state *x,
-					    struct audit_buffer *audit_buf)
+static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
+				     struct audit_buffer *audit_buf)
 {
 	struct xfrm_sec_ctx *ctx = x->security;
 	u32 spi = ntohl(x->id.spi);
@@ -2038,8 +2038,8 @@ static inline void xfrm_audit_helper_sainfo(struct xfrm_state *x,
 	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
 }
 
-static inline void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
-					     struct audit_buffer *audit_buf)
+static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
+				      struct audit_buffer *audit_buf)
 {
 	struct iphdr *iph4;
 	struct ipv6hdr *iph6;
-- 
cgit v1.2.3


From c4e18dade1f878db33ed38927de22e63d550970d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 5 Jan 2008 23:13:58 -0800
Subject: [CCID3]: Kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without a number of CONFIG.*DEBUG:

net/dccp/ccids/ccid3.c:
  ccid3_hc_tx_update_x          | -170
  ccid3_hc_tx_packet_sent       | -175
  ccid3_hc_tx_packet_recv       | -169
  ccid3_hc_tx_no_feedback_timer | -192
  ccid3_hc_tx_send_packet       | -144
 5 functions changed, 850 bytes removed, diff: -850

net/dccp/ccids/ccid3.c:
  ccid3_update_send_interval | +191
 1 function changed, 191 bytes added, diff: +191

net/dccp/ccids/ccid3.o:
 6 functions changed, 191 bytes added, 850 bytes removed, diff: -659

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d292f23c002..e76f460af0e 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -97,7 +97,7 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
 /*
  * Recalculate t_ipi and delta (should be called whenever X changes)
  */
-static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
+static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
 	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
 	hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
-- 
cgit v1.2.3


From a067d9ac39cd207b5a0994c73199a56e7d5a17a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 5 Jan 2008 23:17:49 -0800
Subject: [NET]: Remove obsolete comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems that ip_build_xmit is no longer used in here and
ip_append_data is used.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6dd1d9c5d52..8950d18001f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1341,8 +1341,6 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
  *
  *	Should run single threaded per socket because it uses the sock
  *     	structure to pass arguments.
- *
- *	LATER: switch from ip_build_xmit to ip_append_*
  */
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
 		   unsigned int len)
-- 
cgit v1.2.3


From d66e37a99d323012165ce91fd5c4518e2fcea0c5 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Mon, 7 Jan 2008 21:46:15 -0800
Subject: [XFRM] Statistics: Add outbound-dropping error.

o Increment PolError counter when flow_cache_lookup() returns
  errored pointer.

o Increment NoStates counter at larval-drop.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 280f8ded975..d83227baaa0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1510,8 +1510,10 @@ restart:
 		policy = flow_cache_lookup(fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
 		err = PTR_ERR(policy);
-		if (IS_ERR(policy))
+		if (IS_ERR(policy)) {
+			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
 			goto dropdst;
+		}
 	}
 
 	if (!policy)
@@ -1603,6 +1605,7 @@ restart:
 				/* EREMOTE tells the caller to generate
 				 * a one-shot blackhole route.
 				 */
+				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
 				xfrm_pol_put(policy);
 				return -EREMOTE;
 			}
-- 
cgit v1.2.3


From 64c31b3f76482bb64459e786f9eca3bd0164d153 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 7 Jan 2008 22:34:29 -0800
Subject: [XFRM] xfrm_policy_destroy: Rename and relative fixes.

Since __xfrm_policy_destroy is used to destory the resources
allocated by xfrm_policy_alloc. So using the name
__xfrm_policy_destroy is not correspond with xfrm_policy_alloc.
Rename it to xfrm_policy_destroy.

And along with some instances that call xfrm_policy_alloc
but not using xfrm_policy_destroy to destroy the resource,
fix them.

Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c       | 6 ++----
 net/xfrm/xfrm_policy.c | 4 ++--
 net/xfrm/xfrm_user.c   | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 76dcd882f87..16b72b5570c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2291,8 +2291,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	return 0;
 
 out:
-	security_xfrm_policy_free(xp);
-	kfree(xp);
+	xfrm_policy_destroy(xp);
 	return err;
 }
 
@@ -3236,8 +3235,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 	return xp;
 
 out:
-	security_xfrm_policy_free(xp);
-	kfree(xp);
+	xfrm_policy_destroy(xp);
 	return NULL;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d83227baaa0..534b29eb46f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
 
 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
 
-void __xfrm_policy_destroy(struct xfrm_policy *policy)
+void xfrm_policy_destroy(struct xfrm_policy *policy)
 {
 	BUG_ON(!policy->dead);
 
@@ -233,7 +233,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy)
 	security_xfrm_policy_free(policy);
 	kfree(policy);
 }
-EXPORT_SYMBOL(__xfrm_policy_destroy);
+EXPORT_SYMBOL(xfrm_policy_destroy);
 
 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6424e536051..35fc16ae50a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1043,7 +1043,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 	return xp;
  error:
 	*errp = err;
-	kfree(xp);
+	xfrm_policy_destroy(xp);
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 67b23219ce2f78352b0c566a472ff16c1b0fea9a Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 7 Jan 2008 22:38:42 -0800
Subject: [BLUETOOTH]: Use sockfd_put()

The function sockfd_lookup uses fget on the value that is stored in
the file field of the returned structure, so fput should ultimately be
applied to this value.  This can be done directly, but it seems better
to use the specific macro sockfd_put, which does the same thing.

The problem was fixed using the following semantic patch.
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression s;
@@

   s = sockfd_lookup(...)
   ...
+  sockfd_put(s);
?- fput(s->file);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/bnep/sock.c |  4 ++--
 net/bluetooth/cmtp/sock.c |  4 ++--
 net/bluetooth/hidp/sock.c | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 9ebd3c64474..81065e548a1 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -94,7 +94,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			return err;
 
 		if (nsock->sk->sk_state != BT_CONNECTED) {
-			fput(nsock->file);
+			sockfd_put(nsock);
 			return -EBADFD;
 		}
 
@@ -103,7 +103,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			if (copy_to_user(argp, &ca, sizeof(ca)))
 				err = -EFAULT;
 		} else
-			fput(nsock->file);
+			sockfd_put(nsock);
 
 		return err;
 
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 783edab12ce..8c7f7bc4e0b 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -88,7 +88,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			return err;
 
 		if (nsock->sk->sk_state != BT_CONNECTED) {
-			fput(nsock->file);
+			sockfd_put(nsock);
 			return -EBADFD;
 		}
 
@@ -97,7 +97,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			if (copy_to_user(argp, &ca, sizeof(ca)))
 				err = -EFAULT;
 		} else
-			fput(nsock->file);
+			sockfd_put(nsock);
 
 		return err;
 
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 3292b956a7c..f4dd02ca9a9 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -86,13 +86,13 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 
 		isock = sockfd_lookup(ca.intr_sock, &err);
 		if (!isock) {
-			fput(csock->file);
+			sockfd_put(csock);
 			return err;
 		}
 
 		if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) {
-			fput(csock->file);
-			fput(isock->file);
+			sockfd_put(csock);
+			sockfd_put(isock);
 			return -EBADFD;
 		}
 
@@ -101,8 +101,8 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			if (copy_to_user(argp, &ca, sizeof(ca)))
 				err = -EFAULT;
 		} else {
-			fput(csock->file);
-			fput(isock->file);
+			sockfd_put(csock);
+			sockfd_put(isock);
 		}
 
 		return err;
-- 
cgit v1.2.3


From 40ccbf525e55fc6d1f3a88c1e98b13db4dd618db Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 7 Jan 2008 22:39:57 -0800
Subject: [PACKET]: Fix sparse warnings in af_packet.c

  CHECK   net/packet/af_packet.c
net/packet/af_packet.c:1876:14: warning: context imbalance in 'packet_seq_start' - wrong count at exit
net/packet/af_packet.c:1888:13: warning: context imbalance in 'packet_seq_stop' - unexpected unlock

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 43e49f46ad4..b8b827c7062 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1870,6 +1870,7 @@ static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
 }
 
 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(seq_file_net(seq)->packet.sklist_lock)
 {
 	struct net *net = seq_file_net(seq);
 	read_lock(&net->packet.sklist_lock);
@@ -1886,6 +1887,7 @@ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
+	__releases(seq_file_net(seq)->packet.sklist_lock)
 {
 	struct net *net = seq_file_net(seq);
 	read_unlock(&net->packet.sklist_lock);
-- 
cgit v1.2.3


From 6666351df90656677723f8232b3fdd26a500e51e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 8 Jan 2008 01:35:52 -0800
Subject: [XFRM]: xfrm_state_clone() should be static, not exported

xfrm_state_clone() is not used outside of net/xfrm/xfrm_state.c
There is no need to export it.

Spoted by sparse checker.
   CHECK   net/xfrm/xfrm_state.c
net/xfrm/xfrm_state.c:1103:19: warning: symbol 'xfrm_state_clone' was not
declared. Should it be static?

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2585e4b0d27..3003503d0c9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1100,7 +1100,7 @@ out:
 EXPORT_SYMBOL(xfrm_state_add);
 
 #ifdef CONFIG_XFRM_MIGRATE
-struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
+static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 {
 	int err = -ENOMEM;
 	struct xfrm_state *x = xfrm_state_alloc();
@@ -1175,7 +1175,6 @@ struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 	kfree(x);
 	return NULL;
 }
-EXPORT_SYMBOL(xfrm_state_clone);
 
 /* xfrm_state_lock is held */
 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
-- 
cgit v1.2.3


From 69f817b654d683265118188bbfb8bc0d8978cce6 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <hschaa@suse.de>
Date: Fri, 21 Dec 2007 15:16:35 +0100
Subject: mac80211: Restore rx.fc before every invocation of
 ieee80211_invoke_rx_handlers

This patch fixes a problem with rx handling on multiple interfaces. Especially
when using hardware-scanning and a wireless driver (i.e. iwlwifi) which is
able to receive data while scanning.

The rx handlers can modify the skb and the frame control field (see
ieee80211_rx_h_remove_qos_control) but since every interface gets its own
copy of the skb each should get its own copy of rx.fc too.

In my case the wlan0-interface did not remove the qos-control from the frame
because the corresponding flag in rx.fc was already removed while processing
the frame on the master interface. Therefore somehow corrupted frames were
passed to the userspace.

Signed-off-by: Helmut Schaa <hschaa@suse.de>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 505159f8dff..306e6fc25d8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1746,6 +1746,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 				       prev->dev->name);
 			continue;
 		}
+		rx.fc = le16_to_cpu(hdr->frame_control);
 		rx.skb = skb_new;
 		rx.dev = prev->dev;
 		rx.sdata = prev;
@@ -1754,6 +1755,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		prev = sdata;
 	}
 	if (prev) {
+		rx.fc = le16_to_cpu(hdr->frame_control);
 		rx.skb = skb;
 		rx.dev = prev->dev;
 		rx.sdata = prev;
-- 
cgit v1.2.3


From 426706c0791904766e10bef512d86786f2f62857 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Sun, 23 Dec 2007 04:39:17 +0100
Subject: rc80211-pid: export human-readable target_pf value to debugfs

Export the non-shifted target_pf value to debugfs, so that it's human-readable.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.h      | 2 +-
 net/mac80211/rc80211_pid_algo.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 425eb708182..81aa4ea1995 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -38,7 +38,7 @@
  * link quality is good, the controller will fail to adjust failed frames
  * percentage to the target. This is intentional.
  */
-#define RC_PID_TARGET_PF (11 << RC_PID_ARITH_SHIFT)
+#define RC_PID_TARGET_PF 11
 
 /* Rate behaviour normalization quantity over time. */
 #define RC_PID_NORM_OFFSET 3
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 631e4688826..b84e51480c8 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -210,7 +210,7 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	rate_control_pid_normalize(pinfo, mode->num_rates);
 
 	/* Compute the proportional, integral and derivative errors. */
-	err_prop = pinfo->target - pf;
+	err_prop = (pinfo->target << RC_PID_ARITH_SHIFT) - pf;
 
 	err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift;
 	spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
-- 
cgit v1.2.3


From ca5fbca924b845863ab9da00ac90b3384445f497 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Sun, 23 Dec 2007 04:40:32 +0100
Subject: rc80211-pid: add kerneldoc for tunable parameters

Add a kerneldoc description for parameters which are tunable through debugfs.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 81aa4ea1995..6afd3cebdec 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -1,5 +1,6 @@
 /*
  * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
+ * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -119,6 +120,29 @@ struct rc_pid_events_file_info {
 	unsigned int next_entry;
 };
 
+/**
+ * struct rc_pid_debugfs_entries - tunable parameters
+ *
+ * Algorithm parameters, tunable via debugfs.
+ * @dir: the debugfs directory for a specific phy
+ * @target: target percentage for failed frames
+ * @sampling_period: error sampling interval in milliseconds
+ * @coeff_p: absolute value of the proportional coefficient
+ * @coeff_i: absolute value of the integral coefficient
+ * @coeff_d: absolute value of the derivative coefficient
+ * @smoothing_shift: absolute value of the integral smoothing factor (i.e.
+ *	amount of smoothing introduced by the exponential moving average)
+ * @sharpen_factor: absolute value of the derivative sharpening factor (i.e.
+ *	amount of emphasis given to the derivative term after low activity
+ *	events)
+ * @sharpen_duration: duration of the sharpening effect after the detected low
+ *	activity event, relative to sampling_period
+ * @norm_offset: amount of normalization periodically performed on the learnt
+ *	rate behaviour values (lower means we should trust more what we learnt
+ *	about behaviour of rates, higher means we should trust more the natural
+ *	ordering of rates)
+ * @fast_start: if Y, push high rates right after initialization
+ */
 struct rc_pid_debugfs_entries {
 	struct dentry *dir;
 	struct dentry *target;
-- 
cgit v1.2.3


From fa44327c06492c9bd625dbc8dbe35e5d5965fec6 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Sun, 23 Dec 2007 04:41:19 +0100
Subject: rc80211-pid: simplify and fix shift_adjust

Simplify and fix rate_control_pid_shift_adjust(). A bug prevented correct
mapping of sorted rates, and readability was seriously flawed.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid_algo.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index b84e51480c8..3e26280d314 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -74,29 +74,27 @@ static int rate_control_pid_shift_adjust(struct rc_pid_rateinfo *r,
 {
 	int i, j, k, tmp;
 
-	if (cur + adj < 0)
-		return 0;
-	if (cur + adj >= l)
-		return l - 1;
+	j = r[cur].rev_index;
+	i = j + adj;
 
-	i = r[cur + adj].rev_index;
+	if (i < 0)
+		return r[0].index;
+	if (i >= l - 1)
+		return r[l - 1].index;
 
-	j = r[cur].rev_index;
+	tmp = i;
 
 	if (adj < 0) {
-			tmp = i;
-			for (k = j; k >= i; k--)
-				if (r[k].diff <= r[j].diff)
-					tmp = k;
-			return r[tmp].index;
-	} else if (adj > 0) {
-			tmp = i;
-			for (k = i + 1; k + i < l; k++)
-				if (r[k].diff <= r[i].diff)
-					tmp = k;
-			return r[tmp].index;
+		for (k = j; k >= i; k--)
+			if (r[k].diff <= r[j].diff)
+				tmp = k;
+	} else {
+		for (k = i + 1; k + i < l; k++)
+			if (r[k].diff <= r[i].diff)
+				tmp = k;
 	}
-	return cur + adj;
+
+	return r[tmp].index;
 }
 
 static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
-- 
cgit v1.2.3


From 13e05aa631b195ce30737b320da17e7542c82ead Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Sun, 23 Dec 2007 04:43:57 +0100
Subject: rc80211-pid: fix sta_info refcounting

Fix a bug which caused uncorrect refcounting of PHYs in mac80211. Thanks to
Johannes Berg for spotting this out.

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid_algo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 3e26280d314..da3529017da 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -254,7 +254,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
 	if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
-		return;
+		goto ignore;
 
 	spinfo = sta->rate_ctrl_priv;
 	spinfo->tx_num_xmit++;
@@ -295,6 +295,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	if (time_after(jiffies, spinfo->last_sample + period))
 		rate_control_pid_sample(pinfo, local, sta);
 
+ignore:
 	sta_info_put(sta);
 }
 
-- 
cgit v1.2.3


From d439810bda5082e213c59c2aa8108ae4c0e520ef Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Sun, 23 Dec 2007 04:44:56 +0100
Subject: rc80211-pid: pf_target tuning

Set a better value for percentage target for failed frames. The previous value
slowed down too much rate increases in case of permanently low activity. While
at it, increase readability.

Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 6afd3cebdec..b98e16afe4d 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -11,41 +11,41 @@
 #define RC80211_PID_H
 
 /* Sampling period for measuring percentage of failed frames. */
-#define RC_PID_INTERVAL (HZ / 8)
+#define RC_PID_INTERVAL			(HZ / 8)
 
 /* Exponential averaging smoothness (used for I part of PID controller) */
-#define RC_PID_SMOOTHING_SHIFT 3
-#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
+#define RC_PID_SMOOTHING_SHIFT		3
+#define RC_PID_SMOOTHING		(1 << RC_PID_SMOOTHING_SHIFT)
 
 /* Sharpening factor (used for D part of PID controller) */
-#define RC_PID_SHARPENING_FACTOR 0
-#define RC_PID_SHARPENING_DURATION 0
+#define RC_PID_SHARPENING_FACTOR	0
+#define RC_PID_SHARPENING_DURATION	0
 
 /* Fixed point arithmetic shifting amount. */
-#define RC_PID_ARITH_SHIFT 8
+#define RC_PID_ARITH_SHIFT		8
 
 /* Fixed point arithmetic factor. */
-#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
+#define RC_PID_ARITH_FACTOR		(1 << RC_PID_ARITH_SHIFT)
 
 /* Proportional PID component coefficient. */
-#define RC_PID_COEFF_P 15
+#define RC_PID_COEFF_P			15
 /* Integral PID component coefficient. */
-#define RC_PID_COEFF_I 9
+#define RC_PID_COEFF_I			9
 /* Derivative PID component coefficient. */
-#define RC_PID_COEFF_D 15
+#define RC_PID_COEFF_D			15
 
 /* Target failed frames rate for the PID controller. NB: This effectively gives
  * maximum failed frames percentage we're willing to accept. If the wireless
  * link quality is good, the controller will fail to adjust failed frames
  * percentage to the target. This is intentional.
  */
-#define RC_PID_TARGET_PF 11
+#define RC_PID_TARGET_PF		14
 
 /* Rate behaviour normalization quantity over time. */
-#define RC_PID_NORM_OFFSET 3
+#define RC_PID_NORM_OFFSET		3
 
 /* Push high rates right after loading. */
-#define RC_PID_FAST_START 0
+#define RC_PID_FAST_START		0
 
 /* Arithmetic right shift for positive and negative values for ISO C. */
 #define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
-- 
cgit v1.2.3


From 6d65f5db2f66e3ff44df759fbbb3c7482879fb1e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 23 Dec 2007 10:11:55 +0100
Subject: mac80211: remove misleading 'res' variable

When this function returns != CONTINUE, it needs to put the
station struct it has acquired. Hence, having this unused
variable is not just superfluous but also misleading.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/tx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8302c70da9a..f6194167253 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -932,7 +932,6 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_sub_if_data *sdata;
-	ieee80211_txrx_result res = TXRX_CONTINUE;
 
 	int hdrlen;
 
@@ -997,7 +996,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
 	}
 	control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT;
 
-	return res;
+	return TXRX_CONTINUE;
 }
 
 /* Device in tx->dev has a reference added; use dev_put(tx->dev) when
-- 
cgit v1.2.3


From 1b507e7e538ee1c8a25e698911a44604d6be3954 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Sun, 23 Dec 2007 17:49:00 +0100
Subject: rc80211-pid: fix definition of rate control interval

Fix the rate control interval definition. Thanks to Mattias Nissler for
spotting this out.

Cc: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index b98e16afe4d..04afc13ed82 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -10,8 +10,8 @@
 #ifndef RC80211_PID_H
 #define RC80211_PID_H
 
-/* Sampling period for measuring percentage of failed frames. */
-#define RC_PID_INTERVAL			(HZ / 8)
+/* Sampling period for measuring percentage of failed frames in ms. */
+#define RC_PID_INTERVAL			125
 
 /* Exponential averaging smoothness (used for I part of PID controller) */
 #define RC_PID_SMOOTHING_SHIFT		3
-- 
cgit v1.2.3


From f704662fb7cd81bfdc441207e788860ae4685e95 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 23 Dec 2007 22:05:25 +0100
Subject: mac80211: make rc_pid_fop_events static

No need to not be.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 91818e4ff00..88b8dc9999b 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -197,7 +197,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
 
 #undef RC_PID_PRINT_BUF_SIZE
 
-struct file_operations rc_pid_fop_events = {
+static struct file_operations rc_pid_fop_events = {
 	.owner = THIS_MODULE,
 	.read = rate_control_pid_events_read,
 	.poll = rate_control_pid_events_poll,
-- 
cgit v1.2.3


From 6368e4b18d5c71c73eecd96d568e726b80e5bce1 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 24 Dec 2007 13:36:39 +0200
Subject: mac80211: restructure __ieee80211_rx

This patch makes a separation between Rx frame pre-handling which stays in
__ieee80211_rx and Rx frame handlers, moving to __ieee80211_rx_handle_packet.
Although this separation has no affect in regular mode of operation, this kind
of mechanism will be used in A-MPDU frames reordering as it allows accumulation
of frames during pre-handling, dispatching them to later handling when necessary.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c | 86 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 306e6fc25d8..a58a94b2140 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -288,11 +288,11 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
 	return TXRX_CONTINUE;
 }
 
-static ieee80211_txrx_result
-ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
+
+u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
+			      struct sk_buff *skb,
+			      struct ieee80211_rx_status *status)
 {
-	struct ieee80211_local *local = rx->local;
-	struct sk_buff *skb = rx->skb;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	u32 load = 0, hdrtime;
 	struct ieee80211_rate *rate;
@@ -306,7 +306,7 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
 
 	rate = &mode->rates[0];
 	for (i = 0; i < mode->num_rates; i++) {
-		if (mode->rates[i].val == rx->u.rx.status->rate) {
+		if (mode->rates[i].val == status->rate) {
 			rate = &mode->rates[i];
 			break;
 		}
@@ -331,15 +331,13 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
 	/* Divide channel_use by 8 to avoid wrapping around the counter */
 	load >>= CHAN_UTIL_SHIFT;
 	local->channel_use_raw += load;
-	rx->u.rx.load = load;
 
-	return TXRX_CONTINUE;
+	return load;
 }
 
 ieee80211_rx_handler ieee80211_rx_pre_handlers[] =
 {
 	ieee80211_rx_h_parse_qos,
-	ieee80211_rx_h_load_stats,
 	NULL
 };
 
@@ -1613,11 +1611,11 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 }
 
 /*
- * This is the receive path handler. It is called by a low level driver when an
- * 802.11 MPDU is received from the hardware.
+ * This is the actual Rx frames handler. as it blongs to Rx path it must
+ * be called with rcu_read_lock protection.
  */
-void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		    struct ieee80211_rx_status *status)
+void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
+			    struct ieee80211_rx_status *status, u32 load)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
@@ -1625,37 +1623,19 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_txrx_data rx;
 	u16 type;
-	int prepres;
+	int prepares;
 	struct ieee80211_sub_if_data *prev = NULL;
 	struct sk_buff *skb_new;
 	u8 *bssid;
 	int hdrlen;
 
-	/*
-	 * key references and virtual interfaces are protected using RCU
-	 * and this requires that we are in a read-side RCU section during
-	 * receive processing
-	 */
-	rcu_read_lock();
-
-	/*
-	 * Frames with failed FCS/PLCP checksum are not returned,
-	 * all other frames are returned without radiotap header
-	 * if it was previously present.
-	 * Also, frames with less than 16 bytes are dropped.
-	 */
-	skb = ieee80211_rx_monitor(local, skb, status);
-	if (!skb) {
-		rcu_read_unlock();
-		return;
-	}
-
 	hdr = (struct ieee80211_hdr *) skb->data;
 	memset(&rx, 0, sizeof(rx));
 	rx.skb = skb;
 	rx.local = local;
 
 	rx.u.rx.status = status;
+	rx.u.rx.load = load;
 	rx.fc = le16_to_cpu(hdr->frame_control);
 	type = rx.fc & IEEE80211_FCTL_FTYPE;
 
@@ -1714,11 +1694,11 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 			continue;
 
 		rx.flags |= IEEE80211_TXRXD_RXRA_MATCH;
-		prepres = prepare_for_handlers(sdata, bssid, &rx, hdr);
+		prepares = prepare_for_handlers(sdata, bssid, &rx, hdr);
 		/* prepare_for_handlers can change sta */
 		sta = rx.sta;
 
-		if (!prepres)
+		if (!prepares)
 			continue;
 
 		/*
@@ -1765,11 +1745,45 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		dev_kfree_skb(skb);
 
  end:
-	rcu_read_unlock();
-
 	if (sta)
 		sta_info_put(sta);
 }
+
+/*
+ * This is the receive path handler. It is called by a low level driver when an
+ * 802.11 MPDU is received from the hardware.
+ */
+void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
+		    struct ieee80211_rx_status *status)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	u32 pkt_load;
+
+	/*
+	 * key references and virtual interfaces are protected using RCU
+	 * and this requires that we are in a read-side RCU section during
+	 * receive processing
+	 */
+	rcu_read_lock();
+
+	/*
+	 * Frames with failed FCS/PLCP checksum are not returned,
+	 * all other frames are returned without radiotap header
+	 * if it was previously present.
+	 * Also, frames with less than 16 bytes are dropped.
+	 */
+	skb = ieee80211_rx_monitor(local, skb, status);
+	if (!skb) {
+		rcu_read_unlock();
+		return;
+	}
+
+	pkt_load = ieee80211_rx_load_stats(local, skb, status);
+
+	__ieee80211_rx_handle_packet(hw, skb, status, pkt_load);
+
+	rcu_read_unlock();
+}
 EXPORT_SYMBOL(__ieee80211_rx);
 
 /* This is a version of the rx handler that can be called from hard irq
-- 
cgit v1.2.3


From 5aae2880618471cfa679ca22531b88990bee9bf8 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 25 Dec 2007 17:00:32 +0200
Subject: mac80211: A-MPDU Rx add MLME structures

This patch adds the needed structures to describe the Rx aggregation MLME per STA
new:
 - struct tid_ampdu_rx: TID aggregation information (Rx)
 - struct sta_ampdu_mlme: MLME aggregation information for STA
changed:
 - struct sta_info: ampdu_mlme added to describe A-MPDU MLME per STA,
		    and timer_to_tid added to map timer id into TID

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/sta_info.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e1a4ac1d48d..96fe3ed9503 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -31,6 +31,51 @@
 #define WLAN_STA_WME BIT(9)
 #define WLAN_STA_WDS BIT(27)
 
+#define STA_TID_NUM 16
+#define ADDBA_RESP_INTERVAL HZ
+
+#define HT_AGG_STATE_INITIATOR_SHIFT	(4)
+
+#define HT_AGG_STATE_REQ_STOP_BA_MSK	BIT(3)
+
+#define HT_AGG_STATE_IDLE		(0x0)
+#define HT_AGG_STATE_OPERATIONAL	(0x7)
+
+/**
+ * struct tid_ampdu_rx - TID aggregation information (Rx).
+ *
+ * @state: TID's state in session state machine.
+ * @dialog_token: dialog token for aggregation session
+ * @ssn: Starting Sequence Number expected to be aggregated.
+ * @buf_size: buffer size for incoming A-MPDUs
+ * @timeout: reset timer value.
+ * @head_seq_num: head sequence number in reordering buffer.
+ * @stored_mpdu_num: number of MPDUs in reordering buffer
+ * @reorder_buf: buffer to reorder incoming aggregated MPDUs
+ * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
+ */
+struct tid_ampdu_rx {
+	u8 state;
+	u8 dialog_token;
+	u16 ssn;
+	u16 buf_size;
+	u16 timeout;
+	u16 head_seq_num;
+	u16 stored_mpdu_num;
+	struct sk_buff **reorder_buf;
+	struct timer_list session_timer;
+};
+
+/**
+ * struct sta_ampdu_mlme - STA aggregation information.
+ *
+ * @tid_agg_info_rx: aggregation info for Rx per TID
+ * @ampdu_rx: for locking sections in aggregation Rx flow
+ */
+struct sta_ampdu_mlme {
+	struct tid_ampdu_rx tid_rx[STA_TID_NUM];
+	spinlock_t ampdu_rx;
+};
 
 struct sta_info {
 	struct kref kref;
@@ -101,6 +146,8 @@ struct sta_info {
 
 	struct ieee80211_ht_info ht_info; /* 802.11n HT capabilities
 					     of this STA */
+	struct sta_ampdu_mlme ampdu_mlme;
+	u8 timer_to_tid[STA_TID_NUM];	/* convert timer id to tid */
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct sta_info_debugfsdentries {
-- 
cgit v1.2.3


From 07db218396650933abff3c5c1ad1e2a6e0cfedeb Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 25 Dec 2007 17:00:33 +0200
Subject: mac80211: A-MPDU Rx adding basic functionality

This patch adds the basic needed abilities and functions for A-MPDU Rx session
changed functions:
 - ieee80211_sta_process_addba_request - Rx A-MPDU initialization enabled
 - ieee80211_stop - stops all A-MPDU Rx in case interface goes down
added functions:
 - ieee80211_send_delba - used for sending out Del BA in A-MPDU sessions
 - ieee80211_sta_stop_rx_BA_session - stopping Rx A-MPDU session
 - sta_rx_agg_session_timer_expired - stops A-MPDU Rx use if load is too
low

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211.c     |   9 ++
 net/mac80211/ieee80211_i.h   |   3 +
 net/mac80211/ieee80211_sta.c | 221 ++++++++++++++++++++++++++++++++++++++++---
 net/mac80211/sta_info.c      |   3 +
 4 files changed, 225 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 9c14e3d303c..2011c726f2b 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -292,9 +292,18 @@ static int ieee80211_stop(struct net_device *dev)
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_if_init_conf conf;
+	struct sta_info *sta;
+	int i;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+	list_for_each_entry(sta, &local->sta_list, list) {
+		for (i = 0; i <  STA_TID_NUM; i++)
+			ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr,
+						i, WLAN_BACK_RECIPIENT,
+						WLAN_REASON_QSTA_LEAVE_QBSS);
+	}
+
 	netif_stop_queue(dev);
 
 	/*
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index baf53c04712..740d69d5bbd 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -767,6 +767,9 @@ int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
 int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 			struct ieee80211_ht_addt_info *ht_add_info_ie,
 			struct ieee80211_ht_bss_info *bss_info);
+void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da,
+				u16 tid, u16 initiator, u16 reason);
+void sta_rx_agg_session_timer_expired(unsigned long data);
 /* ieee80211_iface.c */
 int ieee80211_if_add(struct net_device *dev, const char *name,
 		     struct net_device **new_dev, int type);
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 5b8f484c167..d5a7683fab3 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -64,6 +64,11 @@
 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
 #define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
 
+/* next values represent the buffer size for A-MPDU frame.
+ * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) */
+#define IEEE80211_MIN_AMPDU_BUF 0x8
+#define IEEE80211_MAX_AMPDU_BUF 0x40
+
 static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 				     u8 *ssid, size_t ssid_len);
 static struct ieee80211_sta_bss *
@@ -1005,7 +1010,8 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 	struct ieee80211_mgmt *mgmt;
 	u16 capab;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
+					sizeof(mgmt->u.action.u.addba_resp));
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer "
 		       "for addba resp frame\n", dev->name);
@@ -1047,9 +1053,14 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 						size_t len)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
+	struct ieee80211_conf *conf = &hw->conf;
 	struct sta_info *sta;
-	u16 capab, tid, timeout, ba_policy, buf_size, status;
+	struct tid_ampdu_rx *tid_agg_rx;
+	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
 	u8 dialog_token;
+	int ret = -EOPNOTSUPP;
+	DECLARE_MAC_BUF(mac);
 
 	sta = sta_info_get(local, mgmt->sa);
 	if (!sta)
@@ -1058,28 +1069,216 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 	/* extract session parameters from addba request frame */
 	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
 	timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
+	start_seq_num =
+		le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
 
 	capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
 	ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
 
-	/* TODO - currently aggregation is declined (A-MPDU add BA request
-	* acceptance is not obligatory by 802.11n draft), but here is
-	* the entry point for dealing with it */
-#ifdef MAC80211_HT_DEBUG
-	if (net_ratelimit())
-		printk(KERN_DEBUG "Add Block Ack request arrived,"
-				   " currently denying it\n");
-#endif /* MAC80211_HT_DEBUG */
-
 	status = WLAN_STATUS_REQUEST_DECLINED;
 
+	/* sanity check for incoming parameters:
+	 * check if configuration can support the BA policy
+	 * and if buffer size does not exceeds max value */
+	if (((ba_policy != 1)
+		&& (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA)))
+		|| (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
+		status = WLAN_STATUS_INVALID_QOS_PARAM;
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_DEBUG "Block Ack Req with bad params from "
+				"%s on tid %u. policy %d, buffer size %d\n",
+				print_mac(mac, mgmt->sa), tid, ba_policy,
+				buf_size);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		goto end_no_lock;
+	}
+	/* determine default buffer size */
+	if (buf_size == 0) {
+		struct ieee80211_hw_mode *mode = conf->mode;
+		buf_size = IEEE80211_MIN_AMPDU_BUF;
+		buf_size = buf_size << mode->ht_info.ampdu_factor;
+	}
+
+	tid_agg_rx = &sta->ampdu_mlme.tid_rx[tid];
+
+	/* examine state machine */
+	spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
+
+	if (tid_agg_rx->state != HT_AGG_STATE_IDLE) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_DEBUG "unexpected Block Ack Req from "
+				"%s on tid %u\n",
+				print_mac(mac, mgmt->sa), tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		goto end;
+	}
+
+	/* prepare reordering buffer */
+	tid_agg_rx->reorder_buf =
+		kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC);
+	if ((!tid_agg_rx->reorder_buf) && net_ratelimit()) {
+		printk(KERN_ERR "can not allocate reordering buffer "
+						"to tid %d\n", tid);
+		goto end;
+	}
+	memset(tid_agg_rx->reorder_buf, 0,
+		buf_size * sizeof(struct sk_buf *));
+
+	if (local->ops->ampdu_action)
+		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
+					       sta->addr, tid, start_seq_num);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "Rx A-MPDU on tid %d result %d", tid, ret);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	if (ret) {
+		kfree(tid_agg_rx->reorder_buf);
+		goto end;
+	}
+
+	/* change state and send addba resp */
+	tid_agg_rx->state = HT_AGG_STATE_OPERATIONAL;
+	tid_agg_rx->dialog_token = dialog_token;
+	tid_agg_rx->ssn = start_seq_num;
+	tid_agg_rx->head_seq_num = start_seq_num;
+	tid_agg_rx->buf_size = buf_size;
+	tid_agg_rx->timeout = timeout;
+	tid_agg_rx->stored_mpdu_num = 0;
+	status = WLAN_STATUS_SUCCESS;
+end:
+	spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
+
+end_no_lock:
 	ieee80211_send_addba_resp(sta->dev, sta->addr, tid, dialog_token,
 				status, 1, buf_size, timeout);
 	sta_info_put(sta);
 }
 
+void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
+				u16 initiator, u16 reason_code)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u16 params;
+
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
+					sizeof(mgmt->u.action.u.delba));
+
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer "
+					"for delba frame\n", dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, da, ETH_ALEN);
+	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	if (sdata->type == IEEE80211_IF_TYPE_AP)
+		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+					IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
+
+	mgmt->u.action.category = WLAN_CATEGORY_BACK;
+	mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
+	params = (u16)(initiator << 11); 	/* bit 11 initiator */
+	params |= (u16)(tid << 12); 		/* bit 15:12 TID number */
+
+	mgmt->u.action.u.delba.params = cpu_to_le16(params);
+	mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
+
+	ieee80211_sta_tx(dev, skb, 0);
+}
+
+void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
+					u16 initiator, u16 reason)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
+	struct sta_info *sta;
+	int ret;
+
+	sta = sta_info_get(local, ra);
+	if (!sta)
+		return;
+
+	/* check if TID is in operational state */
+	spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
+	if (sta->ampdu_mlme.tid_rx[tid].state
+				!= HT_AGG_STATE_OPERATIONAL) {
+		spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
+		if (net_ratelimit())
+			printk(KERN_DEBUG "rx BA session requested to stop on "
+				"inactive tid %d\n", tid);
+		sta_info_put(sta);
+		return;
+	}
+	sta->ampdu_mlme.tid_rx[tid].state =
+		HT_AGG_STATE_REQ_STOP_BA_MSK |
+		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
+		spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
+
+	/* stop HW Rx aggregation. ampdu_action existence
+	 * already verified in session init so we add the BUG_ON */
+	BUG_ON(!local->ops->ampdu_action);
+
+	ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
+					ra, tid, EINVAL);
+	if (ret)
+		printk(KERN_DEBUG "HW problem - can not stop rx "
+				"aggergation for tid %d\n", tid);
+
+	/* shutdown timer has not expired */
+	if (initiator != WLAN_BACK_TIMER)
+		del_timer_sync(&sta->ampdu_mlme.tid_rx[tid].
+					session_timer);
+
+	/* check if this is a self generated aggregation halt */
+	if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
+		ieee80211_send_delba(dev, ra, tid, 0, reason);
+
+	/* free the reordering buffer */
+	kfree(sta->ampdu_mlme.tid_rx[tid].reorder_buf);
+
+	sta->ampdu_mlme.tid_rx[tid].state = HT_AGG_STATE_IDLE;
+	sta_info_put(sta);
+}
+
+/*
+ * After receiving Block Ack Request (BAR) we activated a
+ * timer after each frame arrives from the originator.
+ * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
+ */
+void sta_rx_agg_session_timer_expired(unsigned long data)
+{
+	/* not an elegant detour, but there is no choice as the timer passes
+	 * only one argument, and verious sta_info are needed here, so init
+	 * flow in sta_info_add gives the TID as data, while the timer_to_id
+	 * array gives the sta through container_of */
+	u8 *ptid = (u8 *)data;
+	u8 *timer_to_id = ptid - *ptid;
+	struct sta_info *sta = container_of(timer_to_id, struct sta_info,
+					 timer_to_tid[0]);
+
+	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
+	ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr, (u16)*ptid,
+					 WLAN_BACK_TIMER,
+					 WLAN_REASON_QSTA_TIMEOUT);
+}
+
+
 static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 				   struct ieee80211_if_sta *ifsta,
 				   struct ieee80211_mgmt *mgmt,
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ffe8a49d892..60ca0780405 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -104,6 +104,7 @@ static void sta_info_release(struct kref *kref)
 	struct sta_info *sta = container_of(kref, struct sta_info, kref);
 	struct ieee80211_local *local = sta->local;
 	struct sk_buff *skb;
+	int i;
 
 	/* free sta structure; it has already been removed from
 	 * hash table etc. external structures. Make sure that all
@@ -116,6 +117,8 @@ static void sta_info_release(struct kref *kref)
 	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
 		dev_kfree_skb_any(skb);
 	}
+	for (i = 0; i <  STA_TID_NUM; i++)
+		del_timer_sync(&sta->ampdu_mlme.tid_rx[i].session_timer);
 	rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv);
 	rate_control_put(sta->rate_ctrl);
 	kfree(sta);
-- 
cgit v1.2.3


From 16c5f15c73e97e22a1fcc6518da32bdcf98aec3d Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 25 Dec 2007 17:00:34 +0200
Subject: mac80211: A-MPDU Rx MLME data initialization

This patch initialize A-MPDU MLME data for Rx sessions.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/sta_info.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 60ca0780405..1257c7aab2a 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -136,6 +136,7 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
 			       struct net_device *dev, u8 *addr, gfp_t gfp)
 {
 	struct sta_info *sta;
+	int i;
 	DECLARE_MAC_BUF(mac);
 
 	sta = kzalloc(sizeof(*sta), gfp);
@@ -155,6 +156,19 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
 	memcpy(sta->addr, addr, ETH_ALEN);
 	sta->local = local;
 	sta->dev = dev;
+	spin_lock_init(&sta->ampdu_mlme.ampdu_rx);
+	for (i = 0; i < STA_TID_NUM; i++) {
+		/* timer_to_tid must be initialized with identity mapping to
+		 * enable session_timer's data differentiation. refer to
+		 * sta_rx_agg_session_timer_expired for useage */
+		sta->timer_to_tid[i] = i;
+		/* rx timers */
+		sta->ampdu_mlme.tid_rx[i].session_timer.function =
+			sta_rx_agg_session_timer_expired;
+		sta->ampdu_mlme.tid_rx[i].session_timer.data =
+			(unsigned long)&sta->timer_to_tid[i];
+		init_timer(&sta->ampdu_mlme.tid_rx[i].session_timer);
+	}
 	skb_queue_head_init(&sta->ps_tx_buf);
 	skb_queue_head_init(&sta->tx_filtered);
 	__sta_info_get(sta);	/* sta used by caller, decremented by
-- 
cgit v1.2.3


From b580781e038968fb2529460e8b61e3bf77de112a Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 25 Dec 2007 17:00:35 +0200
Subject: mac80211: A-MPDU Rx handling aggregation reordering

This patch handles the reordering of the Rx A-MPDU.
This issue occurs when the sequence of the internal MPDUs is not in the
right order. such a case can be encountered for example when some MPDUs from
previous aggregations were recieved, while others failed, so current A-MPDU
will contain a mix of re-transmited MPDUs and new ones.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_sta.c |  10 ++-
 net/mac80211/rx.c            | 185 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 192 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index d5a7683fab3..f1edaa0c0da 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1208,7 +1208,7 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	struct sta_info *sta;
-	int ret;
+	int ret, i;
 
 	sta = sta_info_get(local, ra);
 	if (!sta)
@@ -1250,6 +1250,14 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 		ieee80211_send_delba(dev, ra, tid, 0, reason);
 
 	/* free the reordering buffer */
+	for (i = 0; i < sta->ampdu_mlme.tid_rx[tid].buf_size; i++) {
+		if (sta->ampdu_mlme.tid_rx[tid].reorder_buf[i]) {
+			/* release the reordered frames */
+			dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid].reorder_buf[i]);
+			sta->ampdu_mlme.tid_rx[tid].stored_mpdu_num--;
+			sta->ampdu_mlme.tid_rx[tid].reorder_buf[i] = NULL;
+		}
+	}
 	kfree(sta->ampdu_mlme.tid_rx[tid].reorder_buf);
 
 	sta->ampdu_mlme.tid_rx[tid].state = HT_AGG_STATE_IDLE;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a58a94b2140..e7b1eb8da25 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -330,7 +330,6 @@ u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
 
 	/* Divide channel_use by 8 to avoid wrapping around the counter */
 	load >>= CHAN_UTIL_SHIFT;
-	local->channel_use_raw += load;
 
 	return load;
 }
@@ -1749,6 +1748,186 @@ void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
 		sta_info_put(sta);
 }
 
+#define SEQ_MODULO 0x1000
+#define SEQ_MASK   0xfff
+
+static inline int seq_less(u16 sq1, u16 sq2)
+{
+	return (((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1));
+}
+
+static inline u16 seq_inc(u16 sq)
+{
+	return ((sq + 1) & SEQ_MASK);
+}
+
+static inline u16 seq_sub(u16 sq1, u16 sq2)
+{
+	return ((sq1 - sq2) & SEQ_MASK);
+}
+
+
+u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
+				struct tid_ampdu_rx *tid_agg_rx,
+				struct sk_buff *skb, u16 mpdu_seq_num,
+				int bar_req)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_rx_status status;
+	u16 head_seq_num, buf_size;
+	int index;
+	u32 pkt_load;
+
+	buf_size = tid_agg_rx->buf_size;
+	head_seq_num = tid_agg_rx->head_seq_num;
+
+	/* frame with out of date sequence number */
+	if (seq_less(mpdu_seq_num, head_seq_num)) {
+		dev_kfree_skb(skb);
+		return 1;
+	}
+
+	/* if frame sequence number exceeds our buffering window size or
+	 * block Ack Request arrived - release stored frames */
+	if ((!seq_less(mpdu_seq_num, head_seq_num + buf_size)) || (bar_req)) {
+		/* new head to the ordering buffer */
+		if (bar_req)
+			head_seq_num = mpdu_seq_num;
+		else
+			head_seq_num =
+				seq_inc(seq_sub(mpdu_seq_num, buf_size));
+		/* release stored frames up to new head to stack */
+		while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
+			index = seq_sub(tid_agg_rx->head_seq_num,
+				tid_agg_rx->ssn)
+				% tid_agg_rx->buf_size;
+
+			if (tid_agg_rx->reorder_buf[index]) {
+				/* release the reordered frames to stack */
+				memcpy(&status,
+					tid_agg_rx->reorder_buf[index]->cb,
+					sizeof(status));
+				pkt_load = ieee80211_rx_load_stats(local,
+						tid_agg_rx->reorder_buf[index],
+						&status);
+				__ieee80211_rx_handle_packet(hw,
+					tid_agg_rx->reorder_buf[index],
+					&status, pkt_load);
+				tid_agg_rx->stored_mpdu_num--;
+				tid_agg_rx->reorder_buf[index] = NULL;
+			}
+			tid_agg_rx->head_seq_num =
+				seq_inc(tid_agg_rx->head_seq_num);
+		}
+		if (bar_req)
+			return 1;
+	}
+
+	/* now the new frame is always in the range of the reordering */
+	/* buffer window */
+	index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn)
+				% tid_agg_rx->buf_size;
+	/* check if we already stored this frame */
+	if (tid_agg_rx->reorder_buf[index]) {
+		dev_kfree_skb(skb);
+		return 1;
+	}
+
+	/* if arrived mpdu is in the right order and nothing else stored */
+	/* release it immediately */
+	if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
+			tid_agg_rx->stored_mpdu_num == 0) {
+		tid_agg_rx->head_seq_num =
+			seq_inc(tid_agg_rx->head_seq_num);
+		return 0;
+	}
+
+	/* put the frame in the reordering buffer */
+	tid_agg_rx->reorder_buf[index] = skb;
+	tid_agg_rx->stored_mpdu_num++;
+	/* release the buffer until next missing frame */
+	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn)
+						% tid_agg_rx->buf_size;
+	while (tid_agg_rx->reorder_buf[index]) {
+		/* release the reordered frame back to stack */
+		memcpy(&status, tid_agg_rx->reorder_buf[index]->cb,
+			sizeof(status));
+		pkt_load = ieee80211_rx_load_stats(local,
+					tid_agg_rx->reorder_buf[index],
+					&status);
+		__ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index],
+						&status, pkt_load);
+		tid_agg_rx->stored_mpdu_num--;
+		tid_agg_rx->reorder_buf[index] = NULL;
+		tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
+		index =	seq_sub(tid_agg_rx->head_seq_num,
+			tid_agg_rx->ssn) % tid_agg_rx->buf_size;
+	}
+	return 1;
+}
+
+u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
+			      struct sk_buff *skb)
+{
+	struct ieee80211_hw *hw = &local->hw;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct sta_info *sta;
+	struct tid_ampdu_rx *tid_agg_rx;
+	u16 fc, sc;
+	u16 mpdu_seq_num;
+	u8 ret = 0, *qc;
+	int tid;
+
+	sta = sta_info_get(local, hdr->addr2);
+	if (!sta)
+		return ret;
+
+	fc = le16_to_cpu(hdr->frame_control);
+
+	/* filter the QoS data rx stream according to
+	 * STA/TID and check if this STA/TID is on aggregation */
+	if (!WLAN_FC_IS_QOS_DATA(fc))
+		goto end_reorder;
+
+	qc = skb->data + ieee80211_get_hdrlen(fc) - QOS_CONTROL_LEN;
+	tid = qc[0] & QOS_CONTROL_TID_MASK;
+	tid_agg_rx = &(sta->ampdu_mlme.tid_rx[tid]);
+
+	if (tid_agg_rx->state != HT_AGG_STATE_OPERATIONAL)
+		goto end_reorder;
+
+	/* null data frames are excluded */
+	if (unlikely(fc & IEEE80211_STYPE_QOS_NULLFUNC))
+		goto end_reorder;
+
+	/* new un-ordered ampdu frame - process it */
+
+	/* reset session timer */
+	if (tid_agg_rx->timeout) {
+		unsigned long expires =
+			jiffies + (tid_agg_rx->timeout / 1000) * HZ;
+		mod_timer(&tid_agg_rx->session_timer, expires);
+	}
+
+	/* if this mpdu is fragmented - terminate rx aggregation session */
+	sc = le16_to_cpu(hdr->seq_ctrl);
+	if (sc & IEEE80211_SCTL_FRAG) {
+		ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr,
+			tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP);
+		ret = 1;
+		goto end_reorder;
+	}
+
+	/* according to mpdu sequence number deal with reordering buffer */
+	mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
+	ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb,
+						mpdu_seq_num, 0);
+end_reorder:
+	if (sta)
+		sta_info_put(sta);
+	return ret;
+}
+
 /*
  * This is the receive path handler. It is called by a low level driver when an
  * 802.11 MPDU is received from the hardware.
@@ -1779,8 +1958,10 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	}
 
 	pkt_load = ieee80211_rx_load_stats(local, skb, status);
+	local->channel_use_raw += pkt_load;
 
-	__ieee80211_rx_handle_packet(hw, skb, status, pkt_load);
+	if (!ieee80211_rx_reorder_ampdu(local, skb))
+		__ieee80211_rx_handle_packet(hw, skb, status, pkt_load);
 
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From 713647169e3aaca16be4cfba42513bd4558abec0 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 25 Dec 2007 17:00:36 +0200
Subject: mac80211: A-MPDU Rx adding BAR handling capability

This patch adds the ability to handle Block Ack Request

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_i.h |  3 ++-
 net/mac80211/rx.c          | 62 +++++++++++++++++++++++++++++++++++++++++++---
 net/mac80211/util.c        | 15 ++++++++++-
 3 files changed, 74 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 740d69d5bbd..662e69ef7ee 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -801,7 +801,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
 extern void *mac80211_wiphy_privid; /* for wiphy privid */
 extern const unsigned char rfc1042_header[6];
 extern const unsigned char bridge_tunnel_header[6];
-u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len);
+u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
+			enum ieee80211_if_types type);
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
 void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e7b1eb8da25..ed3b8163920 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -24,6 +24,10 @@
 #include "tkip.h"
 #include "wme.h"
 
+u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
+				struct tid_ampdu_rx *tid_agg_rx,
+				struct sk_buff *skb, u16 mpdu_seq_num,
+				int bar_req);
 /*
  * monitor mode reception
  *
@@ -64,7 +68,9 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
 	if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
 			cpu_to_le16(IEEE80211_FTYPE_CTL)) &&
 	    ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
-			cpu_to_le16(IEEE80211_STYPE_PSPOLL)))
+			cpu_to_le16(IEEE80211_STYPE_PSPOLL)) &&
+	    ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
+			cpu_to_le16(IEEE80211_STYPE_BACK_REQ)))
 		return 1;
 	return 0;
 }
@@ -634,7 +640,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx)
 	 * BSSID to avoid keeping the current IBSS network alive in cases where
 	 * other STAs are using different BSSID. */
 	if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) {
-		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len);
+		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
+						IEEE80211_IF_TYPE_IBSS);
 		if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0)
 			sta->last_rx = jiffies;
 	} else
@@ -1376,6 +1383,49 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
 	return TXRX_QUEUED;
 }
 
+static ieee80211_txrx_result
+ieee80211_rx_h_ctrl(struct ieee80211_txrx_data *rx)
+{
+	struct ieee80211_local *local = rx->local;
+	struct ieee80211_hw *hw = &local->hw;
+	struct sk_buff *skb = rx->skb;
+	struct ieee80211_bar *bar = (struct ieee80211_bar *) skb->data;
+	struct tid_ampdu_rx *tid_agg_rx;
+	u16 start_seq_num;
+	u16 tid;
+
+	if (likely((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL))
+		return TXRX_CONTINUE;
+
+	if ((rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BACK_REQ) {
+		if (!rx->sta)
+			return TXRX_CONTINUE;
+		tid = le16_to_cpu(bar->control) >> 12;
+		tid_agg_rx = &(rx->sta->ampdu_mlme.tid_rx[tid]);
+		if (tid_agg_rx->state != HT_AGG_STATE_OPERATIONAL)
+			return TXRX_CONTINUE;
+
+		start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4;
+
+		/* reset session timer */
+		if (tid_agg_rx->timeout) {
+			unsigned long expires =
+				jiffies + (tid_agg_rx->timeout / 1000) * HZ;
+			mod_timer(&tid_agg_rx->session_timer, expires);
+		}
+
+		/* manage reordering buffer according to requested */
+		/* sequence number */
+		rcu_read_lock();
+		ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL,
+						 start_seq_num, 1);
+		rcu_read_unlock();
+		return TXRX_DROP;
+	}
+
+	return TXRX_CONTINUE;
+}
+
 static ieee80211_txrx_result
 ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
 {
@@ -1527,6 +1577,7 @@ ieee80211_rx_handler ieee80211_rx_handlers[] =
 	ieee80211_rx_h_remove_qos_control,
 	ieee80211_rx_h_amsdu,
 	ieee80211_rx_h_data,
+	ieee80211_rx_h_ctrl,
 	ieee80211_rx_h_mgmt,
 	NULL
 };
@@ -1683,8 +1734,6 @@ void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
 		return;
 	}
 
-	bssid = ieee80211_get_bssid(hdr, skb->len);
-
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		if (!netif_running(sdata->dev))
 			continue;
@@ -1692,6 +1741,7 @@ void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
 		if (sdata->type == IEEE80211_IF_TYPE_MNTR)
 			continue;
 
+		bssid = ieee80211_get_bssid(hdr, skb->len, sdata->type);
 		rx.flags |= IEEE80211_TXRXD_RXRA_MATCH;
 		prepares = prepare_for_handlers(sdata, bssid, &rx, hdr);
 		/* prepare_for_handlers can change sta */
@@ -1767,6 +1817,10 @@ static inline u16 seq_sub(u16 sq1, u16 sq2)
 }
 
 
+/*
+ * As it function blongs to Rx path it must be called with
+ * the proper rcu_read_lock protection for its flow.
+ */
 u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 				struct tid_ampdu_rx *tid_agg_rx,
 				struct sk_buff *skb, u16 mpdu_seq_num,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 2b02b2b9d64..adb85dd5098 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -127,7 +127,8 @@ void ieee80211_prepare_rates(struct ieee80211_local *local,
 	}
 }
 
-u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
+u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
+			enum ieee80211_if_types type)
 {
 	u16 fc;
 
@@ -159,6 +160,18 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
 	case IEEE80211_FTYPE_CTL:
 		if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)
 			return hdr->addr1;
+		else if ((fc & IEEE80211_FCTL_STYPE) ==
+						IEEE80211_STYPE_BACK_REQ) {
+			switch (type) {
+			case IEEE80211_IF_TYPE_STA:
+				return hdr->addr2;
+			case IEEE80211_IF_TYPE_AP:
+			case IEEE80211_IF_TYPE_VLAN:
+				return hdr->addr1;
+			default:
+				return NULL;
+			}
+		}
 		else
 			return NULL;
 	}
-- 
cgit v1.2.3


From 688b88a4886834d7e3457711cd4feef6611d3232 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 25 Dec 2007 17:00:37 +0200
Subject: mac80211: A-MPDU Rx handling DELBA requests

This patch opens the flow to DELBA management frames, and handles end
of A-MPDU session produced by this event.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211_sta.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index f1edaa0c0da..d1f7199a208 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -63,6 +63,8 @@
 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
 #define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
+#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000
+#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
 
 /* next values represent the buffer size for A-MPDU frame.
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) */
@@ -1264,6 +1266,36 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 	sta_info_put(sta);
 }
 
+static void ieee80211_sta_process_delba(struct net_device *dev,
+			struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	u16 tid, params;
+	u16 initiator;
+	DECLARE_MAC_BUF(mac);
+
+	sta = sta_info_get(local, mgmt->sa);
+	if (!sta)
+		return;
+
+	params = le16_to_cpu(mgmt->u.action.u.delba.params);
+	tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
+	initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11;
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	if (net_ratelimit())
+		printk(KERN_DEBUG "delba from %s on tid %d reason code %d\n",
+			print_mac(mac, mgmt->sa), tid,
+			mgmt->u.action.u.delba.reason_code);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	if (initiator == WLAN_BACK_INITIATOR)
+		ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid,
+						 WLAN_BACK_INITIATOR, 0);
+	sta_info_put(sta);
+}
+
 /*
  * After receiving Block Ack Request (BAR) we activated a
  * timer after each frame arrives from the originator.
@@ -2204,9 +2236,15 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev,
 				break;
 			ieee80211_sta_process_addba_request(dev, mgmt, len);
 			break;
+		case WLAN_ACTION_DELBA:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.delba)))
+				break;
+			ieee80211_sta_process_delba(dev, mgmt, len);
+			break;
 		default:
 			if (net_ratelimit())
-			   printk(KERN_DEBUG "%s: received unsupported BACK\n",
+			   printk(KERN_DEBUG "%s: Rx unknown A-MPDU action\n",
 					dev->name);
 			break;
 		}
-- 
cgit v1.2.3


From 4b475898ec9dc6e62cebcb8fc0b3495c986a4590 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 2 Jan 2008 15:17:03 +0100
Subject: mac80211: better rate control algorithm selection

This patch changes mac80211's Kconfig/Makefile to:
 * select between the PID and the SIMPLE rate control
   algorithm as default
 * always allow tri-state for the rate control algorithms,
   building those that are selected 'y' into the mac80211
   module (if that is a module, otherwise all into the kernel)
 * force the default rate control algorithm to be built into
   mac80211

It also makes both rate control algorithms proper modules again
with MODULE_LICENSE etc.

Only if EMBEDDED is the user allowed to select "NONE" as default
which will cause no algorithm to be selected, this will work
only when the driver brings one itself (e.g. iwlwifi drivers).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Kconfig            | 37 ++++++++++++++++++-------------------
 net/mac80211/Makefile           | 41 +++++++++++++++++++++++++++--------------
 net/mac80211/ieee80211.c        | 34 +++++++++++-----------------------
 net/mac80211/ieee80211_rate.c   |  4 ++++
 net/mac80211/ieee80211_rate.h   | 38 ++++++++++++++++++++++++++++++++------
 net/mac80211/rc80211_pid_algo.c | 24 ++++++++++++++++++++++--
 net/mac80211/rc80211_simple.c   | 21 ++++++++++++++++++++-
 7 files changed, 134 insertions(+), 65 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index cac6cf2e9ac..09c255002e5 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -13,25 +13,17 @@ config MAC80211
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
 
-config MAC80211_RC_DEFAULT_CHOICE
-	bool "Choose default rate control algorithm" if EMBEDDED
-	default y
-	depends on MAC80211
-	---help---
-	  This options enables selection of a default rate control
-	  algorithm to be built into the mac80211 module.  Alternate
-	  rate control algorithms might be built into the mac80211
-	  module as well.
+menu "Rate control algorithm selection"
+	depends on MAC80211 != n
 
 choice
 	prompt "Default rate control algorithm"
 	default MAC80211_RC_DEFAULT_PID
-	depends on MAC80211 && MAC80211_RC_DEFAULT_CHOICE
 	---help---
 	  This option selects the default rate control algorithm
 	  mac80211 will use. Note that this default can still be
 	  overriden through the ieee80211_default_rc_algo module
-	  parameter.
+	  parameter if different algorithms are available.
 
 config MAC80211_RC_DEFAULT_PID
 	bool "PID controller based rate control algorithm"
@@ -50,19 +42,27 @@ config MAC80211_RC_DEFAULT_SIMPLE
 	  dumb algorithm. You should choose the PID rate control
 	  instead.
 
+config MAC80211_RC_DEFAULT_NONE
+	bool "No default algorithm"
+	depends on EMBEDDED
+	help
+	  Selecting this option will select no default algorithm
+	  and allow you to not build any. Do not choose this
+	  option unless you know your driver comes with another
+	  suitable algorithm.
 endchoice
 
+comment "Selecting 'y' for an algorithm will"
+comment "build the algorithm into mac80211."
+
 config MAC80211_RC_DEFAULT
 	string
-	depends on MAC80211
 	default "pid" if MAC80211_RC_DEFAULT_PID
 	default "simple" if MAC80211_RC_DEFAULT_SIMPLE
 	default ""
 
 config MAC80211_RC_PID
-	bool "PID controller based rate control algorithm"
-	default y
-	depends on MAC80211
+	tristate "PID controller based rate control algorithm"
 	---help---
 	  This option enables a TX rate control algorithm for
 	  mac80211 that uses a PID controller to select the TX
@@ -72,16 +72,15 @@ config MAC80211_RC_PID
 	  different rate control algorithm.
 
 config MAC80211_RC_SIMPLE
-	bool "Simple rate control algorithm (DEPRECATED)"
-	default n
-	depends on MAC80211
+	tristate "Simple rate control algorithm (DEPRECATED)"
 	---help---
 	  This option enables a very simple, non-responsive TX
 	  rate control algorithm. This algorithm is deprecated
-	  and will be removed from the kernel in near future.
+	  and will be removed from the kernel in the near future.
 	  It has been replaced by the PID algorithm.
 
 	  Say N unless you know what you are doing.
+endmenu
 
 config MAC80211_LEDS
 	bool "Enable LED triggers"
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 06aea8009cd..54f46bc80cf 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,19 +1,15 @@
 obj-$(CONFIG_MAC80211) += mac80211.o
 
-mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
-mac80211-objs-$(CONFIG_NET_SCHED) += wme.o
-mac80211-objs-$(CONFIG_MAC80211_RC_SIMPLE) += rc80211_simple.o
-mac80211-objs-$(CONFIG_MAC80211_RC_PID) += rc80211_pid_algo.o
+# objects for PID algorithm
+rc80211_pid-y := rc80211_pid_algo.o
+rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
 
-mac80211-debugfs-objs-$(CONFIG_MAC80211_RC_PID) += rc80211_pid_debugfs.o
-mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += \
-	debugfs.o \
-	debugfs_sta.o \
-	debugfs_netdev.o \
-	debugfs_key.o \
-	$(mac80211-debugfs-objs-y)
+# build helper for PID algorithm
+rc-pid-y := $(rc80211_pid-y)
+rc-pid-m := rc80211_pid.o
 
-mac80211-objs := \
+# mac80211 objects
+mac80211-y := \
 	ieee80211.o \
 	ieee80211_ioctl.o \
 	sta_info.o \
@@ -31,5 +27,22 @@ mac80211-objs := \
 	tx.o \
 	key.o \
 	util.o \
-	event.o \
-	$(mac80211-objs-y)
+	event.o
+
+mac80211-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
+mac80211-$(CONFIG_NET_SCHED) += wme.o
+mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
+	debugfs.o \
+	debugfs_sta.o \
+	debugfs_netdev.o \
+	debugfs_key.o
+
+
+# Build rate control algorithm(s)
+CFLAGS_rc80211_simple.o += -DRC80211_SIMPLE_COMPILE
+CFLAGS_rc80211_pid_algo.o += -DRC80211_PID_COMPILE
+mac80211-$(CONFIG_MAC80211_RC_SIMPLE) += rc80211_simple.o
+mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc-pid-$(CONFIG_MAC80211_RC_PID))
+
+# Modular rate algorithms are assigned to mac80211-m - make separate modules
+obj-m += $(mac80211-m)
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 2011c726f2b..8ba69ae676c 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -1323,23 +1323,19 @@ static int __init ieee80211_init(void)
 
 	BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb));
 
-#ifdef CONFIG_MAC80211_RC_SIMPLE
-	ret = ieee80211_rate_control_register(&mac80211_rcsimple);
+	ret = rc80211_simple_init();
 	if (ret)
 		goto fail;
-#endif
 
-#ifdef CONFIG_MAC80211_RC_PID
-	ret = ieee80211_rate_control_register(&mac80211_rcpid);
+	ret = rc80211_pid_init();
 	if (ret)
-		goto fail;
-#endif
+		goto fail_simple;
 
 	ret = ieee80211_wme_register();
 	if (ret) {
 		printk(KERN_DEBUG "ieee80211_init: failed to "
 		       "initialize WME (err=%d)\n", ret);
-		goto fail;
+		goto fail_pid;
 	}
 
 	ieee80211_debugfs_netdev_init();
@@ -1347,26 +1343,18 @@ static int __init ieee80211_init(void)
 
 	return 0;
 
-fail:
-
-#ifdef CONFIG_MAC80211_RC_SIMPLE
-	ieee80211_rate_control_unregister(&mac80211_rcsimple);
-#endif
-#ifdef CONFIG_MAC80211_RC_PID
-	ieee80211_rate_control_unregister(&mac80211_rcpid);
-#endif
-
+ fail_pid:
+	rc80211_simple_exit();
+ fail_simple:
+	rc80211_pid_exit();
+ fail:
 	return ret;
 }
 
 static void __exit ieee80211_exit(void)
 {
-#ifdef CONFIG_MAC80211_RC_SIMPLE
-	ieee80211_rate_control_unregister(&mac80211_rcsimple);
-#endif
-#ifdef CONFIG_MAC80211_RC_PID
-	ieee80211_rate_control_unregister(&mac80211_rcpid);
-#endif
+	rc80211_simple_exit();
+	rc80211_pid_exit();
 
 	ieee80211_wme_unregister();
 	ieee80211_debugfs_netdev_exit();
diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
index 65fc9ad615e..5676a26a7c7 100644
--- a/net/mac80211/ieee80211_rate.c
+++ b/net/mac80211/ieee80211_rate.c
@@ -115,6 +115,10 @@ ieee80211_rate_control_ops_get(const char *name)
 		/* try default if specific alg requested but not found */
 		ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
 
+	/* try built-in one if specific alg requested but not found */
+	if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
+		ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
+
 	return ops;
 }
 
diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
index 3eb0696f375..73f19e8aa51 100644
--- a/net/mac80211/ieee80211_rate.h
+++ b/net/mac80211/ieee80211_rate.h
@@ -58,12 +58,6 @@ struct rate_control_ref {
 	struct kref kref;
 };
 
-/* default 'simple' algorithm */
-extern struct rate_control_ops mac80211_rcsimple;
-
-/* 'PID' algorithm */
-extern struct rate_control_ops mac80211_rcpid;
-
 int ieee80211_rate_control_register(struct rate_control_ops *ops);
 void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
 
@@ -170,4 +164,36 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
 				 const char *name);
 void rate_control_deinitialize(struct ieee80211_local *local);
 
+
+/* Rate control algorithms */
+#if defined(RC80211_SIMPLE_COMPILE) || \
+	(defined(CONFIG_MAC80211_RC_SIMPLE) && \
+	 !defined(CONFIG_MAC80211_RC_SIMPLE_MODULE))
+extern int rc80211_simple_init(void);
+extern void rc80211_simple_exit(void);
+#else
+static inline int rc80211_simple_init(void)
+{
+	return 0;
+}
+static inline void rc80211_simple_exit(void)
+{
+}
+#endif
+
+#if defined(RC80211_PID_COMPILE) || \
+	(defined(CONFIG_MAC80211_RC_PID) && \
+	 !defined(CONFIG_MAC80211_RC_PID_MODULE))
+extern int rc80211_pid_init(void);
+extern void rc80211_pid_exit(void);
+#else
+static inline int rc80211_pid_init(void)
+{
+	return 0;
+}
+static inline void rc80211_pid_exit(void)
+{
+}
+#endif
+
 #endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index da3529017da..0995bb9cec2 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -12,7 +12,7 @@
 #include <linux/netdevice.h>
 #include <linux/types.h>
 #include <linux/skbuff.h>
-
+#include <linux/debugfs.h>
 #include <net/mac80211.h>
 #include "ieee80211_rate.h"
 
@@ -492,7 +492,7 @@ static void rate_control_pid_free_sta(void *priv, void *priv_sta)
 	kfree(spinfo);
 }
 
-struct rate_control_ops mac80211_rcpid = {
+static struct rate_control_ops mac80211_rcpid = {
 	.name = "pid",
 	.tx_status = rate_control_pid_tx_status,
 	.get_rate = rate_control_pid_get_rate,
@@ -507,3 +507,23 @@ struct rate_control_ops mac80211_rcpid = {
 	.remove_sta_debugfs = rate_control_pid_remove_sta_debugfs,
 #endif
 };
+
+MODULE_DESCRIPTION("PID controller based rate control algorithm");
+MODULE_AUTHOR("Stefano Brivio");
+MODULE_AUTHOR("Mattias Nissler");
+MODULE_LICENSE("GPL");
+
+int __init rc80211_pid_init(void)
+{
+	return ieee80211_rate_control_register(&mac80211_rcpid);
+}
+
+void __exit rc80211_pid_exit(void)
+{
+	ieee80211_rate_control_unregister(&mac80211_rcpid);
+}
+
+#ifdef CONFIG_MAC80211_RC_PID_MODULE
+module_init(rc80211_pid_init);
+module_exit(rc80211_pid_exit);
+#endif
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index c1c8b76a56a..33de6f967e5 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 #include <linux/compiler.h>
+#include <linux/module.h>
 
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
@@ -349,7 +350,7 @@ static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta)
 }
 #endif
 
-struct rate_control_ops mac80211_rcsimple = {
+static struct rate_control_ops mac80211_rcsimple = {
 	.name = "simple",
 	.tx_status = rate_control_simple_tx_status,
 	.get_rate = rate_control_simple_get_rate,
@@ -364,3 +365,21 @@ struct rate_control_ops mac80211_rcsimple = {
 	.remove_sta_debugfs = rate_control_simple_remove_sta_debugfs,
 #endif
 };
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Simple rate control algorithm");
+
+int __init rc80211_simple_init(void)
+{
+	return ieee80211_rate_control_register(&mac80211_rcsimple);
+}
+
+void __exit rc80211_simple_exit(void)
+{
+	ieee80211_rate_control_unregister(&mac80211_rcsimple);
+}
+
+#ifdef CONFIG_MAC80211_RC_SIMPLE_MODULE
+module_init(rc80211_simple_init);
+module_exit(rc80211_simple_exit);
+#endif
-- 
cgit v1.2.3


From df26e7ea049abe5104062f1f3e9ee7ede9d5104f Mon Sep 17 00:00:00 2001
From: Andrew Lutomirski <luto@myrealbox.com>
Date: Thu, 3 Jan 2008 21:05:37 -0800
Subject: rc80211_pid should respect fixed rates.

I would argue that mac80211 should handle fixed rates outside the rate
control code, which would also allow them to take effect immediately
instead of during the rate control callback, but this is pretty close
to correct.

Signed-Off-By: Andy Lutomirski <luto@myrealbox.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rc80211_pid_algo.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 0995bb9cec2..66cae53a647 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -108,10 +108,6 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 	int back = (adj > 0) ? 1 : -1;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
-	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
-		/* forced unicast rate - do not change STA rate */
-		return;
-	}
 
 	mode = local->oper_hw_mode;
 	maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
@@ -241,6 +237,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_sub_if_data *sdata;
 	struct rc_pid_info *pinfo = priv;
 	struct sta_info *sta;
 	struct rc_pid_sta_info *spinfo;
@@ -251,6 +248,13 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	if (!sta)
 		return;
 
+	/* Don't update the state if we're not controlling the rate. */
+	sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
+		sta->txrate = sdata->bss->max_ratectrl_rateidx;
+		return;
+	}
+
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
 	if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
-- 
cgit v1.2.3


From cdcb006fbe7a74b5f7827f5c5c27e11399a2fab7 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Mon, 7 Jan 2008 19:45:24 +0100
Subject: mac80211: Add radio led trigger

Some devices have a seperate LED which indicates if the radio is
enabled or not. This adds a LED trigger to mac80211 where drivers
can hook into when they are interested in radio status changes.

v2: Check hw.conf.radio_enabled when calling start().

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/ieee80211.c       |  3 +++
 net/mac80211/ieee80211_i.h     |  5 +++--
 net/mac80211/ieee80211_ioctl.c |  2 ++
 net/mac80211/ieee80211_led.c   | 35 +++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_led.h   |  6 ++++++
 5 files changed, 49 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 8ba69ae676c..4807e5215a7 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -219,6 +219,7 @@ static int ieee80211_open(struct net_device *dev)
 		if (res)
 			return res;
 		ieee80211_hw_config(local);
+		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
 	switch (sdata->type) {
@@ -392,6 +393,8 @@ static int ieee80211_stop(struct net_device *dev)
 		if (local->ops->stop)
 			local->ops->stop(local_to_hw(local));
 
+		ieee80211_led_radio(local, 0);
+
 		tasklet_disable(&local->tx_pending_tasklet);
 		tasklet_disable(&local->tasklet);
 	}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 662e69ef7ee..b898b316bda 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -500,8 +500,9 @@ struct ieee80211_local {
 
 #ifdef CONFIG_MAC80211_LEDS
 	int tx_led_counter, rx_led_counter;
-	struct led_trigger *tx_led, *rx_led, *assoc_led;
-	char tx_led_name[32], rx_led_name[32], assoc_led_name[32];
+	struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
+	char tx_led_name[32], rx_led_name[32],
+	     assoc_led_name[32], radio_led_name[32];
 #endif
 
 	u32 channel_use;
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 04ddce76135..2604e21c05a 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -21,6 +21,7 @@
 
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
+#include "ieee80211_led.h"
 #include "ieee80211_rate.h"
 #include "wpa.h"
 #include "aes_ccm.h"
@@ -652,6 +653,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
 	if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
 		local->hw.conf.radio_enabled = !(data->txpower.disabled);
 		need_reconfig = 1;
+		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
 	if (need_reconfig) {
diff --git a/net/mac80211/ieee80211_led.c b/net/mac80211/ieee80211_led.c
index 4cf89af9d10..f401484ab6d 100644
--- a/net/mac80211/ieee80211_led.c
+++ b/net/mac80211/ieee80211_led.c
@@ -43,6 +43,16 @@ void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
 		led_trigger_event(local->assoc_led, LED_OFF);
 }
 
+void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
+{
+	if (unlikely(!local->radio_led))
+		return;
+	if (enabled)
+		led_trigger_event(local->radio_led, LED_FULL);
+	else
+		led_trigger_event(local->radio_led, LED_OFF);
+}
+
 void ieee80211_led_init(struct ieee80211_local *local)
 {
 	local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
@@ -77,10 +87,25 @@ void ieee80211_led_init(struct ieee80211_local *local)
 			local->assoc_led = NULL;
 		}
 	}
+
+	local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
+	if (local->radio_led) {
+		snprintf(local->radio_led_name, sizeof(local->radio_led_name),
+			 "%sradio", wiphy_name(local->hw.wiphy));
+		local->radio_led->name = local->radio_led_name;
+		if (led_trigger_register(local->radio_led)) {
+			kfree(local->radio_led);
+			local->radio_led = NULL;
+		}
+	}
 }
 
 void ieee80211_led_exit(struct ieee80211_local *local)
 {
+	if (local->radio_led) {
+		led_trigger_unregister(local->radio_led);
+		kfree(local->radio_led);
+	}
 	if (local->assoc_led) {
 		led_trigger_unregister(local->assoc_led);
 		kfree(local->assoc_led);
@@ -95,6 +120,16 @@ void ieee80211_led_exit(struct ieee80211_local *local)
 	}
 }
 
+char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	if (local->radio_led)
+		return local->radio_led_name;
+	return NULL;
+}
+EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
+
 char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
diff --git a/net/mac80211/ieee80211_led.h b/net/mac80211/ieee80211_led.h
index 0feb2261983..77b1e1ba603 100644
--- a/net/mac80211/ieee80211_led.h
+++ b/net/mac80211/ieee80211_led.h
@@ -16,6 +16,8 @@ extern void ieee80211_led_rx(struct ieee80211_local *local);
 extern void ieee80211_led_tx(struct ieee80211_local *local, int q);
 extern void ieee80211_led_assoc(struct ieee80211_local *local,
 				bool associated);
+extern void ieee80211_led_radio(struct ieee80211_local *local,
+				bool enabled);
 extern void ieee80211_led_init(struct ieee80211_local *local);
 extern void ieee80211_led_exit(struct ieee80211_local *local);
 #else
@@ -29,6 +31,10 @@ static inline void ieee80211_led_assoc(struct ieee80211_local *local,
 				       bool associated)
 {
 }
+static inline void ieee80211_led_radio(struct ieee80211_local *local,
+				       bool enabled)
+{
+}
 static inline void ieee80211_led_init(struct ieee80211_local *local)
 {
 }
-- 
cgit v1.2.3


From 4cec72c890d3a465eed08c24a4a2bfe25650318f Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 8 Jan 2008 23:48:20 -0800
Subject: [TIPC]: Use tipc_port_unlock

The file net/tipc/port.c takes a lock using the function tipc_port_lock and
then releases the lock sometimes using tipc_port_unlock and sometimes using
spin_unlock_bh(p_ptr->publ.lock).  tipc_port_unlock simply does the
spin_unlock_bh, but it seems cleaner to use it everywhere.

The problem was fixed using the following semantic patch.
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
struct port *p_ptr;
@@

   p_ptr = tipc_port_lock(...)
   ...
(
   p_ptr = tipc_port_lock(...);
|
?- spin_unlock_bh(p_ptr->publ.lock);
+  tipc_port_unlock(p_ptr);
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Jon Paul Maloy <maloy@donjonn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 76088153524..f508614ca59 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -340,7 +340,7 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
 	if (!p_ptr)
 		return -EINVAL;
 	*isunreliable = port_unreliable(p_ptr);
-	spin_unlock_bh(p_ptr->publ.lock);
+	tipc_port_unlock(p_ptr);
 	return TIPC_OK;
 }
 
@@ -369,7 +369,7 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
 	if (!p_ptr)
 		return -EINVAL;
 	*isunrejectable = port_unreturnable(p_ptr);
-	spin_unlock_bh(p_ptr->publ.lock);
+	tipc_port_unlock(p_ptr);
 	return TIPC_OK;
 }
 
@@ -843,7 +843,7 @@ static void port_dispatcher_sigh(void *dummy)
 				u32 peer_port = port_peerport(p_ptr);
 				u32 peer_node = port_peernode(p_ptr);
 
-				spin_unlock_bh(p_ptr->publ.lock);
+				tipc_port_unlock(p_ptr);
 				if (unlikely(!connected)) {
 					if (unlikely(published))
 						goto reject;
@@ -867,7 +867,7 @@ static void port_dispatcher_sigh(void *dummy)
 		case TIPC_DIRECT_MSG:{
 				tipc_msg_event cb = up_ptr->msg_cb;
 
-				spin_unlock_bh(p_ptr->publ.lock);
+				tipc_port_unlock(p_ptr);
 				if (unlikely(connected))
 					goto reject;
 				if (unlikely(!cb))
@@ -882,7 +882,7 @@ static void port_dispatcher_sigh(void *dummy)
 		case TIPC_NAMED_MSG:{
 				tipc_named_msg_event cb = up_ptr->named_msg_cb;
 
-				spin_unlock_bh(p_ptr->publ.lock);
+				tipc_port_unlock(p_ptr);
 				if (unlikely(connected))
 					goto reject;
 				if (unlikely(!cb))
@@ -913,7 +913,7 @@ err:
 				u32 peer_port = port_peerport(p_ptr);
 				u32 peer_node = port_peernode(p_ptr);
 
-				spin_unlock_bh(p_ptr->publ.lock);
+				tipc_port_unlock(p_ptr);
 				if (!connected || !cb)
 					break;
 				if (msg_origport(msg) != peer_port)
@@ -929,7 +929,7 @@ err:
 		case TIPC_DIRECT_MSG:{
 				tipc_msg_err_event cb = up_ptr->err_cb;
 
-				spin_unlock_bh(p_ptr->publ.lock);
+				tipc_port_unlock(p_ptr);
 				if (connected || !cb)
 					break;
 				skb_pull(buf, msg_hdr_sz(msg));
@@ -942,7 +942,7 @@ err:
 				tipc_named_msg_err_event cb =
 					up_ptr->named_err_cb;
 
-				spin_unlock_bh(p_ptr->publ.lock);
+				tipc_port_unlock(p_ptr);
 				if (connected || !cb)
 					break;
 				dseq.type =  msg_nametype(msg);
@@ -1107,7 +1107,7 @@ int tipc_portimportance(u32 ref, unsigned int *importance)
 	if (!p_ptr)
 		return -EINVAL;
 	*importance = (unsigned int)msg_importance(&p_ptr->publ.phdr);
-	spin_unlock_bh(p_ptr->publ.lock);
+	tipc_port_unlock(p_ptr);
 	return TIPC_OK;
 }
 
@@ -1122,7 +1122,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
 	if (!p_ptr)
 		return -EINVAL;
 	msg_set_importance(&p_ptr->publ.phdr, (u32)imp);
-	spin_unlock_bh(p_ptr->publ.lock);
+	tipc_port_unlock(p_ptr);
 	return TIPC_OK;
 }
 
-- 
cgit v1.2.3


From cb7928a528264a69b29b6001b490b64607ed0557 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 9 Jan 2008 00:18:24 -0800
Subject: [IPV4]: Remove unsupported DNAT (RTCF_NAT and RTCF_NAT) in IPV4

- The DNAT (Destination NAT) is not implemented in IPV4.

- This patch remove the code which checks these flags
in net/ipv4/arp.c and net/ipv4/route.c.

The RTCF_NAT and RTCF_NAT should stay in the header (linux/in_route.h)
because they are used in DECnet.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c   | 5 ++---
 net/ipv4/route.c | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9eb6d3ab297..46edf1c32a4 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -835,9 +835,8 @@ static int arp_process(struct sk_buff *skb)
 			}
 			goto out;
 		} else if (IN_DEV_FORWARD(in_dev)) {
-			if ((rt->rt_flags&RTCF_DNAT) ||
-			    (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
-			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0)))) {
+			    if (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
+			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) {
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 933b093721e..78c4ce424a6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1768,7 +1768,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 	if (err)
 		flags |= RTCF_DIRECTSRC;
 
-	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
+	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
 		flags |= RTCF_DOREDIRECT;
@@ -1777,7 +1777,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 		/* Not IP (i.e. ARP). Do not create route, if it is
 		 * invalid for proxy arp. DNAT routes are always valid.
 		 */
-		if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
+		if (out_dev == in_dev) {
 			err = -EINVAL;
 			goto cleanup;
 		}
-- 
cgit v1.2.3


From b5ccd792fa413f9336273cb8fa3b9dd3a7ec1735 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 9 Jan 2008 00:30:05 -0800
Subject: [NET]: Simple ctl_table to ctl_path conversions.

This patch includes many places, that only required
replacing the ctl_table-s with appropriate ctl_paths
and call register_sysctl_paths().

Nothing special was done with them.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/sysctl_net_atalk.c | 24 +++++-------------------
 net/bridge/br_netfilter.c        | 24 +++++-------------------
 net/dccp/sysctl.c                | 36 +++++++-----------------------------
 net/ipx/sysctl_net_ipx.c         | 24 +++++-------------------
 net/irda/irsysctl.c              | 28 +++++-----------------------
 net/llc/sysctl_net_llc.c         | 24 +++++-------------------
 net/netrom/sysctl_net_netrom.c   | 24 +++++-------------------
 net/rose/sysctl_net_rose.c       | 24 +++++-------------------
 net/sctp/sysctl.c                | 24 +++++-------------------
 net/x25/sysctl_net_x25.c         | 24 +++++-------------------
 10 files changed, 52 insertions(+), 204 deletions(-)

(limited to 'net')

diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index 7df1778e221..621805dfa2f 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -49,31 +49,17 @@ static struct ctl_table atalk_table[] = {
 	{ 0 },
 };
 
-static struct ctl_table atalk_dir_table[] = {
-	{
-		.ctl_name	= NET_ATALK,
-		.procname	= "appletalk",
-		.mode		= 0555,
-		.child		= atalk_table,
-	},
-	{ 0 },
-};
-
-static struct ctl_table atalk_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= atalk_dir_table,
-	},
-	{ 0 },
+static struct ctl_path atalk_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "appletalk", .ctl_name = NET_ATALK, },
+	{ }
 };
 
 static struct ctl_table_header *atalk_table_header;
 
 void atalk_register_sysctl(void)
 {
-	atalk_table_header = register_sysctl_table(atalk_root_table);
+	atalk_table_header = register_sysctl_paths(atalk_path, atalk_table);
 }
 
 void atalk_unregister_sysctl(void)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 859fe4d8669..141f069e77a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -967,24 +967,10 @@ static ctl_table brnf_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table brnf_bridge_table[] = {
-	{
-		.ctl_name	= NET_BRIDGE,
-		.procname	= "bridge",
-		.mode		= 0555,
-		.child		= brnf_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table brnf_net_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= brnf_bridge_table,
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path brnf_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "bridge", .ctl_name = NET_BRIDGE, },
+	{ }
 };
 #endif
 
@@ -996,7 +982,7 @@ int __init br_netfilter_init(void)
 	if (ret < 0)
 		return ret;
 #ifdef CONFIG_SYSCTL
-	brnf_sysctl_header = register_sysctl_table(brnf_net_table);
+	brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
 	if (brnf_sysctl_header == NULL) {
 		printk(KERN_WARNING
 		       "br_netfilter: can't register to sysctl.\n");
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index c62c05039f6..21295993fdb 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -100,41 +100,19 @@ static struct ctl_table dccp_default_table[] = {
 	{ .ctl_name = 0, }
 };
 
-static struct ctl_table dccp_table[] = {
-	{
-		.ctl_name	= NET_DCCP_DEFAULT,
-		.procname	= "default",
-		.mode		= 0555,
-		.child		= dccp_default_table,
-	},
-	{ .ctl_name = 0, },
-};
-
-static struct ctl_table dccp_dir_table[] = {
-	{
-		.ctl_name	= NET_DCCP,
-		.procname	= "dccp",
-		.mode		= 0555,
-		.child		= dccp_table,
-	},
-	{ .ctl_name = 0, },
-};
-
-static struct ctl_table dccp_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= dccp_dir_table,
-	},
-	{ .ctl_name = 0, },
+static struct ctl_path dccp_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "dccp", .ctl_name = NET_DCCP, },
+	{ .procname = "default", .ctl_name = NET_DCCP_DEFAULT, },
+	{ }
 };
 
 static struct ctl_table_header *dccp_table_header;
 
 int __init dccp_sysctl_init(void)
 {
-	dccp_table_header = register_sysctl_table(dccp_root_table);
+	dccp_table_header = register_sysctl_paths(dccp_path,
+			dccp_default_table);
 
 	return dccp_table_header != NULL ? 0 : -ENOMEM;
 }
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0cf52645053..92fef864e85 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -28,31 +28,17 @@ static struct ctl_table ipx_table[] = {
 	{ 0 },
 };
 
-static struct ctl_table ipx_dir_table[] = {
-	{
-		.ctl_name	= NET_IPX,
-		.procname	= "ipx",
-		.mode		= 0555,
-		.child		= ipx_table,
-	},
-	{ 0 },
-};
-
-static struct ctl_table ipx_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipx_dir_table,
-	},
-	{ 0 },
+static struct ctl_path ipx_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipx", .ctl_name = NET_IPX, },
+	{ }
 };
 
 static struct ctl_table_header *ipx_table_header;
 
 void ipx_register_sysctl(void)
 {
-	ipx_table_header = register_sysctl_table(ipx_root_table);
+	ipx_table_header = register_sysctl_paths(ipx_path, ipx_table);
 }
 
 void ipx_unregister_sysctl(void)
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index 565cbf0421c..d8aba869ff1 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -234,28 +234,10 @@ static ctl_table irda_table[] = {
 	{ .ctl_name = 0 }
 };
 
-/* One directory */
-static ctl_table irda_net_table[] = {
-	{
-		.ctl_name	= NET_IRDA,
-		.procname	= "irda",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= irda_table
-	},
-	{ .ctl_name = 0 }
-};
-
-/* The parent directory */
-static ctl_table irda_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= irda_net_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path irda_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "irda", .ctl_name = NET_IRDA, },
+	{ }
 };
 
 static struct ctl_table_header *irda_table_header;
@@ -268,7 +250,7 @@ static struct ctl_table_header *irda_table_header;
  */
 int __init irda_sysctl_register(void)
 {
-	irda_table_header = register_sysctl_table(irda_root_table);
+	irda_table_header = register_sysctl_paths(irda_path, irda_table);
 	if (!irda_table_header)
 		return -ENOMEM;
 
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 46992d03601..5bef1dcf18e 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -92,31 +92,17 @@ static struct ctl_table llc_table[] = {
 	{ 0 },
 };
 
-static struct ctl_table llc_dir_table[] = {
-	{
-		.ctl_name	= NET_LLC,
-		.procname	= "llc",
-		.mode		= 0555,
-		.child		= llc_table,
-	},
-	{ 0 },
-};
-
-static struct ctl_table llc_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= llc_dir_table,
-	},
-	{ 0 },
+static struct ctl_path llc_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "llc", .ctl_name = NET_LLC, },
+	{ }
 };
 
 static struct ctl_table_header *llc_table_header;
 
 int __init llc_sysctl_init(void)
 {
-	llc_table_header = register_sysctl_table(llc_root_table);
+	llc_table_header = register_sysctl_paths(llc_path, llc_table);
 
 	return llc_table_header ? 0 : -ENOMEM;
 }
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 2ea68da01fb..34c96c9674d 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -170,29 +170,15 @@ static ctl_table nr_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table nr_dir_table[] = {
-	{
-		.ctl_name	= NET_NETROM,
-		.procname	= "netrom",
-		.mode		= 0555,
-		.child		= nr_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table nr_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= nr_dir_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path nr_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "netrom", .ctl_name = NET_NETROM, },
+	{ }
 };
 
 void __init nr_register_sysctl(void)
 {
-	nr_table_header = register_sysctl_table(nr_root_table);
+	nr_table_header = register_sysctl_paths(nr_path, nr_table);
 }
 
 void nr_unregister_sysctl(void)
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index 455b0555a66..20be3485a97 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -138,29 +138,15 @@ static ctl_table rose_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table rose_dir_table[] = {
-	{
-		.ctl_name	= NET_ROSE,
-		.procname	= "rose",
-		.mode		= 0555,
-		.child		= rose_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table rose_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= rose_dir_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path rose_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "rose", .ctl_name = NET_ROSE, },
+	{ }
 };
 
 void __init rose_register_sysctl(void)
 {
-	rose_table_header = register_sysctl_table(rose_root_table);
+	rose_table_header = register_sysctl_paths(rose_path, rose_table);
 }
 
 void rose_unregister_sysctl(void)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index da4f15734fb..5eb6ea829b5 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -275,24 +275,10 @@ static ctl_table sctp_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table sctp_net_table[] = {
-	{
-		.ctl_name	= NET_SCTP,
-		.procname	= "sctp",
-		.mode		= 0555,
-		.child		= sctp_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table sctp_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= sctp_net_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path sctp_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "sctp", .ctl_name = NET_SCTP, },
+	{ }
 };
 
 static struct ctl_table_header * sctp_sysctl_header;
@@ -300,7 +286,7 @@ static struct ctl_table_header * sctp_sysctl_header;
 /* Sysctl registration.  */
 void sctp_sysctl_register(void)
 {
-	sctp_sysctl_header = register_sysctl_table(sctp_root_table);
+	sctp_sysctl_header = register_sysctl_paths(sctp_path, sctp_table);
 }
 
 /* Sysctl deregistration.  */
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index a59b77f1823..6ebda25c24e 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -84,29 +84,15 @@ static struct ctl_table x25_table[] = {
 	{ 0, },
 };
 
-static struct ctl_table x25_dir_table[] = {
-	{
-		.ctl_name =	NET_X25,
-		.procname =	"x25",
-		.mode =		0555,
-		.child =	x25_table,
-	},
-	{ 0, },
-};
-
-static struct ctl_table x25_root_table[] = {
-	{
-		.ctl_name =	CTL_NET,
-		.procname =	"net",
-		.mode =		0555,
-		.child =	x25_dir_table,
-	},
-	{ 0, },
+static struct ctl_path x25_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "x25", .ctl_name = NET_X25, },
+	{ }
 };
 
 void __init x25_register_sysctl(void)
 {
-	x25_table_header = register_sysctl_table(x25_root_table);
+	x25_table_header = register_sysctl_paths(x25_path, x25_table);
 }
 
 void x25_unregister_sysctl(void)
-- 
cgit v1.2.3


From 90754f8ec011c91382ece2849a43b8f8aa8a0105 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 12 Jan 2008 02:33:50 -0800
Subject: [IPVS]: Switch to using ctl_paths.

The feature of ipvs ctls is that the net/ipv4/vs path
is common for core ipvs ctls and for two schedulers,
so I make it exported and re-use it in modules.

Two other .c files required linux/sysctl.h to make the
extern declaration of this path compile well.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_ctl.c   | 35 +++++++----------------------------
 net/ipv4/ipvs/ip_vs_est.c   |  1 +
 net/ipv4/ipvs/ip_vs_lblc.c  | 31 +------------------------------
 net/ipv4/ipvs/ip_vs_lblcr.c | 31 +------------------------------
 net/ipv4/ipvs/ip_vs_sched.c |  1 +
 5 files changed, 11 insertions(+), 88 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 693d92490c1..9fecfe7d116 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1591,34 +1591,13 @@ static struct ctl_table vs_vars[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table vs_table[] = {
-	{
-		.procname	= "vs",
-		.mode		= 0555,
-		.child		= vs_vars
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipvs_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= vs_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table vs_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipvs_ipv4_table,
-	},
-	{ .ctl_name = 0 }
+struct ctl_path net_vs_ctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "vs", },
+	{ }
 };
+EXPORT_SYMBOL_GPL(net_vs_ctl_path);
 
 static struct ctl_table_header * sysctl_header;
 
@@ -2345,7 +2324,7 @@ int ip_vs_control_init(void)
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
-	sysctl_header = register_sysctl_table(vs_root_table);
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
 
 	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index efdd74e4fa2..dfa0d713c80 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/sysctl.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index bf8c04a5754..3888642706a 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -123,35 +123,6 @@ static ctl_table vs_vars_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table vs_table[] = {
-	{
-		.procname	= "vs",
-		.mode		= 0555,
-		.child		= vs_vars_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipvs_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= vs_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table lblc_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipvs_ipv4_table
-	},
-	{ .ctl_name = 0 }
-};
-
 static struct ctl_table_header * sysctl_header;
 
 /*
@@ -582,7 +553,7 @@ static int __init ip_vs_lblc_init(void)
 	int ret;
 
 	INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
-	sysctl_header = register_sysctl_table(lblc_root_table);
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
 	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	if (ret)
 		unregister_sysctl_table(sysctl_header);
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index f50da641137..daa260eb21c 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -311,35 +311,6 @@ static ctl_table vs_vars_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table vs_table[] = {
-	{
-		.procname	= "vs",
-		.mode		= 0555,
-		.child		= vs_vars_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipvs_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= vs_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table lblcr_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipvs_ipv4_table
-	},
-	{ .ctl_name = 0 }
-};
-
 static struct ctl_table_header * sysctl_header;
 
 /*
@@ -771,7 +742,7 @@ static int __init ip_vs_lblcr_init(void)
 	int ret;
 
 	INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
-	sysctl_header = register_sysctl_table(lblcr_root_table);
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
 	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	if (ret)
 		unregister_sysctl_table(sysctl_header);
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 43223586190..121a32b1b75 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <asm/string.h>
 #include <linux/kmod.h>
+#include <linux/sysctl.h>
 
 #include <net/ip_vs.h>
 
-- 
cgit v1.2.3


From 3151a9ab04714d0323efbe3c6c12bc5b2b1f058d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 9 Jan 2008 00:31:49 -0800
Subject: [DECNET]: Switch to using ctl_paths.

The decnet includes two places to patch. The first one is
the net/decnet table itself, and it is patched just like
other subsystems in the first patch in this series.

The second place is a bit more complex - it is the
net/decnet/conf/xxx entries,. similar to those in
ipv4/devinet.c and ipv6/addrconf.c. This code is made similar
to those in ipv[46].

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_dev.c            | 52 ++++++++++++------------------------------
 net/decnet/sysctl_net_decnet.c | 23 ++++---------------
 2 files changed, 20 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 39c89c68204..1bbfce5f7a2 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -173,10 +173,6 @@ static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
 static struct dn_dev_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	ctl_table dn_dev_vars[5];
-	ctl_table dn_dev_dev[2];
-	ctl_table dn_dev_conf_dir[2];
-	ctl_table dn_dev_proto_dir[2];
-	ctl_table dn_dev_root_dir[2];
 } dn_dev_sysctl = {
 	NULL,
 	{
@@ -224,30 +220,6 @@ static struct dn_dev_sysctl_table {
 	},
 	{0}
 	},
-	{{
-		.ctl_name = 0,
-		.procname = "",
-		.mode = 0555,
-		.child = dn_dev_sysctl.dn_dev_vars
-	}, {0}},
-	{{
-		.ctl_name = NET_DECNET_CONF,
-		.procname = "conf",
-		.mode = 0555,
-		.child = dn_dev_sysctl.dn_dev_dev
-	}, {0}},
-	{{
-		.ctl_name = NET_DECNET,
-		.procname = "decnet",
-		.mode = 0555,
-		.child = dn_dev_sysctl.dn_dev_conf_dir
-	}, {0}},
-	{{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = dn_dev_sysctl.dn_dev_proto_dir
-	}, {0}}
 };
 
 static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
@@ -255,6 +227,16 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
 	struct dn_dev_sysctl_table *t;
 	int i;
 
+#define DN_CTL_PATH_DEV	3
+
+	struct ctl_path dn_ctl_path[] = {
+		{ .procname = "net", .ctl_name = CTL_NET, },
+		{ .procname = "decnet", .ctl_name = NET_DECNET, },
+		{ .procname = "conf", .ctl_name = NET_DECNET_CONF, },
+		{ /* to be set */ },
+		{ },
+	};
+
 	t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
 		return;
@@ -265,20 +247,16 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
 	}
 
 	if (dev) {
-		t->dn_dev_dev[0].procname = dev->name;
-		t->dn_dev_dev[0].ctl_name = dev->ifindex;
+		dn_ctl_path[DN_CTL_PATH_DEV].procname = dev->name;
+		dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = dev->ifindex;
 	} else {
-		t->dn_dev_dev[0].procname = parms->name;
-		t->dn_dev_dev[0].ctl_name = parms->ctl_name;
+		dn_ctl_path[DN_CTL_PATH_DEV].procname = parms->name;
+		dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = parms->ctl_name;
 	}
 
-	t->dn_dev_dev[0].child = t->dn_dev_vars;
-	t->dn_dev_conf_dir[0].child = t->dn_dev_dev;
-	t->dn_dev_proto_dir[0].child = t->dn_dev_conf_dir;
-	t->dn_dev_root_dir[0].child = t->dn_dev_proto_dir;
 	t->dn_dev_vars[0].extra1 = (void *)dev;
 
-	t->sysctl_header = register_sysctl_table(t->dn_dev_root_dir);
+	t->sysctl_header = register_sysctl_paths(dn_ctl_path, t->dn_dev_vars);
 	if (t->sysctl_header == NULL)
 		kfree(t);
 	else
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index ae354a43fb9..228067c571b 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -470,28 +470,15 @@ static ctl_table dn_table[] = {
 	{0}
 };
 
-static ctl_table dn_dir_table[] = {
-	{
-		.ctl_name = NET_DECNET,
-		.procname = "decnet",
-		.mode = 0555,
-		.child = dn_table},
-	{0}
-};
-
-static ctl_table dn_root_table[] = {
-	{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = dn_dir_table
-	},
-	{0}
+static struct ctl_path dn_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "decnet", .ctl_name = NET_DECNET, },
+	{ }
 };
 
 void dn_register_sysctl(void)
 {
-	dn_table_header = register_sysctl_table(dn_root_table);
+	dn_table_header = register_sysctl_paths(dn_path, dn_table);
 }
 
 void dn_unregister_sysctl(void)
-- 
cgit v1.2.3


From c6995bdff0fc9f97621922a7f857fa640537a7e2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 9 Jan 2008 00:32:21 -0800
Subject: [AX25]: Switch to using ctl_paths.

This one is almost the same as the hunks in the
first patch, but ax25 tables are created dynamically.

So this patch differs a bit to handle this case.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/sysctl_net_ax25.c | 27 +++++----------------------
 1 file changed, 5 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 443a8367663..f597987b242 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -31,25 +31,11 @@ static struct ctl_table_header *ax25_table_header;
 static ctl_table *ax25_table;
 static int ax25_table_size;
 
-static ctl_table ax25_dir_table[] = {
-	{
-		.ctl_name	= NET_AX25,
-		.procname	= "ax25",
-		.mode		= 0555,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ax25_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ax25_dir_table
-	},
-	{ .ctl_name = 0 }
+static struct ctl_path ax25_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ax25", .ctl_name = NET_AX25, },
+	{ }
 };
-
 static const ctl_table ax25_param_table[] = {
 	{
 		.ctl_name	= NET_AX25_IP_DEFAULT_MODE,
@@ -243,9 +229,7 @@ void ax25_register_sysctl(void)
 	}
 	spin_unlock_bh(&ax25_dev_lock);
 
-	ax25_dir_table[0].child = ax25_table;
-
-	ax25_table_header = register_sysctl_table(ax25_root_table);
+	ax25_table_header = register_sysctl_paths(ax25_path, ax25_table);
 }
 
 void ax25_unregister_sysctl(void)
@@ -253,7 +237,6 @@ void ax25_unregister_sysctl(void)
 	ctl_table *p;
 	unregister_sysctl_table(ax25_table_header);
 
-	ax25_dir_table[0].child = NULL;
 	for (p = ax25_table; p->ctl_name; p++)
 		kfree(p->child);
 	kfree(ax25_table);
-- 
cgit v1.2.3


From 3d7cc2ba628dcc6b55a2bafc7eaf35019fdcc201 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 9 Jan 2008 00:33:11 -0800
Subject: [NETFILTER]: Switch to using ctl_paths in nf_queue and conntrack
 modules

This includes the most simple cases for netfilter.

The first part is tne queue modules for ipv4 and ipv6,
on which the net/ipv4/ and net/ipv6/ paths are reused
from the appropriate ipv4 and ipv6 code.

The conntrack module is also patched, but this hunk is
very small and simple.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c           | 23 ++---------------------
 net/ipv4/sysctl_net_ipv4.c              |  5 +++--
 net/ipv6/netfilter/ip6_queue.c          | 22 +---------------------
 net/ipv6/sysctl_net_ipv6.c              |  6 ++++--
 net/netfilter/nf_conntrack_standalone.c | 15 ++++++---------
 5 files changed, 16 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 68b12ce8ba5..7361315f20c 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -29,6 +29,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/ip.h>
 
 #define IPQ_QMAX_DEFAULT 1024
 #define IPQ_PROC_FS_NAME "ip_queue"
@@ -525,26 +526,6 @@ static ctl_table ipq_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table ipq_dir_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= ipq_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipq_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipq_dir_table
-	},
-	{ .ctl_name = 0 }
-};
-
 static int ip_queue_show(struct seq_file *m, void *v)
 {
 	read_lock_bh(&queue_lock);
@@ -610,7 +591,7 @@ static int __init ip_queue_init(void)
 	}
 
 	register_netdevice_notifier(&ipq_dev_notifier);
-	ipq_sysctl_header = register_sysctl_table(ipq_root_table);
+	ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
 
 	status = nf_register_queue_handler(PF_INET, &nfqh);
 	if (status < 0) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a5a9f8e3bb2..45536a91266 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -846,17 +846,18 @@ static struct ctl_table ipv4_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static __initdata struct ctl_path net_ipv4_path[] = {
+struct ctl_path net_ipv4_ctl_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
 	{ },
 };
+EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
 
 static __init int sysctl_ipv4_init(void)
 {
 	struct ctl_table_header *hdr;
 
-	hdr = register_sysctl_paths(net_ipv4_path, ipv4_table);
+	hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
 	return hdr == NULL ? -ENOMEM : 0;
 }
 
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index e5b0059582f..a20db0bb5a1 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -529,26 +529,6 @@ static ctl_table ipq_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table ipq_dir_table[] = {
-	{
-		.ctl_name	= NET_IPV6,
-		.procname	= "ipv6",
-		.mode		= 0555,
-		.child		= ipq_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipq_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipq_dir_table
-	},
-	{ .ctl_name = 0 }
-};
-
 static int ip6_queue_show(struct seq_file *m, void *v)
 {
 	read_lock_bh(&queue_lock);
@@ -614,7 +594,7 @@ static int __init ip6_queue_init(void)
 	}
 
 	register_netdevice_notifier(&ipq_dev_notifier);
-	ipq_sysctl_header = register_sysctl_table(ipq_root_table);
+	ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
 
 	status = nf_register_queue_handler(PF_INET6, &nfqh);
 	if (status < 0) {
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 0b5bec3cb79..4ad8d9d3cb7 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -82,17 +82,19 @@ static ctl_table ipv6_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static struct ctl_path ipv6_ctl_path[] = {
+struct ctl_path net_ipv6_ctl_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "ipv6", .ctl_name = NET_IPV6, },
 	{ },
 };
+EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
 static struct ctl_table_header *ipv6_sysctl_header;
 
 void ipv6_sysctl_register(void)
 {
-	ipv6_sysctl_header = register_sysctl_paths(ipv6_ctl_path, ipv6_table);
+	ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path,
+			ipv6_table);
 }
 
 void ipv6_sysctl_unregister(void)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9efdd37fc19..2ad49331302 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -383,15 +383,11 @@ static ctl_table nf_ct_netfilter_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table nf_ct_net_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= nf_ct_netfilter_table,
-	},
-	{ .ctl_name = 0 }
+struct ctl_path nf_ct_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ }
 };
+
 EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
 #endif /* CONFIG_SYSCTL */
 
@@ -418,7 +414,8 @@ static int __init nf_conntrack_standalone_init(void)
 	proc_stat->owner = THIS_MODULE;
 #endif
 #ifdef CONFIG_SYSCTL
-	nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table);
+	nf_ct_sysctl_header = register_sysctl_paths(nf_ct_path,
+			nf_ct_netfilter_table);
 	if (nf_ct_sysctl_header == NULL) {
 		printk("nf_conntrack: can't register to sysctl.\n");
 		ret = -ENOMEM;
-- 
cgit v1.2.3


From b3fd3ffe39d830e7c24ef63b7f28703b485da2e3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 9 Jan 2008 00:34:02 -0800
Subject: [NETFILTER]: Use the ctl paths instead of hand-made analogue

The conntracks subsystem has a similar infrastructure
to maintain ctl_paths, but since we already have it
on the generic level, I think it's OK to switch to
using it.

So, basically, this patch just replaces the ctl_table-s
with ctl_path-s, nf_register_sysctl_table with
register_sysctl_paths() and removes no longer needed code.

After this the net/netfilter/nf_sysctl.c file contains
the paths only.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto.c |   7 +-
 net/netfilter/nf_sysctl.c          | 127 +++----------------------------------
 2 files changed, 13 insertions(+), 121 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 6d947068c58..8595b5946ac 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -36,11 +36,11 @@ static DEFINE_MUTEX(nf_ct_proto_mutex);
 
 #ifdef CONFIG_SYSCTL
 static int
-nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
+nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_path *path,
 		      struct ctl_table *table, unsigned int *users)
 {
 	if (*header == NULL) {
-		*header = nf_register_sysctl_table(path, table);
+		*header = register_sysctl_paths(path, table);
 		if (*header == NULL)
 			return -ENOMEM;
 	}
@@ -55,7 +55,8 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
 {
 	if (users != NULL && --*users > 0)
 		return;
-	nf_unregister_sysctl_table(*header, table);
+
+	unregister_sysctl_table(*header);
 	*header = NULL;
 }
 #endif
diff --git a/net/netfilter/nf_sysctl.c b/net/netfilter/nf_sysctl.c
index ee34589e48a..d9fcc893301 100644
--- a/net/netfilter/nf_sysctl.c
+++ b/net/netfilter/nf_sysctl.c
@@ -7,128 +7,19 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 
-static void
-path_free(struct ctl_table *path, struct ctl_table *table)
-{
-	struct ctl_table *t, *next;
-
-	for (t = path; t != NULL && t != table; t = next) {
-		next = t->child;
-		kfree(t);
-	}
-}
-
-static struct ctl_table *
-path_dup(struct ctl_table *path, struct ctl_table *table)
-{
-	struct ctl_table *t, *last = NULL, *tmp;
-
-	for (t = path; t != NULL; t = t->child) {
-		/* twice the size since path elements are terminated by an
-		 * empty element */
-		tmp = kmemdup(t, 2 * sizeof(*t), GFP_KERNEL);
-		if (tmp == NULL) {
-			if (last != NULL)
-				path_free(path, table);
-			return NULL;
-		}
-
-		if (last != NULL)
-			last->child = tmp;
-		else
-			path = tmp;
-		last = tmp;
-	}
-
-	if (last != NULL)
-		last->child = table;
-	else
-		path = table;
-
-	return path;
-}
-
-struct ctl_table_header *
-nf_register_sysctl_table(struct ctl_table *path, struct ctl_table *table)
-{
-	struct ctl_table_header *header;
-
-	path = path_dup(path, table);
-	if (path == NULL)
-		return NULL;
-	header = register_sysctl_table(path);
-	if (header == NULL)
-		path_free(path, table);
-	return header;
-}
-EXPORT_SYMBOL_GPL(nf_register_sysctl_table);
-
-void
-nf_unregister_sysctl_table(struct ctl_table_header *header,
-			   struct ctl_table *table)
-{
-	struct ctl_table *path = header->ctl_table;
-
-	unregister_sysctl_table(header);
-	path_free(path, table);
-}
-EXPORT_SYMBOL_GPL(nf_unregister_sysctl_table);
-
 /* net/netfilter */
-static struct ctl_table nf_net_netfilter_table[] = {
-	{
-		.ctl_name	= NET_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-	},
-	{
-		.ctl_name	= 0
-	}
-};
-struct ctl_table nf_net_netfilter_sysctl_path[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= nf_net_netfilter_table,
-	},
-	{
-		.ctl_name	= 0
-	}
+struct ctl_path nf_net_netfilter_sysctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "netfilter", .ctl_name = NET_NETFILTER, },
+	{ }
 };
 EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
 
 /* net/ipv4/netfilter */
-static struct ctl_table nf_net_ipv4_netfilter_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-	},
-	{
-		.ctl_name	= 0
-	}
-};
-static struct ctl_table nf_net_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= nf_net_ipv4_netfilter_table,
-	},
-	{
-		.ctl_name	= 0
-	}
-};
-struct ctl_table nf_net_ipv4_netfilter_sysctl_path[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= nf_net_ipv4_table,
-	},
-	{
-		.ctl_name	= 0
-	}
+struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, },
+	{ }
 };
 EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
-- 
cgit v1.2.3


From 06eaa1a01d033bd5fd298b010334034f93021fbe Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 9 Jan 2008 00:35:12 -0800
Subject: [BRIDGE]: Remove unused macros from ebt_vlan.c

Remove two unused macros, INV_FLAG and SET_BITMASK
from net/bridge/netfilter/ebt_vlan.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_vlan.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index a43c697d3d7..0ddf7499d49 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -37,9 +37,7 @@ MODULE_LICENSE("GPL");
 
 
 #define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
-#define INV_FLAG(_inv_flag_) (info->invflags & _inv_flag_) ? "!" : ""
 #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
-#define SET_BITMASK(_BIT_MASK_) info->bitmask |= _BIT_MASK_
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;}
 
 static int
-- 
cgit v1.2.3


From 50dd79653e295ce40da38c03bbfb5966aa1c1598 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <sebastian@breakpoint.cc>
Date: Wed, 9 Jan 2008 00:36:17 -0800
Subject: [XFRM]: Remove ifdef crypto.

and select the crypto subsystem if neccessary

Signed-off-by: Sebastian Siewior <sebastian@breakpoint.cc>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Kconfig     | 1 +
 net/xfrm/xfrm_algo.c | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 6b5b50f427f..8f9dbec319b 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -3,6 +3,7 @@
 #
 config XFRM
        bool
+       select CRYPTO
        depends on NET
 
 config XFRM_USER
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 1686f64c435..b5c5347aed6 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -486,7 +486,6 @@ EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
  */
 void xfrm_probe_algs(void)
 {
-#ifdef CONFIG_CRYPTO
 	int i, status;
 
 	BUG_ON(in_softirq());
@@ -511,7 +510,6 @@ void xfrm_probe_algs(void)
 		if (calg_list[i].available != status)
 			calg_list[i].available = status;
 	}
-#endif
 }
 EXPORT_SYMBOL_GPL(xfrm_probe_algs);
 
-- 
cgit v1.2.3


From 291480c09a9452a3d8852a9bfeb5ba2cbcfe662c Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:47:55 -0800
Subject: [NETNS][IPV6]: Make ipv6_sysctl_register to return a value.

This patch makes the function ipv6_sysctl_register to return a
value. The af_inet6 init function is now able to handle an error and
catch it from the initialization of the sysctl.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c        | 5 ++++-
 net/ipv6/sysctl_net_ipv6.c | 9 +++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 34c20533ba5..a2842400a09 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -783,7 +783,9 @@ static int __init inet6_init(void)
 	 */
 
 #ifdef CONFIG_SYSCTL
-	ipv6_sysctl_register();
+	err = ipv6_sysctl_register();
+	if (err)
+		goto sysctl_fail;
 #endif
 	err = icmpv6_init(&inet6_family_ops);
 	if (err)
@@ -897,6 +899,7 @@ ndisc_fail:
 icmp_fail:
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
+sysctl_fail:
 #endif
 	cleanup_ipv6_mibs();
 out_unregister_sock:
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 4ad8d9d3cb7..f713fbf34c2 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -91,10 +91,15 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
 static struct ctl_table_header *ipv6_sysctl_header;
 
-void ipv6_sysctl_register(void)
+int ipv6_sysctl_register(void)
 {
 	ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path,
-			ipv6_table);
+						   ipv6_table);
+	if (!ipv6_sysctl_header)
+		return -ENOMEM;
+
+	return 0;
+
 }
 
 void ipv6_sysctl_unregister(void)
-- 
cgit v1.2.3


From 81c1c17804ac52036e07b0ba95cb637bdd4784d6 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:48:33 -0800
Subject: [NETNS][IPV6]: Make a subsystem for af_inet6.

This patch add a network namespace subsystem for the af_inet6 module.
It does nothing right now, but one of its purpose is to receive the
different variables for sysctl in order to initialize them.

When the sysctl variable will be moved to the network namespace
structure, they will be no longer initialized as global static
variables, so we must find a place to initialize them. Because the
sysctl can be disabled, it has no sense to store them in the
sysctl_net_ipv6 file.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a2842400a09..72b898fe2dd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -719,6 +719,21 @@ static void cleanup_ipv6_mibs(void)
 	snmp_mib_free((void **)udplite_stats_in6);
 }
 
+static int inet6_net_init(struct net *net)
+{
+	return 0;
+}
+
+static void inet6_net_exit(struct net *net)
+{
+	return;
+}
+
+static struct pernet_operations inet6_net_ops = {
+	.init = inet6_net_init,
+	.exit = inet6_net_exit,
+};
+
 static int __init inet6_init(void)
 {
 	struct sk_buff *dummy_skb;
@@ -782,6 +797,10 @@ static int __init inet6_init(void)
 	 *	able to communicate via both network protocols.
 	 */
 
+	err = register_pernet_subsys(&inet6_net_ops);
+	if (err)
+		goto register_pernet_fail;
+
 #ifdef CONFIG_SYSCTL
 	err = ipv6_sysctl_register();
 	if (err)
@@ -901,6 +920,8 @@ icmp_fail:
 	ipv6_sysctl_unregister();
 sysctl_fail:
 #endif
+	unregister_pernet_subsys(&inet6_net_ops);
+register_pernet_fail:
 	cleanup_ipv6_mibs();
 out_unregister_sock:
 	sock_unregister(PF_INET6);
@@ -956,6 +977,7 @@ static void __exit inet6_exit(void)
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
 #endif
+	unregister_pernet_subsys(&inet6_net_ops);
 	cleanup_ipv6_mibs();
 	proto_unregister(&rawv6_prot);
 	proto_unregister(&udplitev6_prot);
-- 
cgit v1.2.3


From 89918fc270bb77cb1a0703f0ea566a692b32e324 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:49:34 -0800
Subject: [NETNS][IPV6]: Make the ipv6 sysctl to be a netns subsystem.

The initialization of the sysctl for the ipv6 protocol is changed to a
network namespace subsystem. That means when a new network namespace
is created the initialization function for the sysctl will be called.

That do not change the behavior of the sysctl in case of the kernel
with the network namespace disabled.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index f713fbf34c2..7329decf1f9 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -91,10 +91,10 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
 static struct ctl_table_header *ipv6_sysctl_header;
 
-int ipv6_sysctl_register(void)
+static int ipv6_sysctl_net_init(struct net *net)
 {
-	ipv6_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path,
-						   ipv6_table);
+	ipv6_sysctl_header = register_net_sysctl_table(net, net_ipv6_ctl_path,
+						       ipv6_table);
 	if (!ipv6_sysctl_header)
 		return -ENOMEM;
 
@@ -102,7 +102,22 @@ int ipv6_sysctl_register(void)
 
 }
 
+static void ipv6_sysctl_net_exit(struct net *net)
+{
+	unregister_net_sysctl_table(ipv6_sysctl_header);
+}
+
+static struct pernet_operations ipv6_sysctl_net_ops = {
+	.init = ipv6_sysctl_net_init,
+	.exit = ipv6_sysctl_net_exit,
+};
+
+int ipv6_sysctl_register(void)
+{
+	return register_pernet_subsys(&ipv6_sysctl_net_ops);
+}
+
 void ipv6_sysctl_unregister(void)
 {
-	unregister_sysctl_table(ipv6_sysctl_header);
+	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
 }
-- 
cgit v1.2.3


From 760f2d0186225f06d46e07232d65219c5055cad3 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:53:43 -0800
Subject: [NETNS][IPV6]: Make multiple instance of sysctl tables.

Each network namespace wants its own set of sysctl value, eg. we
should not be able from a namespace to set a sysctl value for another
namespace , especially for the initial network namespace.

This patch duplicates the sysctl table when we register a new network
namespace for ipv6. The duplicated table are postfixed with the
"template" word to notify the developper the table is cloned.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c            | 12 ++++++++-
 net/ipv6/route.c           | 11 +++++++-
 net/ipv6/sysctl_net_ipv6.c | 67 +++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 78 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c3bbd868730..dfe3b37c43e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -907,7 +907,7 @@ int icmpv6_err_convert(int type, int code, int *err)
 EXPORT_SYMBOL(icmpv6_err_convert);
 
 #ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table[] = {
+ctl_table ipv6_icmp_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
 		.procname	= "ratelimit",
@@ -918,5 +918,15 @@ ctl_table ipv6_icmp_table[] = {
 	},
 	{ .ctl_name = 0 },
 };
+
+struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(ipv6_icmp_table_template,
+			sizeof(ipv6_icmp_table_template),
+			GFP_KERNEL);
+	return table;
+}
 #endif
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b80ef578420..0c7382f4fb8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2409,7 +2409,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
 		return -EINVAL;
 }
 
-ctl_table ipv6_route_table[] = {
+ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
 		.data		=	&flush_delay,
@@ -2499,6 +2499,15 @@ ctl_table ipv6_route_table[] = {
 	{ .ctl_name = 0 }
 };
 
+struct ctl_table *ipv6_route_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(ipv6_route_table_template,
+			sizeof(ipv6_route_table_template),
+			GFP_KERNEL);
+	return table;
+}
 #endif
 
 int __init ip6_route_init(void)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7329decf1f9..7970f3366f8 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,20 +14,23 @@
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
 
-static ctl_table ipv6_table[] = {
+extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
+extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
+
+static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ROUTE,
 		.procname	= "route",
 		.maxlen		= 0,
 		.mode		= 0555,
-		.child		= ipv6_route_table
+		.child		= ipv6_route_table_template
 	},
 	{
 		.ctl_name	= NET_IPV6_ICMP,
 		.procname	= "icmp",
 		.maxlen		= 0,
 		.mode		= 0555,
-		.child		= ipv6_icmp_table
+		.child		= ipv6_icmp_table_template
 	},
 	{
 		.ctl_name	= NET_IPV6_BINDV6ONLY,
@@ -89,22 +92,66 @@ struct ctl_path net_ipv6_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
-static struct ctl_table_header *ipv6_sysctl_header;
-
 static int ipv6_sysctl_net_init(struct net *net)
 {
-	ipv6_sysctl_header = register_net_sysctl_table(net, net_ipv6_ctl_path,
-						       ipv6_table);
-	if (!ipv6_sysctl_header)
+	struct ctl_table *ipv6_table;
+	struct ctl_table *ipv6_route_table;
+	struct ctl_table *ipv6_icmp_table;
+	int err;
+
+	err = -ENOMEM;
+	ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
+			     GFP_KERNEL);
+	if (!ipv6_table)
+		goto out;
+
+	ipv6_route_table = ipv6_route_sysctl_init(net);
+	if (!ipv6_route_table)
+		goto out_ipv6_table;
+
+	ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
+	if (!ipv6_icmp_table)
+		goto out_ipv6_route_table;
+
+	ipv6_table[0].child = ipv6_route_table;
+	ipv6_table[1].child = ipv6_icmp_table;
+
+	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
+							   ipv6_table);
+	if (!net->ipv6.sysctl.table)
 		return -ENOMEM;
 
-	return 0;
+	if (!net->ipv6.sysctl.table)
+		goto out_ipv6_icmp_table;
+
+	err = 0;
+out:
+	return err;
 
+out_ipv6_icmp_table:
+	kfree(ipv6_icmp_table);
+out_ipv6_route_table:
+	kfree(ipv6_route_table);
+out_ipv6_table:
+	kfree(ipv6_table);
+	goto out;
 }
 
 static void ipv6_sysctl_net_exit(struct net *net)
 {
-	unregister_net_sysctl_table(ipv6_sysctl_header);
+	struct ctl_table *ipv6_table;
+	struct ctl_table *ipv6_route_table;
+	struct ctl_table *ipv6_icmp_table;
+
+	ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
+	ipv6_route_table = ipv6_table[0].child;
+	ipv6_icmp_table = ipv6_table[1].child;
+
+	unregister_net_sysctl_table(net->ipv6.sysctl.table);
+
+	kfree(ipv6_table);
+	kfree(ipv6_route_table);
+	kfree(ipv6_icmp_table);
 }
 
 static struct pernet_operations ipv6_sysctl_net_ops = {
-- 
cgit v1.2.3


From 99bc9c4e45e7e783cf0b0a25cc03a103c038f254 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:54:53 -0800
Subject: [NETNS][IPV6]: Make bindv6only sysctl per namespace.

This patch moves the bindv6only sysctl to the network namespace
structure. Until the ipv6 protocol is not per namespace, the sysctl
variable is always from the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c        | 5 ++---
 net/ipv6/sysctl_net_ipv6.c | 4 +++-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 72b898fe2dd..70662bf8ab9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -66,8 +66,6 @@ MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");
 
-int sysctl_ipv6_bindv6only __read_mostly;
-
 /* The inetsw6 table contains everything that inet6_create needs to
  * build a new socket.
  */
@@ -193,7 +191,7 @@ lookup_protocol:
 	np->mcast_hops	= -1;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->ipv6only	= sysctl_ipv6_bindv6only;
+	np->ipv6only	= init_net.ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
@@ -721,6 +719,7 @@ static void cleanup_ipv6_mibs(void)
 
 static int inet6_net_init(struct net *net)
 {
+	net->ipv6.sysctl.bindv6only = 0;
 	return 0;
 }
 
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7970f3366f8..13be97a928c 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_BINDV6ONLY,
 		.procname	= "bindv6only",
-		.data		= &sysctl_ipv6_bindv6only,
+		.data		= &init_net.ipv6.sysctl.bindv6only,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -116,6 +116,8 @@ static int ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[0].child = ipv6_route_table;
 	ipv6_table[1].child = ipv6_icmp_table;
 
+	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
+
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
 	if (!net->ipv6.sysctl.table)
-- 
cgit v1.2.3


From e71e0349eb32bc438fa80d8990c6f3592967d111 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:56:03 -0800
Subject: [NETNS][IPV6]: Make ip6_frags per namespace.

The ip6_frags is moved to the network namespace structure.  Because
there can be multiple instances of the network namespaces, and the
ip6_frags is no longer a global static variable, a helper function has
been added to facilitate the initialization of the variables.

Until the ipv6 protocol is not per namespace, the variables are
accessed relatively from the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c        |  8 ++++++++
 net/ipv6/reassembly.c      | 16 +++++++---------
 net/ipv6/sysctl_net_ipv6.c | 12 ++++++++----
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 70662bf8ab9..c4a1882fa80 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -72,6 +72,8 @@ MODULE_LICENSE("GPL");
 static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
+void ipv6_frag_sysctl_init(struct net *net);
+
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 {
 	const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -720,6 +722,12 @@ static void cleanup_ipv6_mibs(void)
 static int inet6_net_init(struct net *net)
 {
 	net->ipv6.sysctl.bindv6only = 0;
+	net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
+	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
+	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+	ipv6_frag_sysctl_init(net);
+
 	return 0;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index bf4173daecb..5cd0bc693a5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -82,13 +82,6 @@ struct frag_queue
 	__u16			nhoffset;
 };
 
-struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
-	.high_thresh 	 = 256 * 1024,
-	.low_thresh	 = 192 * 1024,
-	.timeout	 = IPV6_FRAG_TIMEOUT,
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags ip6_frags;
 
 int ip6_frag_nqueues(void)
@@ -605,7 +598,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
+	if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh)
 		ip6_evictor(ip6_dst_idev(skb->dst));
 
 	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
@@ -632,6 +625,11 @@ static struct inet6_protocol frag_protocol =
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
+void ipv6_frag_sysctl_init(struct net *net)
+{
+	ip6_frags.ctl = &net->ipv6.sysctl.frags;
+}
+
 int __init ipv6_frag_init(void)
 {
 	int ret;
@@ -639,7 +637,7 @@ int __init ipv6_frag_init(void)
 	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 	if (ret)
 		goto out;
-	ip6_frags.ctl = &ip6_frags_ctl;
+
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = NULL;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 13be97a928c..ae3cfd1b8e0 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -43,7 +43,7 @@ static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
 		.procname	= "ip6frag_high_thresh",
-		.data		= &ip6_frags_ctl.high_thresh,
+		.data		= &init_net.ipv6.sysctl.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -51,7 +51,7 @@ static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
 		.procname	= "ip6frag_low_thresh",
-		.data		= &ip6_frags_ctl.low_thresh,
+		.data		= &init_net.ipv6.sysctl.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -59,7 +59,7 @@ static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
 		.procname	= "ip6frag_time",
-		.data		= &ip6_frags_ctl.timeout,
+		.data		= &init_net.ipv6.sysctl.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -68,7 +68,7 @@ static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
 		.procname	= "ip6frag_secret_interval",
-		.data		= &ip6_frags_ctl.secret_interval,
+		.data		= &init_net.ipv6.sysctl.frags.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -117,6 +117,10 @@ static int ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
+	ipv6_table[3].data = &net->ipv6.sysctl.frags.high_thresh;
+	ipv6_table[4].data = &net->ipv6.sysctl.frags.low_thresh;
+	ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout;
+	ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval;
 
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
-- 
cgit v1.2.3


From 7c76509d0da99f29289b9b7ab134791e45d49b15 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 02:57:43 -0800
Subject: [NETNS][IPV6]: Make mld_max_msf readonly in other namespaces.

The mld_max_msf protects the system with a maximum allowed multicast
source filters. Making this variable per namespace can be potentially
an problem if someone inside a namespace set it to a big value, that
will impact the whole system including other namespaces.

I don't see any benefits to have it per namespace for now, so in order
to keep a directory entry in a newly created namespace, I make it
read-only when we are not in the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index ae3cfd1b8e0..d223159638d 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -122,6 +122,12 @@ static int ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout;
 	ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval;
 
+	/* We don't want this value to be per namespace, it should be global
+	   to all namespaces, so make it read-only when we are not in the
+	   init network namespace */
+	if (net != &init_net)
+		ipv6_table[7].mode = 0444;
+
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
 	if (!net->ipv6.sysctl.table)
-- 
cgit v1.2.3


From 4990509f19e8f1e000a83a88fc46328f73b8a88a Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 03:01:01 -0800
Subject: [NETNS][IPV6]: Make sysctls route per namespace.

All the sysctl concerning the routes are moved to the network
namespace structure. A helper function is called to initialize the
variables.

Because the ipv6 protocol is not yet per namespace, the variables are
accessed relatively from the network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c        |  8 ++++++++
 net/ipv6/ip6_fib.c         | 14 ++++++++-----
 net/ipv6/route.c           | 49 +++++++++++++++++++---------------------------
 net/ipv6/sysctl_net_ipv6.c | 11 +++++++++++
 4 files changed, 48 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c4a1882fa80..3aea84a1822 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -726,6 +726,14 @@ static int inet6_net_init(struct net *net)
 	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
 	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
 	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+	net->ipv6.sysctl.flush_delay = 0;
+	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
+	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
+	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
+	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 	ipv6_frag_sysctl_init(net);
 
 	return 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7165a5e90f4..0e83164aa3e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -681,13 +681,15 @@ static __inline__ void fib6_start_gc(struct rt6_info *rt)
 {
 	if (ip6_fib_timer.expires == 0 &&
 	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 void fib6_force_start_gc(void)
 {
 	if (ip6_fib_timer.expires == 0)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 /*
@@ -1447,7 +1449,8 @@ void fib6_run_gc(unsigned long dummy)
 {
 	if (dummy != ~0UL) {
 		spin_lock_bh(&fib6_gc_lock);
-		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
+		gc_args.timeout = dummy ? (int)dummy :
+			init_net.ipv6.sysctl.ip6_rt_gc_interval;
 	} else {
 		local_bh_disable();
 		if (!spin_trylock(&fib6_gc_lock)) {
@@ -1455,7 +1458,7 @@ void fib6_run_gc(unsigned long dummy)
 			local_bh_enable();
 			return;
 		}
-		gc_args.timeout = ip6_rt_gc_interval;
+		gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval;
 	}
 	gc_args.more = 0;
 
@@ -1463,7 +1466,8 @@ void fib6_run_gc(unsigned long dummy)
 	fib6_clean_all(fib6_age, 0, NULL);
 
 	if (gc_args.more)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 	else {
 		del_timer(&ip6_fib_timer);
 		ip6_fib_timer.expires = 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0c7382f4fb8..d2b3cf695af 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -73,14 +73,6 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
-static int ip6_rt_max_size = 4096;
-static int ip6_rt_gc_min_interval = HZ / 2;
-static int ip6_rt_gc_timeout = 60*HZ;
-int ip6_rt_gc_interval = 30*HZ;
-static int ip6_rt_gc_elasticity = 9;
-static int ip6_rt_mtu_expires = 10*60*HZ;
-static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
-
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
@@ -894,8 +886,8 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
 {
 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 
-	if (mtu < ip6_rt_min_advmss)
-		mtu = ip6_rt_min_advmss;
+	if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
+		mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
 
 	/*
 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
@@ -995,19 +987,19 @@ static int ip6_dst_gc(void)
 	static unsigned long last_gc;
 	unsigned long now = jiffies;
 
-	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
-	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
+	if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
+	    atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
 		goto out;
 
 	expire++;
 	fib6_run_gc(expire);
 	last_gc = now;
 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
-		expire = ip6_rt_gc_timeout>>1;
+		expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
 
 out:
-	expire -= expire>>ip6_rt_gc_elasticity;
-	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
+	expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
+	return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
 }
 
 /* Clean host part of a prefix. Not necessary in radix tree,
@@ -1513,7 +1505,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
+		dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
 	}
@@ -1539,7 +1531,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
 		 * and detecting PMTU increase will be automatically happened.
 		 */
-		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+		dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
 		ip6_ins_rt(nrt);
@@ -2395,15 +2387,14 @@ static inline void ipv6_route_proc_fini(struct net *net)
 
 #ifdef CONFIG_SYSCTL
 
-static int flush_delay;
-
 static
 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
 			      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	int delay = init_net.ipv6.sysctl.flush_delay;
 	if (write) {
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
+		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
 		return 0;
 	} else
 		return -EINVAL;
@@ -2412,7 +2403,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
 ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
-		.data		=	&flush_delay,
+		.data		=	&init_net.ipv6.sysctl.flush_delay,
 		.maxlen		=	sizeof(int),
 		.mode		=	0200,
 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
@@ -2428,7 +2419,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
 		.procname	=	"max_size",
-		.data		=	&ip6_rt_max_size,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec,
@@ -2436,7 +2427,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
 		.procname	=	"gc_min_interval",
-		.data		=	&ip6_rt_gc_min_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2445,7 +2436,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
 		.procname	=	"gc_timeout",
-		.data		=	&ip6_rt_gc_timeout,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2454,7 +2445,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
 		.procname	=	"gc_interval",
-		.data		=	&ip6_rt_gc_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2463,7 +2454,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
 		.procname	=	"gc_elasticity",
-		.data		=	&ip6_rt_gc_elasticity,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2472,7 +2463,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
 		.procname	=	"mtu_expires",
-		.data		=	&ip6_rt_mtu_expires,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2481,7 +2472,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
 		.procname	=	"min_adv_mss",
-		.data		=	&ip6_rt_min_advmss,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_jiffies,
@@ -2490,7 +2481,7 @@ ctl_table ipv6_route_table_template[] = {
 	{
 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
 		.procname	=	"gc_min_interval_ms",
-		.data		=	&ip6_rt_gc_min_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
 		.proc_handler	=	&proc_dointvec_ms_jiffies,
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d223159638d..b4ba422f271 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -113,7 +113,18 @@ static int ipv6_sysctl_net_init(struct net *net)
 	if (!ipv6_icmp_table)
 		goto out_ipv6_route_table;
 
+	ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay;
+	/* ipv6_route_table[1].data will be handled when we have
+	   routes per namespace */
+	ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+	ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+	ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
+	ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
+	ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
+	ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
+	ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
 	ipv6_table[0].child = ipv6_route_table;
+
 	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
-- 
cgit v1.2.3


From 41a76906b3225997036efd88cbaae69d60b1e947 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 03:02:40 -0800
Subject: [NETNS][IPV6]: Make icmpv6_time sysctl per namespace.

This patch moves the icmpv6_time sysctl to the network namespace
structure.

Because the ipv6 protocol is not yet per namespace, the variable is
accessed relatively to the initial network namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c        | 1 +
 net/ipv6/icmp.c            | 6 ++----
 net/ipv6/sysctl_net_ipv6.c | 1 +
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3aea84a1822..218b8b3050a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -734,6 +734,7 @@ static int inet6_net_init(struct net *net)
 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 	ipv6_frag_sysctl_init(net);
 
 	return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index dfe3b37c43e..5395afe55ca 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -154,8 +154,6 @@ static int is_ineligible(struct sk_buff *skb)
 	return 0;
 }
 
-static int sysctl_icmpv6_time __read_mostly = 1*HZ;
-
 /*
  * Check the ICMP output rate limit
  */
@@ -186,7 +184,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
 		res = 1;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
-		int tmo = sysctl_icmpv6_time;
+		int tmo = init_net.ipv6.sysctl.icmpv6_time;
 
 		/* Give more bandwidth to wider prefixes. */
 		if (rt->rt6i_dst.plen < 128)
@@ -911,7 +909,7 @@ ctl_table ipv6_icmp_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
 		.procname	= "ratelimit",
-		.data		= &sysctl_icmpv6_time,
+		.data		= &init_net.ipv6.sysctl.icmpv6_time,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index b4ba422f271..5e0af4d4632 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -125,6 +125,7 @@ static int ipv6_sysctl_net_init(struct net *net)
 	ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
 	ipv6_table[0].child = ipv6_route_table;
 
+	ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time;
 	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
-- 
cgit v1.2.3


From f8c26b8d589867aed8251db2935f8aa03aa68717 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:17:29 -0800
Subject: [NETNS]: Add netns parameter to fib_rules_(un)register.

The patch extends the different fib rules API in order to pass the
network namespace pointer. That will allow to access the different
tables from a namespace relative object. As usual, the pointer to the
init_net variable is passed as parameter so we don't break the
network.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c  | 4 ++--
 net/decnet/dn_rules.c | 4 ++--
 net/ipv4/fib_rules.c  | 2 +-
 net/ipv6/fib6_rules.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fcbf41c0a5d..ada9c8155e3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -74,7 +74,7 @@ static void flush_route_cache(struct fib_rules_ops *ops)
 		ops->flush_cache();
 }
 
-int fib_rules_register(struct fib_rules_ops *ops)
+int fib_rules_register(struct net *net, struct fib_rules_ops *ops)
 {
 	int err = -EEXIST;
 	struct fib_rules_ops *o;
@@ -113,7 +113,7 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 }
 EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
-int fib_rules_unregister(struct fib_rules_ops *ops)
+int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
 {
 	int err = 0;
 	struct fib_rules_ops *o;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index ffebea04cc9..0b5e2b97e15 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -255,12 +255,12 @@ void __init dn_fib_rules_init(void)
 {
 	BUG_ON(fib_default_rule_add(&dn_fib_rules_ops, 0x7fff,
 			            RT_TABLE_MAIN, 0));
-	fib_rules_register(&dn_fib_rules_ops);
+	fib_rules_register(&init_net, &dn_fib_rules_ops);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	fib_rules_unregister(&dn_fib_rules_ops);
+	fib_rules_unregister(&init_net, &dn_fib_rules_ops);
 }
 
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a0ada3a8d8d..eac3f717eca 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -314,5 +314,5 @@ static int __init fib_default_rules_init(void)
 void __init fib4_rules_init(void)
 {
 	BUG_ON(fib_default_rules_init());
-	fib_rules_register(&fib4_rules_ops);
+	fib_rules_register(&init_net, &fib4_rules_ops);
 }
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9ce2e0a6748..e4d7e5a08b4 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -273,7 +273,7 @@ int __init fib6_rules_init(void)
 	if (ret)
 		goto out;
 
-	ret = fib_rules_register(&fib6_rules_ops);
+	ret = fib_rules_register(&init_net, &fib6_rules_ops);
 	if (ret)
 		goto out_default_rules_init;
 out:
@@ -286,5 +286,5 @@ out_default_rules_init:
 
 void fib6_rules_cleanup(void)
 {
-	fib_rules_unregister(&fib6_rules_ops);
+	fib_rules_unregister(&init_net, &fib6_rules_ops);
 }
-- 
cgit v1.2.3


From 868d13ac811746e28e4c806f2b1bd8575796f9af Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:18:25 -0800
Subject: [NETNS]: Pass fib_rules_ops into default_pref method.

fib_rules_ops contains operations and the list of configured rules. ops will
become per/namespace soon, so we need them to be known in the default_pref
callback.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c  | 2 +-
 net/decnet/dn_rules.c | 2 +-
 net/ipv4/fib_rules.c  | 2 +-
 net/ipv6/fib6_rules.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ada9c8155e3..e12e9f58394 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -285,7 +285,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	rule->table = frh_get_table(frh, tb);
 
 	if (!rule->pref && ops->default_pref)
-		rule->pref = ops->default_pref();
+		rule->pref = ops->default_pref(ops);
 
 	err = -EINVAL;
 	if (tb[FRA_GOTO]) {
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 0b5e2b97e15..c1fae23d226 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -212,7 +212,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 dn_fib_rule_default_pref(void)
+static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
 {
 	struct list_head *pos;
 	struct fib_rule *rule;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index eac3f717eca..afe669dd1bd 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -245,7 +245,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 fib4_rule_default_pref(void)
+static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
 {
 	struct list_head *pos;
 	struct fib_rule *rule;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index e4d7e5a08b4..76437a1fcab 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -223,7 +223,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 fib6_rule_default_pref(void)
+static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
 {
 	return 0x3FFF;
 }
-- 
cgit v1.2.3


From 5fd30ee7c48bf7f9cd16ab44c8a09fa4a57cc21d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:20:28 -0800
Subject: [NETNS]: Namespacing in the generic fib rules code.

Move static rules_ops & rules_mod_lock to the struct net, register the
pernet subsys to init them and enjoy the fact that the core rules
infrastructure works in the namespace.

Real IPv4 fib rules virtualization requires fib tables support in the
namespace and will be done seriously later in the patchset.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 91 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 54 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e12e9f58394..c5f78fed688 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -15,9 +15,6 @@
 #include <net/sock.h>
 #include <net/fib_rules.h>
 
-static LIST_HEAD(rules_ops);
-static DEFINE_SPINLOCK(rules_mod_lock);
-
 int fib_default_rule_add(struct fib_rules_ops *ops,
 			 u32 pref, u32 table, u32 flags)
 {
@@ -40,16 +37,17 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
-static void notify_rule_change(int event, struct fib_rule *rule,
+static void notify_rule_change(struct net *net, int event,
+			       struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid);
 
-static struct fib_rules_ops *lookup_rules_ops(int family)
+static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
 {
 	struct fib_rules_ops *ops;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ops, &rules_ops, list) {
+	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
 		if (ops->family == family) {
 			if (!try_module_get(ops->owner))
 				ops = NULL;
@@ -87,15 +85,16 @@ int fib_rules_register(struct net *net, struct fib_rules_ops *ops)
 	    ops->action == NULL)
 		return -EINVAL;
 
-	spin_lock(&rules_mod_lock);
-	list_for_each_entry(o, &rules_ops, list)
+	spin_lock(&net->rules_mod_lock);
+	list_for_each_entry(o, &net->rules_ops, list)
 		if (ops->family == o->family)
 			goto errout;
 
-	list_add_tail_rcu(&ops->list, &rules_ops);
+	hold_net(net);
+	list_add_tail_rcu(&ops->list, &net->rules_ops);
 	err = 0;
 errout:
-	spin_unlock(&rules_mod_lock);
+	spin_unlock(&net->rules_mod_lock);
 
 	return err;
 }
@@ -118,8 +117,8 @@ int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
 	int err = 0;
 	struct fib_rules_ops *o;
 
-	spin_lock(&rules_mod_lock);
-	list_for_each_entry(o, &rules_ops, list) {
+	spin_lock(&net->rules_mod_lock);
+	list_for_each_entry(o, &net->rules_ops, list) {
 		if (o == ops) {
 			list_del_rcu(&o->list);
 			fib_rules_cleanup_ops(ops);
@@ -129,9 +128,11 @@ int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
 
 	err = -ENOENT;
 out:
-	spin_unlock(&rules_mod_lock);
+	spin_unlock(&net->rules_mod_lock);
 
 	synchronize_rcu();
+	if (!err)
+		release_net(net);
 
 	return err;
 }
@@ -229,13 +230,10 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct nlattr *tb[FRA_MAX+1];
 	int err = -EINVAL, unresolved = 0;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
 
-	ops = lookup_rules_ops(frh->family);
+	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
 		err = EAFNOSUPPORT;
 		goto errout;
@@ -348,7 +346,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	else
 		list_add_rcu(&rule->list, &ops->rules_list);
 
-	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
 	return 0;
@@ -369,13 +367,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct nlattr *tb[FRA_MAX+1];
 	int err = -EINVAL;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
 
-	ops = lookup_rules_ops(frh->family);
+	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
 		err = EAFNOSUPPORT;
 		goto errout;
@@ -441,7 +436,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 
 		synchronize_rcu();
-		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+		notify_rule_change(net, RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
 		flush_route_cache(ops);
@@ -551,13 +546,10 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 	struct fib_rules_ops *ops;
 	int idx = 0, family;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	family = rtnl_msg_family(cb->nlh);
 	if (family != AF_UNSPEC) {
 		/* Protocol specific dump request */
-		ops = lookup_rules_ops(family);
+		ops = lookup_rules_ops(net, family);
 		if (ops == NULL)
 			return -EAFNOSUPPORT;
 
@@ -565,7 +557,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ops, &rules_ops, list) {
+	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
 		if (idx < cb->args[0] || !try_module_get(ops->owner))
 			goto skip;
 
@@ -582,7 +574,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static void notify_rule_change(int event, struct fib_rule *rule,
+static void notify_rule_change(struct net *net, int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid)
 {
@@ -600,10 +592,10 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, ops->nlgroup, err);
+		rtnl_set_sk_err(net, ops->nlgroup, err);
 }
 
 static void attach_rules(struct list_head *rules, struct net_device *dev)
@@ -631,22 +623,20 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
 	struct net_device *dev = ptr;
+	struct net *net = dev->nd_net;
 	struct fib_rules_ops *ops;
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
 	ASSERT_RTNL();
 	rcu_read_lock();
 
 	switch (event) {
 	case NETDEV_REGISTER:
-		list_for_each_entry(ops, &rules_ops, list)
+		list_for_each_entry(ops, &net->rules_ops, list)
 			attach_rules(&ops->rules_list, dev);
 		break;
 
 	case NETDEV_UNREGISTER:
-		list_for_each_entry(ops, &rules_ops, list)
+		list_for_each_entry(ops, &net->rules_ops, list)
 			detach_rules(&ops->rules_list, dev);
 		break;
 	}
@@ -660,13 +650,40 @@ static struct notifier_block fib_rules_notifier = {
 	.notifier_call = fib_rules_event,
 };
 
+static int fib_rules_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->rules_ops);
+	spin_lock_init(&net->rules_mod_lock);
+	return 0;
+}
+
+static struct pernet_operations fib_rules_net_ops = {
+	.init = fib_rules_net_init,
+};
+
 static int __init fib_rules_init(void)
 {
+	int err;
 	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
 	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
 
-	return register_netdevice_notifier(&fib_rules_notifier);
+	err = register_netdevice_notifier(&fib_rules_notifier);
+	if (err < 0)
+		goto fail;
+
+	err = register_pernet_subsys(&fib_rules_net_ops);
+	if (err < 0)
+		goto fail_unregister;
+	return 0;
+
+fail_unregister:
+	unregister_netdevice_notifier(&fib_rules_notifier);
+fail:
+	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
+	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
+	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
+	return err;
 }
 
 subsys_initcall(fib_rules_init);
-- 
cgit v1.2.3


From 61a0265344786a548e8a0b26cb668e78a71f9602 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:21:09 -0800
Subject: [NETNS]: Add namespace to API for routing /proc entries creation.

This adds netns parameter to fib_proc_init/exit and replaces __init
specifier with __net_init. After this, we will not yet have these proc
files show info from the specific namespace - this will be done when
these tables become namespaced.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c  |  4 ++--
 net/ipv4/fib_hash.c |  8 ++++----
 net/ipv4/fib_trie.c | 21 +++++++++++----------
 3 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0e12cf64607..d5b8cb1dad6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1475,14 +1475,14 @@ static int __init ipv4_proc_init(void)
 		goto out_tcp;
 	if (udp4_proc_init())
 		goto out_udp;
-	if (fib_proc_init())
+	if (fib_proc_init(&init_net))
 		goto out_fib;
 	if (ip_misc_proc_init())
 		goto out_misc;
 out:
 	return rc;
 out_misc:
-	fib_proc_exit();
+	fib_proc_exit(&init_net);
 out_fib:
 	udp4_proc_exit();
 out_udp:
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index ee1ffdb3044..8de21bc4947 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1041,15 +1041,15 @@ static const struct file_operations fib_seq_fops = {
 	.release	= seq_release_private,
 };
 
-int __init fib_proc_init(void)
+int __net_init fib_proc_init(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_seq_fops))
+	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
-void __init fib_proc_exit(void)
+void __net_exit fib_proc_exit(struct net *net)
 {
-	proc_net_remove(&init_net, "route");
+	proc_net_remove(net, "route");
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index a842204df7b..4bbfaeedf21 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2508,32 +2508,33 @@ static const struct file_operations fib_route_fops = {
 	.release = seq_release_private,
 };
 
-int __init fib_proc_init(void)
+int __net_init fib_proc_init(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "fib_trie", S_IRUGO, &fib_trie_fops))
+	if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
 		goto out1;
 
-	if (!proc_net_fops_create(&init_net, "fib_triestat", S_IRUGO, &fib_triestat_fops))
+	if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
+				  &fib_triestat_fops))
 		goto out2;
 
-	if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_route_fops))
+	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
 		goto out3;
 
 	return 0;
 
 out3:
-	proc_net_remove(&init_net, "fib_triestat");
+	proc_net_remove(net, "fib_triestat");
 out2:
-	proc_net_remove(&init_net, "fib_trie");
+	proc_net_remove(net, "fib_trie");
 out1:
 	return -ENOMEM;
 }
 
-void __init fib_proc_exit(void)
+void __net_exit fib_proc_exit(struct net *net)
 {
-	proc_net_remove(&init_net, "fib_trie");
-	proc_net_remove(&init_net, "fib_triestat");
-	proc_net_remove(&init_net, "route");
+	proc_net_remove(net, "fib_trie");
+	proc_net_remove(net, "fib_triestat");
+	proc_net_remove(net, "route");
 }
 
 #endif /* CONFIG_PROC_FS */
-- 
cgit v1.2.3


From dbb50165b512f6c9b7aae10af73ae5b6d811f4d0 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:21:49 -0800
Subject: [IPV4]: Check fib4_rules_init failure.

This adds error paths into both versions of fib4_rules_init
(with/without CONFIG_IP_MULTIPLE_TABLES) and returns error code to the
caller.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 18 +++++++++++++++---
 net/ipv4/fib_rules.c    | 14 ++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ac6238a3b0f..1bb956b3be2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -59,12 +59,24 @@ struct fib_table *ip_fib_main_table;
 #define FIB_TABLE_HASHSZ 1
 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-static void __init fib4_rules_init(void)
+static int __init fib4_rules_init(void)
 {
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
-	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
+	if (ip_fib_local_table == NULL)
+		return -ENOMEM;
+
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
+	if (ip_fib_main_table == NULL)
+		goto fail;
+
+	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
+	return 0;
+
+fail:
+	kfree(ip_fib_local_table);
+	ip_fib_local_table = NULL;
+	return -ENOMEM;
 }
 #else
 
@@ -944,7 +956,7 @@ void __init ip_fib_init(void)
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
 		INIT_HLIST_HEAD(&fib_table_hash[i]);
 
-	fib4_rules_init();
+	BUG_ON(fib4_rules_init());
 
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index afe669dd1bd..0751734ecf4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -311,8 +311,18 @@ static int __init fib_default_rules_init(void)
 	return 0;
 }
 
-void __init fib4_rules_init(void)
+int __init fib4_rules_init()
 {
-	BUG_ON(fib_default_rules_init());
+	int err;
+
 	fib_rules_register(&init_net, &fib4_rules_ops);
+	err = fib_default_rules_init();
+	if (err < 0)
+		goto fail;
+	return 0;
+
+fail:
+	/* also cleans all rules already added */
+	fib_rules_unregister(&init_net, &fib4_rules_ops);
+	return err;
 }
-- 
cgit v1.2.3


From 7b1a74fdbb9ec38a9780620fae25519fde4b21ee Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:22:17 -0800
Subject: [NETNS]: Refactor fib initialization so it can handle multiple
 namespaces.

This patch makes the fib to be initialized as a subsystem for the
network namespaces. The code does not handle several namespaces yet,
so in case of a creation of a network namespace, the
creation/initialization will not occur.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c      |  4 ---
 net/ipv4/fib_frontend.c | 88 ++++++++++++++++++++++++++++++++++++++++++++-----
 net/ipv4/fib_hash.c     |  6 +---
 net/ipv4/fib_rules.c    | 11 +++++--
 net/ipv4/fib_trie.c     |  6 +---
 5 files changed, 90 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d5b8cb1dad6..40ecaae7be3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1475,15 +1475,11 @@ static int __init ipv4_proc_init(void)
 		goto out_tcp;
 	if (udp4_proc_init())
 		goto out_udp;
-	if (fib_proc_init(&init_net))
-		goto out_fib;
 	if (ip_misc_proc_init())
 		goto out_misc;
 out:
 	return rc;
 out_misc:
-	fib_proc_exit(&init_net);
-out_fib:
 	udp4_proc_exit();
 out_udp:
 	tcp4_proc_exit();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1bb956b3be2..0484cae0273 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -59,7 +59,7 @@ struct fib_table *ip_fib_main_table;
 #define FIB_TABLE_HASHSZ 1
 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-static int __init fib4_rules_init(void)
+static int __net_init fib4_rules_init(struct net *net)
 {
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
 	if (ip_fib_local_table == NULL)
@@ -863,10 +863,18 @@ static void nl_fib_input(struct sk_buff *skb)
 	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
 }
 
-static void nl_fib_lookup_init(void)
+static int nl_fib_lookup_init(struct net *net)
 {
-	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
+	fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
 				      nl_fib_input, NULL, THIS_MODULE);
+	if (fibnl == NULL)
+		return -EAFNOSUPPORT;
+	return 0;
+}
+
+static void nl_fib_lookup_exit(struct net *net)
+{
+	sock_put(fibnl);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
@@ -949,22 +957,86 @@ static struct notifier_block fib_netdev_notifier = {
 	.notifier_call =fib_netdev_event,
 };
 
-void __init ip_fib_init(void)
+static int __net_init ip_fib_net_init(struct net *net)
 {
 	unsigned int i;
 
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
 		INIT_HLIST_HEAD(&fib_table_hash[i]);
 
-	BUG_ON(fib4_rules_init());
+	return fib4_rules_init(net);
+}
 
-	register_netdevice_notifier(&fib_netdev_notifier);
-	register_inetaddr_notifier(&fib_inetaddr_notifier);
-	nl_fib_lookup_init();
+static void __net_exit ip_fib_net_exit(struct net *net)
+{
+	unsigned int i;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	fib4_rules_exit(net);
+#endif
+
+	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+		struct fib_table *tb;
+		struct hlist_head *head;
+		struct hlist_node *node, *tmp;
 
+		head = &fib_table_hash[i];
+		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
+			hlist_del(node);
+			tb->tb_flush(tb);
+			kfree(tb);
+		}
+	}
+}
+
+static int __net_init fib_net_init(struct net *net)
+{
+	int error;
+
+	error = 0;
+	if (net != &init_net)
+		goto out;
+
+	error = ip_fib_net_init(net);
+	if (error < 0)
+		goto out;
+	error = nl_fib_lookup_init(net);
+	if (error < 0)
+		goto out_nlfl;
+	error = fib_proc_init(net);
+	if (error < 0)
+		goto out_proc;
+out:
+	return error;
+
+out_proc:
+	nl_fib_lookup_exit(net);
+out_nlfl:
+	ip_fib_net_exit(net);
+	goto out;
+}
+
+static void __net_exit fib_net_exit(struct net *net)
+{
+	fib_proc_exit(net);
+	nl_fib_lookup_exit(net);
+	ip_fib_net_exit(net);
+}
+
+static struct pernet_operations fib_net_ops = {
+	.init = fib_net_init,
+	.exit = fib_net_exit,
+};
+
+void __init ip_fib_init(void)
+{
 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
+
+	register_pernet_subsys(&fib_net_ops);
+	register_netdevice_notifier(&fib_netdev_notifier);
+	register_inetaddr_notifier(&fib_inetaddr_notifier);
 }
 
 EXPORT_SYMBOL(inet_addr_type);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 8de21bc4947..694a072d809 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -746,11 +746,7 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 	return skb->len;
 }
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(u32 id)
-#else
-struct fib_table * __init fib_hash_init(u32 id)
-#endif
+struct fib_table *fib_hash_init(u32 id)
 {
 	struct fib_table *tb;
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0751734ecf4..1aae61c8ea6 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -311,11 +311,11 @@ static int __init fib_default_rules_init(void)
 	return 0;
 }
 
-int __init fib4_rules_init()
+int __net_init fib4_rules_init(struct net *net)
 {
 	int err;
 
-	fib_rules_register(&init_net, &fib4_rules_ops);
+	fib_rules_register(net, &fib4_rules_ops);
 	err = fib_default_rules_init();
 	if (err < 0)
 		goto fail;
@@ -323,6 +323,11 @@ int __init fib4_rules_init()
 
 fail:
 	/* also cleans all rules already added */
-	fib_rules_unregister(&init_net, &fib4_rules_ops);
+	fib_rules_unregister(net, &fib4_rules_ops);
 	return err;
 }
+
+void __net_exit fib4_rules_exit(struct net *net)
+{
+	fib_rules_unregister(net, &fib4_rules_ops);
+}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4bbfaeedf21..92b687e6a47 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1956,11 +1956,7 @@ out:
 
 /* Fix more generic FIB names for init later */
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(u32 id)
-#else
-struct fib_table * __init fib_hash_init(u32 id)
-#endif
+struct fib_table *fib_hash_init(u32 id)
 {
 	struct fib_table *tb;
 	struct trie *t;
-- 
cgit v1.2.3


From 93456b6d7753def8760b423ac6b986eb9d5a4a95 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:23:38 -0800
Subject: [IPV4]: Unify access to the routing tables.

Replace the direct pointers to local and main tables with
calls to fib_get_table() with appropriate argument.

This doesn't introduce additional dereferences, but makes the access to fib
tables uniform in any (CONFIG_IP_MULTIPLE_TABLES) case.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0484cae0273..9ff1e6669ef 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -50,39 +50,34 @@
 #define FFprint(a...) printk(KERN_DEBUG a)
 
 static struct sock *fibnl;
+struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
-struct fib_table *ip_fib_local_table;
-struct fib_table *ip_fib_main_table;
-
-#define FIB_TABLE_HASHSZ 1
-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-
 static int __net_init fib4_rules_init(struct net *net)
 {
-	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
-	if (ip_fib_local_table == NULL)
+	struct fib_table *local_table, *main_table;
+
+	local_table = fib_hash_init(RT_TABLE_LOCAL);
+	if (local_table == NULL)
 		return -ENOMEM;
 
-	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
-	if (ip_fib_main_table == NULL)
+	main_table  = fib_hash_init(RT_TABLE_MAIN);
+	if (main_table == NULL)
 		goto fail;
 
-	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
-	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
+	hlist_add_head_rcu(&local_table->tb_hlist,
+				&fib_table_hash[TABLE_LOCAL_INDEX]);
+	hlist_add_head_rcu(&main_table->tb_hlist,
+				&fib_table_hash[TABLE_MAIN_INDEX]);
 	return 0;
 
 fail:
-	kfree(ip_fib_local_table);
-	ip_fib_local_table = NULL;
+	kfree(local_table);
 	return -ENOMEM;
 }
 #else
 
-#define FIB_TABLE_HASHSZ 256
-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-
 struct fib_table *fib_new_table(u32 id)
 {
 	struct fib_table *tb;
-- 
cgit v1.2.3


From 8ad4942cd5bdad4143f7aa1d1bd4f7b2526c19c5 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:24:11 -0800
Subject: [NETNS]: Add netns parameter to fib_get_table/fib_new_table.

This patch extends the fib_get_table and the fib_new_table functions
with the network namespace pointer. That will allow to access the
table relatively from the network namespace.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 24 ++++++++++++------------
 net/ipv4/fib_hash.c     |  4 ++--
 net/ipv4/fib_rules.c    | 12 ++++++------
 net/ipv4/fib_trie.c     |  8 ++++----
 4 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 9ff1e6669ef..7718823711e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -78,14 +78,14 @@ fail:
 }
 #else
 
-struct fib_table *fib_new_table(u32 id)
+struct fib_table *fib_new_table(struct net *net, u32 id)
 {
 	struct fib_table *tb;
 	unsigned int h;
 
 	if (id == 0)
 		id = RT_TABLE_MAIN;
-	tb = fib_get_table(id);
+	tb = fib_get_table(net, id);
 	if (tb)
 		return tb;
 	tb = fib_hash_init(id);
@@ -96,7 +96,7 @@ struct fib_table *fib_new_table(u32 id)
 	return tb;
 }
 
-struct fib_table *fib_get_table(u32 id)
+struct fib_table *fib_get_table(struct net *net, u32 id)
 {
 	struct fib_table *tb;
 	struct hlist_node *node;
@@ -148,7 +148,7 @@ struct net_device * ip_dev_find(__be32 addr)
 	res.r = NULL;
 #endif
 
-	local_table = fib_get_table(RT_TABLE_LOCAL);
+	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
 	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
 		return NULL;
 	if (res.type != RTN_LOCAL)
@@ -183,7 +183,7 @@ static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
 	res.r = NULL;
 #endif
 
-	local_table = fib_get_table(RT_TABLE_LOCAL);
+	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
 	if (local_table) {
 		ret = RTN_UNICAST;
 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
@@ -453,13 +453,13 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 			struct fib_table *tb;
 
 			if (cmd == SIOCDELRT) {
-				tb = fib_get_table(cfg.fc_table);
+				tb = fib_get_table(&init_net, cfg.fc_table);
 				if (tb)
 					err = tb->tb_delete(tb, &cfg);
 				else
 					err = -ESRCH;
 			} else {
-				tb = fib_new_table(cfg.fc_table);
+				tb = fib_new_table(&init_net, cfg.fc_table);
 				if (tb)
 					err = tb->tb_insert(tb, &cfg);
 				else
@@ -573,7 +573,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	if (err < 0)
 		goto errout;
 
-	tb = fib_get_table(cfg.fc_table);
+	tb = fib_get_table(net, cfg.fc_table);
 	if (tb == NULL) {
 		err = -ESRCH;
 		goto errout;
@@ -598,7 +598,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	if (err < 0)
 		goto errout;
 
-	tb = fib_new_table(cfg.fc_table);
+	tb = fib_new_table(&init_net, cfg.fc_table);
 	if (tb == NULL) {
 		err = -ENOBUFS;
 		goto errout;
@@ -671,9 +671,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 	};
 
 	if (type == RTN_UNICAST)
-		tb = fib_new_table(RT_TABLE_MAIN);
+		tb = fib_new_table(&init_net, RT_TABLE_MAIN);
 	else
-		tb = fib_new_table(RT_TABLE_LOCAL);
+		tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
 
 	if (tb == NULL)
 		return;
@@ -848,7 +848,7 @@ static void nl_fib_input(struct sk_buff *skb)
 	nlh = nlmsg_hdr(skb);
 
 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
-	tb = fib_get_table(frn->tb_id_in);
+	tb = fib_get_table(&init_net, frn->tb_id_in);
 
 	nl_fib_lookup(frn, tb);
 
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 694a072d809..d20ae2e28ab 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -796,7 +796,7 @@ struct fib_iter_state {
 static struct fib_alias *fib_get_first(struct seq_file *seq)
 {
 	struct fib_iter_state *iter = seq->private;
-	struct fib_table *main_table = fib_get_table(RT_TABLE_MAIN);
+	struct fib_table *main_table = fib_get_table(&init_net, RT_TABLE_MAIN);
 	struct fn_hash *table = (struct fn_hash *)main_table->tb_data;
 
 	iter->bucket    = 0;
@@ -937,7 +937,7 @@ static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 	void *v = NULL;
 
 	read_lock(&fib_hash_lock);
-	if (fib_get_table(RT_TABLE_MAIN))
+	if (fib_get_table(&init_net, RT_TABLE_MAIN))
 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 	return v;
 }
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1aae61c8ea6..49819fe7e4c 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -93,7 +93,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto errout;
 	}
 
-	if ((tbl = fib_get_table(rule->table)) == NULL)
+	if ((tbl = fib_get_table(&init_net, rule->table)) == NULL)
 		goto errout;
 
 	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
@@ -109,7 +109,7 @@ void fib_select_default(const struct flowi *flp, struct fib_result *res)
 	if (res->r && res->r->action == FR_ACT_TO_TBL &&
 	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
 		struct fib_table *tb;
-		if ((tb = fib_get_table(res->r->table)) != NULL)
+		if ((tb = fib_get_table(&init_net, res->r->table)) != NULL)
 			tb->tb_select_default(tb, flp, res);
 	}
 }
@@ -130,13 +130,13 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	return 1;
 }
 
-static struct fib_table *fib_empty_table(void)
+static struct fib_table *fib_empty_table(struct net *net)
 {
 	u32 id;
 
 	for (id = 1; id <= RT_TABLE_MAX; id++)
-		if (fib_get_table(id) == NULL)
-			return fib_new_table(id);
+		if (fib_get_table(net, id) == NULL)
+			return fib_new_table(net, id);
 	return NULL;
 }
 
@@ -159,7 +159,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		if (rule->action == FR_ACT_TO_TBL) {
 			struct fib_table *table;
 
-			table = fib_empty_table();
+			table = fib_empty_table(&init_net);
 			if (table == NULL) {
 				err = -ENOBUFS;
 				goto errout;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 92b687e6a47..fc0624e6a64 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2167,12 +2167,12 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 	struct fib_table *tb;
 
 	trie_local = NULL;
-	tb = fib_get_table(RT_TABLE_LOCAL);
+	tb = fib_get_table(&init_net, RT_TABLE_LOCAL);
 	if (tb)
 		trie_local = (struct trie *) tb->tb_data;
 
 	trie_main = NULL;
-	tb = fib_get_table(RT_TABLE_MAIN);
+	tb = fib_get_table(&init_net, RT_TABLE_MAIN);
 	if (tb)
 		trie_main = (struct trie *) tb->tb_data;
 
@@ -2239,12 +2239,12 @@ static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
 	struct fib_table *tb;
 
 	if (!iter->trie_local) {
-		tb = fib_get_table(RT_TABLE_LOCAL);
+		tb = fib_get_table(&init_net, RT_TABLE_LOCAL);
 		if (tb)
 			iter->trie_local = (struct trie *) tb->tb_data;
 	}
 	if (!iter->trie_main) {
-		tb = fib_get_table(RT_TABLE_MAIN);
+		tb = fib_get_table(&init_net, RT_TABLE_MAIN);
 		if (tb)
 			iter->trie_main = (struct trie *) tb->tb_data;
 	}
-- 
cgit v1.2.3


From 6b175b26c1048d331508940ad3516ead1998084f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 10 Jan 2008 03:25:28 -0800
Subject: [NETNS]: Add netns parameter to inet_(dev_)add_type.

The patch extends the inet_addr_type and inet_dev_addr_type with the
network namespace pointer. That allows to access the different tables
relatively to the network namespace.

The modification of the signature function is reported in all the
callers of the inet_addr_type using the pointer to the well known
init_net.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/clip.c                    |  2 +-
 net/ipv4/af_inet.c                |  2 +-
 net/ipv4/arp.c                    | 12 ++++++------
 net/ipv4/fib_frontend.c           | 18 ++++++++++--------
 net/ipv4/fib_semantics.c          |  4 ++--
 net/ipv4/icmp.c                   |  4 ++--
 net/ipv4/ip_options.c             |  4 ++--
 net/ipv4/ipvs/ip_vs_core.c        |  2 +-
 net/ipv4/ipvs/ip_vs_ctl.c         |  4 ++--
 net/ipv4/netfilter.c              |  2 +-
 net/ipv4/netfilter/ipt_addrtype.c |  2 +-
 net/ipv4/raw.c                    |  2 +-
 net/ipv4/route.c                  |  2 +-
 net/ipv6/af_inet6.c               |  2 +-
 net/sctp/protocol.c               |  2 +-
 15 files changed, 33 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/atm/clip.c b/net/atm/clip.c
index 47fbdc0c5f7..df7d218a682 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -285,7 +285,7 @@ static int clip_constructor(struct neighbour *neigh)
 	struct neigh_parms *parms;
 
 	pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry);
-	neigh->type = inet_addr_type(entry->ip);
+	neigh->type = inet_addr_type(&init_net, entry->ip);
 	if (neigh->type != RTN_UNICAST)
 		return -EINVAL;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 40ecaae7be3..d76f8d8d5eb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -446,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < sizeof(struct sockaddr_in))
 		goto out;
 
-	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+	chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
 
 	/* Not specified by any standard per-se, however it breaks too
 	 * many applications when removed.  It is unfortunate since
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 46edf1c32a4..3f0730ec0a2 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -235,7 +235,7 @@ static int arp_constructor(struct neighbour *neigh)
 	struct in_device *in_dev;
 	struct neigh_parms *parms;
 
-	neigh->type = inet_addr_type(addr);
+	neigh->type = inet_addr_type(&init_net, addr);
 
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
@@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
-		if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
+		if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL)
 			saddr = ip_hdr(skb)->saddr;
 		break;
 	case 1:		/* Restrict announcements of saddr in same subnet */
 		if (!skb)
 			break;
 		saddr = ip_hdr(skb)->saddr;
-		if (inet_addr_type(saddr) == RTN_LOCAL) {
+		if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) {
 			/* saddr should be known to target */
 			if (inet_addr_onlink(in_dev, target, saddr))
 				break;
@@ -479,7 +479,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 
 	paddr = ((struct rtable*)skb->dst)->rt_gateway;
 
-	if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev))
+	if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev))
 		return 0;
 
 	n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -806,7 +806,7 @@ static int arp_process(struct sk_buff *skb)
 	/* Special case: IPv4 duplicate address detection packet (RFC2131) */
 	if (sip == 0) {
 		if (arp->ar_op == htons(ARPOP_REQUEST) &&
-		    inet_addr_type(tip) == RTN_LOCAL &&
+		    inet_addr_type(&init_net, tip) == RTN_LOCAL &&
 		    !arp_ignore(in_dev,dev,sip,tip))
 			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
 				 dev->dev_addr, sha);
@@ -866,7 +866,7 @@ static int arp_process(struct sk_buff *skb)
 		 */
 		if (n == NULL &&
 		    arp->ar_op == htons(ARPOP_REPLY) &&
-		    inet_addr_type(sip) == RTN_UNICAST)
+		    inet_addr_type(&init_net, sip) == RTN_UNICAST)
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7718823711e..d1a45cb6f6b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -166,7 +166,8 @@ out:
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
  */
-static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
+static inline unsigned __inet_dev_addr_type(struct net *net,
+					    const struct net_device *dev,
 					    __be32 addr)
 {
 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
@@ -183,7 +184,7 @@ static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
 	res.r = NULL;
 #endif
 
-	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
+	local_table = fib_get_table(net, RT_TABLE_LOCAL);
 	if (local_table) {
 		ret = RTN_UNICAST;
 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
@@ -195,14 +196,15 @@ static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
 	return ret;
 }
 
-unsigned int inet_addr_type(__be32 addr)
+unsigned int inet_addr_type(struct net *net, __be32 addr)
 {
-	return __inet_dev_addr_type(NULL, addr);
+	return __inet_dev_addr_type(net, NULL, addr);
 }
 
-unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
+unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
+				__be32 addr)
 {
-       return __inet_dev_addr_type(dev, addr);
+       return __inet_dev_addr_type(net, dev, addr);
 }
 
 /* Given (packet source, input interface) and optional (dst, oif, tos):
@@ -391,7 +393,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
 		cfg->fc_gw = addr;
 		if (rt->rt_flags & RTF_GATEWAY &&
-		    inet_addr_type(addr) == RTN_UNICAST)
+		    inet_addr_type(&init_net, addr) == RTN_UNICAST)
 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
 	}
 
@@ -782,7 +784,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 
 		/* Check, that this local address finally disappeared. */
-		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
+		if (inet_addr_type(&init_net, ifa->ifa_local) != RTN_LOCAL) {
 			/* And the last, but not the least thing.
 			   We must flush stray FIB entries.
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index bbd4a247b19..c1263e23876 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -531,7 +531,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 
 			if (cfg->fc_scope >= RT_SCOPE_LINK)
 				return -EINVAL;
-			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
+			if (inet_addr_type(&init_net, nh->nh_gw) != RTN_UNICAST)
 				return -EINVAL;
 			if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
 				return -ENODEV;
@@ -809,7 +809,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	if (fi->fib_prefsrc) {
 		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
 		    fi->fib_prefsrc != cfg->fc_dst)
-			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
+			if (inet_addr_type(&init_net, fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1a6024978e2..e57f1673bf6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -590,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
 			goto out_unlock;
 
-		if (inet_addr_type(fl.fl4_src) == RTN_LOCAL)
+		if (inet_addr_type(&init_net, fl.fl4_src) == RTN_LOCAL)
 			err = __ip_route_output_key(&rt2, &fl);
 		else {
 			struct flowi fl2 = {};
@@ -733,7 +733,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	 */
 
 	if (!sysctl_icmp_ignore_bogus_error_responses &&
-	    inet_addr_type(iph->daddr) == RTN_BROADCAST) {
+	    inet_addr_type(&init_net, iph->daddr) == RTN_BROADCAST) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
 					    "type %u, code %u "
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2f14745a9e1..4d315158fd3 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -151,7 +151,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 						__be32 addr;
 
 						memcpy(&addr, sptr+soffset-1, 4);
-						if (inet_addr_type(addr) != RTN_LOCAL) {
+						if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
 							dopt->ts_needtime = 1;
 							soffset += 8;
 						}
@@ -400,7 +400,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 					{
 						__be32 addr;
 						memcpy(&addr, &optptr[optptr[2]-1], 4);
-						if (inet_addr_type(addr) == RTN_UNICAST)
+						if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
 							break;
 						if (skb)
 							timeptr = (__be32*)&optptr[optptr[2]+3];
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 041f5120808..963981a9d50 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -423,7 +423,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	   and the destination is RTN_UNICAST (and not local), then create
 	   a cache_bypass connection entry */
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark
-	    && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
+	    && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9fecfe7d116..94c5767c8e0 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -704,7 +704,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
 	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
 
 	/* check if local node and update the flags */
-	if (inet_addr_type(udest->addr) == RTN_LOCAL) {
+	if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
 		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 			| IP_VS_CONN_F_LOCALNODE;
 	}
@@ -756,7 +756,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 
 	EnterFunction(2);
 
-	atype = inet_addr_type(udest->addr);
+	atype = inet_addr_type(&init_net, udest->addr);
 	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 		return -EINVAL;
 
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4011f8f987c..0ed843ed420 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -19,7 +19,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	unsigned int hh_len;
 	unsigned int type;
 
-	type = inet_addr_type(iph->saddr);
+	type = inet_addr_type(&init_net, iph->saddr);
 	if (addr_type == RTN_UNSPEC)
 		addr_type = type;
 
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 14394c6a3c2..8763902944b 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -26,7 +26,7 @@ MODULE_DESCRIPTION("iptables addrtype match");
 static inline bool match_type(const struct net_device *dev, __be32 addr,
 			      u_int16_t mask)
 {
-	return !!(mask & (1 << inet_dev_addr_type(dev, addr)));
+	return !!(mask & (1 << inet_dev_addr_type(&init_net, dev, addr)));
 }
 
 static bool
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e811034b1b0..747911a8241 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -620,7 +620,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
 		goto out;
-	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+	chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
 	ret = -EADDRNOTAVAIL;
 	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 78c4ce424a6..1e59c0d4b11 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1164,7 +1164,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
 			goto reject_redirect;
 	} else {
-		if (inet_addr_type(new_gw) != RTN_UNICAST)
+		if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST)
 			goto reject_redirect;
 	}
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 218b8b3050a..ac8772dd968 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -280,7 +280,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* Check if the address belongs to the host. */
 	if (addr_type == IPV6_ADDR_MAPPED) {
 		v4addr = addr->sin6_addr.s6_addr32[3];
-		if (inet_addr_type(v4addr) != RTN_LOCAL) {
+		if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
 			err = -EADDRNOTAVAIL;
 			goto out;
 		}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index b9219649502..3f7def2936b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -372,7 +372,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
 /* Should this be available for binding?   */
 static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
 {
-	int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
+	int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
 
 
 	if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
-- 
cgit v1.2.3


From 4d1169c1e781e5853317c6b75620d678b2c4854e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:26:13 -0800
Subject: [NETNS]: Add netns to nl_info structure.

nl_info is used to track the end-user destination of routing change
notification. This is a natural object to hold a namespace on. Place
it there and utilize the context in the appropriate places.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c  |  5 +++++
 net/ipv4/fib_semantics.c | 16 ++++++++++------
 net/ipv6/ip6_fib.c       |  4 +++-
 net/ipv6/route.c         |  8 ++++++--
 4 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d1a45cb6f6b..90aa05f47f8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -310,6 +310,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
 	int plen;
 
 	memset(cfg, 0, sizeof(*cfg));
+	cfg->fc_nlinfo.nl_net = &init_net;
 
 	if (rt->rt_dst.sa_family != AF_INET)
 		return -EAFNOSUPPORT;
@@ -516,6 +517,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 	cfg->fc_nlinfo.nlh = nlh;
+	cfg->fc_nlinfo.nl_net = &init_net;
 
 	if (cfg->fc_type > RTN_MAX) {
 		err = -EINVAL;
@@ -670,6 +672,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		.fc_prefsrc = ifa->ifa_local,
 		.fc_oif = ifa->ifa_dev->dev->ifindex,
 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
+		.fc_nlinfo = {
+			.nl_net = &init_net,
+		},
 	};
 
 	if (type == RTN_UNICAST)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c1263e23876..0de6102020e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -320,11 +320,11 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, info->pid, RTNLGRP_IPV4_ROUTE,
+	err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
 			  info->nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_ROUTE, err);
+		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
 }
 
 /* Return the first fib alias matching TOS with
@@ -531,9 +531,11 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 
 			if (cfg->fc_scope >= RT_SCOPE_LINK)
 				return -EINVAL;
-			if (inet_addr_type(&init_net, nh->nh_gw) != RTN_UNICAST)
+			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
+					   nh->nh_gw) != RTN_UNICAST)
 				return -EINVAL;
-			if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
+			if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
+						      nh->nh_oif)) == NULL)
 				return -ENODEV;
 			if (!(dev->flags&IFF_UP))
 				return -ENETDOWN;
@@ -795,7 +797,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (nhs != 1 || nh->nh_gw)
 			goto err_inval;
 		nh->nh_scope = RT_SCOPE_NOWHERE;
-		nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
+		nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
+					      fi->fib_nh->nh_oif);
 		err = -ENODEV;
 		if (nh->nh_dev == NULL)
 			goto failure;
@@ -809,7 +812,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	if (fi->fib_prefsrc) {
 		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
 		    fi->fib_prefsrc != cfg->fc_dst)
-			if (inet_addr_type(&init_net, fi->fib_prefsrc) != RTN_LOCAL)
+			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
+					   fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0e83164aa3e..f93407cf651 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1314,7 +1314,9 @@ static int fib6_walk(struct fib6_walker_t *w)
 
 static int fib6_clean_node(struct fib6_walker_t *w)
 {
-	struct nl_info info = {};
+	struct nl_info info = {
+		.nl_net = &init_net,
+	};
 	int res;
 	struct rt6_info *rt;
 	struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d2b3cf695af..48c8d7cb902 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -601,7 +601,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
 int ip6_ins_rt(struct rt6_info *rt)
 {
-	struct nl_info info = {};
+	struct nl_info info = {
+		.nl_net = &init_net,
+	};
 	return __ip6_ins_rt(rt, &info);
 }
 
@@ -1259,7 +1261,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 
 int ip6_del_rt(struct rt6_info *rt)
 {
-	struct nl_info info = {};
+	struct nl_info info = {
+		.nl_net = &init_net,
+	};
 	return __ip6_del_rt(rt, &info);
 }
 
-- 
cgit v1.2.3


From 6e04d01dfa6fe81d6bcae1e85de695285086cee2 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:26:50 -0800
Subject: [NETNS]: Show routing information from correct namespace (fib_hash.c)

This is the second part (for the CONFIG_IP_FIB_HASH case) of the patch
#4, where we have created proc files in namespaces.

Now we can dump correct info in them.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index d20ae2e28ab..a07300404e8 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -783,6 +783,7 @@ struct fib_table *fib_hash_init(u32 id)
 #ifdef CONFIG_PROC_FS
 
 struct fib_iter_state {
+	struct seq_net_private p;
 	struct fn_zone	*zone;
 	int		bucket;
 	struct hlist_head *hash_head;
@@ -796,8 +797,11 @@ struct fib_iter_state {
 static struct fib_alias *fib_get_first(struct seq_file *seq)
 {
 	struct fib_iter_state *iter = seq->private;
-	struct fib_table *main_table = fib_get_table(&init_net, RT_TABLE_MAIN);
-	struct fn_hash *table = (struct fn_hash *)main_table->tb_data;
+	struct fib_table *main_table;
+	struct fn_hash *table;
+
+	main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN);
+	table = (struct fn_hash *)main_table->tb_data;
 
 	iter->bucket    = 0;
 	iter->hash_head = NULL;
@@ -934,10 +938,11 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(fib_hash_lock)
 {
+	struct fib_iter_state *iter = seq->private;
 	void *v = NULL;
 
 	read_lock(&fib_hash_lock);
-	if (fib_get_table(&init_net, RT_TABLE_MAIN))
+	if (fib_get_table(iter->p.net, RT_TABLE_MAIN))
 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 	return v;
 }
@@ -1025,8 +1030,8 @@ static const struct seq_operations fib_seq_ops = {
 
 static int fib_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &fib_seq_ops,
-			sizeof(struct fib_iter_state));
+	return seq_open_net(inode, file, &fib_seq_ops,
+			    sizeof(struct fib_iter_state));
 }
 
 static const struct file_operations fib_seq_fops = {
@@ -1034,7 +1039,7 @@ static const struct file_operations fib_seq_fops = {
 	.open           = fib_seq_open,
 	.read           = seq_read,
 	.llseek         = seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
 int __net_init fib_proc_init(struct net *net)
-- 
cgit v1.2.3


From 1c340b2fd73880136c438e6e7978288fbec8273f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:27:17 -0800
Subject: [NETNS]: Show routing information from correct namespace (fib_trie.c)

This is the second part (for the CONFIG_IP_FIB_TRIE case) of the patch
#4, where we have created proc files in namespaces.

Now we can dump correct info in them.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fc0624e6a64..aa9deb708dc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1995,6 +1995,7 @@ struct fib_table *fib_hash_init(u32 id)
 #ifdef CONFIG_PROC_FS
 /* Depth first Trie walk iterator */
 struct fib_trie_iter {
+	struct seq_net_private p;
 	struct trie *trie_local, *trie_main;
 	struct tnode *tnode;
 	struct trie *trie;
@@ -2162,17 +2163,18 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 
 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 {
+	struct net *net = (struct net *)seq->private;
 	struct trie *trie_local, *trie_main;
 	struct trie_stat *stat;
 	struct fib_table *tb;
 
 	trie_local = NULL;
-	tb = fib_get_table(&init_net, RT_TABLE_LOCAL);
+	tb = fib_get_table(net, RT_TABLE_LOCAL);
 	if (tb)
 		trie_local = (struct trie *) tb->tb_data;
 
 	trie_main = NULL;
-	tb = fib_get_table(&init_net, RT_TABLE_MAIN);
+	tb = fib_get_table(net, RT_TABLE_MAIN);
 	if (tb)
 		trie_main = (struct trie *) tb->tb_data;
 
@@ -2202,7 +2204,25 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 
 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, fib_triestat_seq_show, NULL);
+	int err;
+	struct net *net;
+
+	net = get_proc_net(inode);
+	if (net == NULL)
+		return -ENXIO;
+	err = single_open(file, fib_triestat_seq_show, net);
+	if (err < 0) {
+		put_net(net);
+		return err;
+	}
+	return 0;
+}
+
+static int fib_triestat_seq_release(struct inode *ino, struct file *f)
+{
+	struct seq_file *seq = f->private_data;
+	put_net(seq->private);
+	return single_release(ino, f);
 }
 
 static const struct file_operations fib_triestat_fops = {
@@ -2210,7 +2230,7 @@ static const struct file_operations fib_triestat_fops = {
 	.open	= fib_triestat_seq_open,
 	.read	= seq_read,
 	.llseek	= seq_lseek,
-	.release = single_release,
+	.release = fib_triestat_seq_release,
 };
 
 static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
@@ -2239,12 +2259,12 @@ static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
 	struct fib_table *tb;
 
 	if (!iter->trie_local) {
-		tb = fib_get_table(&init_net, RT_TABLE_LOCAL);
+		tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
 		if (tb)
 			iter->trie_local = (struct trie *) tb->tb_data;
 	}
 	if (!iter->trie_main) {
-		tb = fib_get_table(&init_net, RT_TABLE_MAIN);
+		tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
 		if (tb)
 			iter->trie_main = (struct trie *) tb->tb_data;
 	}
@@ -2388,8 +2408,8 @@ static const struct seq_operations fib_trie_seq_ops = {
 
 static int fib_trie_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &fib_trie_seq_ops,
-			sizeof(struct fib_trie_iter));
+	return seq_open_net(inode, file, &fib_trie_seq_ops,
+			    sizeof(struct fib_trie_iter));
 }
 
 static const struct file_operations fib_trie_fops = {
@@ -2397,7 +2417,7 @@ static const struct file_operations fib_trie_fops = {
 	.open   = fib_trie_seq_open,
 	.read   = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
@@ -2492,8 +2512,8 @@ static const struct seq_operations fib_route_seq_ops = {
 
 static int fib_route_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &fib_route_seq_ops,
-			sizeof(struct fib_trie_iter));
+	return seq_open_net(inode, file, &fib_route_seq_ops,
+			    sizeof(struct fib_trie_iter));
 }
 
 static const struct file_operations fib_route_fops = {
@@ -2501,7 +2521,7 @@ static const struct file_operations fib_route_fops = {
 	.open   = fib_route_seq_open,
 	.read   = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 int __net_init fib_proc_init(struct net *net)
-- 
cgit v1.2.3


From e4e4971c5f8b70daccdd401132a81b723dc8337e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:27:51 -0800
Subject: [NETNS]: Namespacing IPv4 fib rules.

The final trick for rules: place fib4_rules_ops into struct net and
modify initialization path for this.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_rules.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 49819fe7e4c..72232ab4ecb 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -32,8 +32,6 @@
 #include <net/ip_fib.h>
 #include <net/fib_rules.h>
 
-static struct fib_rules_ops fib4_rules_ops;
-
 struct fib4_rule
 {
 	struct fib_rule		common;
@@ -63,7 +61,7 @@ int fib_lookup(struct flowi *flp, struct fib_result *res)
 	};
 	int err;
 
-	err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
+	err = fib_rules_lookup(init_net.ipv4.rules_ops, flp, 0, &arg);
 	res->r = arg.rule;
 
 	return err;
@@ -149,6 +147,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
 			       struct nlattr **tb)
 {
+	struct net *net = skb->sk->sk_net;
 	int err = -EINVAL;
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
@@ -159,7 +158,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		if (rule->action == FR_ACT_TO_TBL) {
 			struct fib_table *table;
 
-			table = fib_empty_table(&init_net);
+			table = fib_empty_table(net);
 			if (table == NULL) {
 				err = -ENOBUFS;
 				goto errout;
@@ -250,9 +249,9 @@ static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
 	struct list_head *pos;
 	struct fib_rule *rule;
 
-	if (!list_empty(&fib4_rules_ops.rules_list)) {
-		pos = fib4_rules_ops.rules_list.next;
-		if (pos->next != &fib4_rules_ops.rules_list) {
+	if (!list_empty(&ops->rules_list)) {
+		pos = ops->rules_list.next;
+		if (pos->next != &ops->rules_list) {
 			rule = list_entry(pos->next, struct fib_rule, list);
 			if (rule->pref)
 				return rule->pref - 1;
@@ -274,7 +273,7 @@ static void fib4_rule_flush_cache(void)
 	rt_cache_flush(-1);
 }
 
-static struct fib_rules_ops fib4_rules_ops = {
+static struct fib_rules_ops fib4_rules_ops_template = {
 	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
@@ -288,24 +287,20 @@ static struct fib_rules_ops fib4_rules_ops = {
 	.flush_cache	= fib4_rule_flush_cache,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.policy		= fib4_rule_policy,
-	.rules_list	= LIST_HEAD_INIT(fib4_rules_ops.rules_list),
 	.owner		= THIS_MODULE,
 };
 
-static int __init fib_default_rules_init(void)
+static int fib_default_rules_init(struct fib_rules_ops *ops)
 {
 	int err;
 
-	err = fib_default_rule_add(&fib4_rules_ops, 0,
-				   RT_TABLE_LOCAL, FIB_RULE_PERMANENT);
+	err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT);
 	if (err < 0)
 		return err;
-	err = fib_default_rule_add(&fib4_rules_ops, 0x7FFE,
-				   RT_TABLE_MAIN, 0);
+	err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
 	if (err < 0)
 		return err;
-	err = fib_default_rule_add(&fib4_rules_ops, 0x7FFF,
-				   RT_TABLE_DEFAULT, 0);
+	err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
 	if (err < 0)
 		return err;
 	return 0;
@@ -314,20 +309,29 @@ static int __init fib_default_rules_init(void)
 int __net_init fib4_rules_init(struct net *net)
 {
 	int err;
+	struct fib_rules_ops *ops;
+
+	ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL);
+	if (ops == NULL)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&ops->rules_list);
+	fib_rules_register(net, ops);
 
-	fib_rules_register(net, &fib4_rules_ops);
-	err = fib_default_rules_init();
+	err = fib_default_rules_init(ops);
 	if (err < 0)
 		goto fail;
+	net->ipv4.rules_ops = ops;
 	return 0;
 
 fail:
 	/* also cleans all rules already added */
-	fib_rules_unregister(net, &fib4_rules_ops);
+	fib_rules_unregister(net, ops);
+	kfree(ops);
 	return err;
 }
 
 void __net_exit fib4_rules_exit(struct net *net)
 {
-	fib_rules_unregister(net, &fib4_rules_ops);
+	fib_rules_unregister(net, net->ipv4.rules_ops);
+	kfree(net->ipv4.rules_ops);
 }
-- 
cgit v1.2.3


From e4aef8aea31e6fc61b33a57120968a6e9824d138 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:28:24 -0800
Subject: [NETNS]: Place fib tables into netns.

The preparatory work has been done. All we need is to substitute
fib_table_hash with net->ipv4.fib_table_hash. Netns context is
available when required.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 90aa05f47f8..7fe54a3c168 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -50,7 +50,6 @@
 #define FFprint(a...) printk(KERN_DEBUG a)
 
 static struct sock *fibnl;
-struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
@@ -67,9 +66,9 @@ static int __net_init fib4_rules_init(struct net *net)
 		goto fail;
 
 	hlist_add_head_rcu(&local_table->tb_hlist,
-				&fib_table_hash[TABLE_LOCAL_INDEX]);
+				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
 	hlist_add_head_rcu(&main_table->tb_hlist,
-				&fib_table_hash[TABLE_MAIN_INDEX]);
+				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
 	return 0;
 
 fail:
@@ -92,7 +91,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 	if (!tb)
 		return NULL;
 	h = id & (FIB_TABLE_HASHSZ - 1);
-	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
+	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
 	return tb;
 }
 
@@ -100,13 +99,16 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 {
 	struct fib_table *tb;
 	struct hlist_node *node;
+	struct hlist_head *head;
 	unsigned int h;
 
 	if (id == 0)
 		id = RT_TABLE_MAIN;
 	h = id & (FIB_TABLE_HASHSZ - 1);
+
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
+	head = &net->ipv4.fib_table_hash[h];
+	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
 		if (tb->tb_id == id) {
 			rcu_read_unlock();
 			return tb;
@@ -117,15 +119,17 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-static void fib_flush(void)
+static void fib_flush(struct net *net)
 {
 	int flushed = 0;
 	struct fib_table *tb;
 	struct hlist_node *node;
+	struct hlist_head *head;
 	unsigned int h;
 
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
-		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
+		head = &net->ipv4.fib_table_hash[h];
+		hlist_for_each_entry(tb, node, head, tb_hlist)
 			flushed += tb->tb_flush(tb);
 	}
 
@@ -620,6 +624,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int e = 0, s_e;
 	struct fib_table *tb;
 	struct hlist_node *node;
+	struct hlist_head *head;
 	int dumped = 0;
 
 	if (net != &init_net)
@@ -634,7 +639,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 
 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
 		e = 0;
-		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
+		head = &net->ipv4.fib_table_hash[h];
+		hlist_for_each_entry(tb, node, head, tb_hlist) {
 			if (e < s_e)
 				goto next;
 			if (dumped)
@@ -797,7 +803,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 			   for stray nexthop entries, then ignite fib_flush.
 			*/
 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
-				fib_flush();
+				fib_flush(&init_net);
 		}
 	}
 #undef LOCAL_OK
@@ -882,7 +888,7 @@ static void nl_fib_lookup_exit(struct net *net)
 static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down(0, dev, force))
-		fib_flush();
+		fib_flush(&init_net);
 	rt_cache_flush(0);
 	arp_ifdown(dev);
 }
@@ -963,8 +969,13 @@ static int __net_init ip_fib_net_init(struct net *net)
 {
 	unsigned int i;
 
+	net->ipv4.fib_table_hash = kzalloc(
+			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
+	if (net->ipv4.fib_table_hash == NULL)
+		return -ENOMEM;
+
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
-		INIT_HLIST_HEAD(&fib_table_hash[i]);
+		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
 
 	return fib4_rules_init(net);
 }
@@ -982,13 +993,14 @@ static void __net_exit ip_fib_net_exit(struct net *net)
 		struct hlist_head *head;
 		struct hlist_node *node, *tmp;
 
-		head = &fib_table_hash[i];
+		head = &net->ipv4.fib_table_hash[i];
 		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
 			hlist_del(node);
 			tb->tb_flush(tb);
 			kfree(tb);
 		}
 	}
+	kfree(net->ipv4.fib_table_hash);
 }
 
 static int __net_init fib_net_init(struct net *net)
-- 
cgit v1.2.3


From 6bd48fcf73019219495f7599028296c65b749bb4 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:28:55 -0800
Subject: [NETNS]: Provide correct namespace for fibnl netlink socket.

This patch makes the netlink socket to be per namespace. That allows
to have each namespace its own socket for routing queries.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7fe54a3c168..a5e81677a2b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -49,8 +49,6 @@
 
 #define FFprint(a...) printk(KERN_DEBUG a)
 
-static struct sock *fibnl;
-
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
 static int __net_init fib4_rules_init(struct net *net)
@@ -845,11 +843,13 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 
 static void nl_fib_input(struct sk_buff *skb)
 {
+	struct net *net;
 	struct fib_result_nl *frn;
 	struct nlmsghdr *nlh;
 	struct fib_table *tb;
 	u32 pid;
 
+	net = skb->sk->sk_net;
 	nlh = nlmsg_hdr(skb);
 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
@@ -861,28 +861,36 @@ static void nl_fib_input(struct sk_buff *skb)
 	nlh = nlmsg_hdr(skb);
 
 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
-	tb = fib_get_table(&init_net, frn->tb_id_in);
+	tb = fib_get_table(net, frn->tb_id_in);
 
 	nl_fib_lookup(frn, tb);
 
 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
 	NETLINK_CB(skb).pid = 0;         /* from kernel */
 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
-	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
+	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
 }
 
 static int nl_fib_lookup_init(struct net *net)
 {
-	fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
-				      nl_fib_input, NULL, THIS_MODULE);
-	if (fibnl == NULL)
+	struct sock *sk;
+	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
+				   nl_fib_input, NULL, THIS_MODULE);
+	if (sk == NULL)
 		return -EAFNOSUPPORT;
+	/* Don't hold an extra reference on the namespace */
+	put_net(sk->sk_net);
+	net->ipv4.fibnl = sk;
 	return 0;
 }
 
 static void nl_fib_lookup_exit(struct net *net)
 {
-	sock_put(fibnl);
+	/* At the last minute lie and say this is a socket for the
+	 * initial network namespace. So the socket will  be safe to free.
+	 */
+	net->ipv4.fibnl->sk_net = get_net(&init_net);
+	sock_put(net->ipv4.fibnl);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
-- 
cgit v1.2.3


From 4b5d47d4d372287b48a5fac8c497cba5e0618a36 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:29:23 -0800
Subject: [NETNS]: Correctly fill fib_config data.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a5e81677a2b..8b44e319524 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -305,14 +305,14 @@ static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
 	return len + nla_total_size(4);
 }
 
-static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
+static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 				 struct fib_config *cfg)
 {
 	__be32 addr;
 	int plen;
 
 	memset(cfg, 0, sizeof(*cfg));
-	cfg->fc_nlinfo.nl_net = &init_net;
+	cfg->fc_nlinfo.nl_net = net;
 
 	if (rt->rt_dst.sa_family != AF_INET)
 		return -EAFNOSUPPORT;
@@ -373,7 +373,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
 		colon = strchr(devname, ':');
 		if (colon)
 			*colon = 0;
-		dev = __dev_get_by_name(&init_net, devname);
+		dev = __dev_get_by_name(net, devname);
 		if (!dev)
 			return -ENODEV;
 		cfg->fc_oif = dev->ifindex;
@@ -396,7 +396,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
 		cfg->fc_gw = addr;
 		if (rt->rt_flags & RTF_GATEWAY &&
-		    inet_addr_type(&init_net, addr) == RTN_UNICAST)
+		    inet_addr_type(net, addr) == RTN_UNICAST)
 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
 	}
 
@@ -453,7 +453,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 			return -EFAULT;
 
 		rtnl_lock();
-		err = rtentry_to_fib_config(cmd, &rt, &cfg);
+		err = rtentry_to_fib_config(&init_net, cmd, &rt, &cfg);
 		if (err == 0) {
 			struct fib_table *tb;
 
@@ -494,8 +494,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_FLOW]		= { .type = NLA_U32 },
 };
 
-static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
-			     struct fib_config *cfg)
+static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct fib_config *cfg)
 {
 	struct nlattr *attr;
 	int err, remaining;
@@ -519,7 +519,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 	cfg->fc_nlinfo.nlh = nlh;
-	cfg->fc_nlinfo.nl_net = &init_net;
+	cfg->fc_nlinfo.nl_net = net;
 
 	if (cfg->fc_type > RTN_MAX) {
 		err = -EINVAL;
@@ -575,7 +575,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	if (net != &init_net)
 		return -EINVAL;
 
-	err = rtm_to_fib_config(skb, nlh, &cfg);
+	err = rtm_to_fib_config(net, skb, nlh, &cfg);
 	if (err < 0)
 		goto errout;
 
@@ -600,7 +600,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	if (net != &init_net)
 		return -EINVAL;
 
-	err = rtm_to_fib_config(skb, nlh, &cfg);
+	err = rtm_to_fib_config(net, skb, nlh, &cfg);
 	if (err < 0)
 		goto errout;
 
@@ -667,6 +667,7 @@ out:
 
 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 {
+	struct net *net = ifa->ifa_dev->dev->nd_net;
 	struct fib_table *tb;
 	struct fib_config cfg = {
 		.fc_protocol = RTPROT_KERNEL,
@@ -677,14 +678,14 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		.fc_oif = ifa->ifa_dev->dev->ifindex,
 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
 		.fc_nlinfo = {
-			.nl_net = &init_net,
+			.nl_net = net,
 		},
 	};
 
 	if (type == RTN_UNICAST)
-		tb = fib_new_table(&init_net, RT_TABLE_MAIN);
+		tb = fib_new_table(net, RT_TABLE_MAIN);
 	else
-		tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
+		tb = fib_new_table(net, RT_TABLE_LOCAL);
 
 	if (tb == NULL)
 		return;
-- 
cgit v1.2.3


From 1bad118a330d494b23663fce94d4e9d9d5065fa7 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:29:53 -0800
Subject: [NETNS]: Pass namespace through ip_rt_ioctl.

... up to rtentry_to_fib_config

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c      | 2 +-
 net/ipv4/fib_frontend.c | 8 ++++----
 net/ipv4/ipconfig.c     | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d76f8d8d5eb..bcf8c8a4a3a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -795,7 +795,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCADDRT:
 		case SIOCDELRT:
 		case SIOCRTMSG:
-			err = ip_rt_ioctl(cmd, (void __user *)arg);
+			err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg);
 			break;
 		case SIOCDARP:
 		case SIOCGARP:
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8b44e319524..15909a9fa9b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -437,7 +437,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
  */
 
-int ip_rt_ioctl(unsigned int cmd, void __user *arg)
+int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 {
 	struct fib_config cfg;
 	struct rtentry rt;
@@ -453,18 +453,18 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 			return -EFAULT;
 
 		rtnl_lock();
-		err = rtentry_to_fib_config(&init_net, cmd, &rt, &cfg);
+		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
 		if (err == 0) {
 			struct fib_table *tb;
 
 			if (cmd == SIOCDELRT) {
-				tb = fib_get_table(&init_net, cfg.fc_table);
+				tb = fib_get_table(net, cfg.fc_table);
 				if (tb)
 					err = tb->tb_delete(tb, &cfg);
 				else
 					err = -ESRCH;
 			} else {
-				tb = fib_new_table(&init_net, cfg.fc_table);
+				tb = fib_new_table(net, cfg.fc_table);
 				if (tb)
 					err = tb->tb_insert(tb, &cfg);
 				else
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4bc3a57c183..a52b5853aaa 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -302,7 +302,7 @@ static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
 
 	mm_segment_t oldfs = get_fs();
 	set_fs(get_ds());
-	res = ip_rt_ioctl(cmd, (void __user *) arg);
+	res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
 	set_fs(oldfs);
 	return res;
 }
-- 
cgit v1.2.3


From 226b0b4a51d1cc09928e569b121ca0abe2839169 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:30:24 -0800
Subject: [NETNS]: Replace init_net with the correct context in fib_frontend.c

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 15909a9fa9b..82109f13d57 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -604,7 +604,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	if (err < 0)
 		goto errout;
 
-	tb = fib_new_table(&init_net, cfg.fc_table);
+	tb = fib_new_table(net, cfg.fc_table);
 	if (tb == NULL) {
 		err = -ENOBUFS;
 		goto errout;
@@ -794,7 +794,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 
 		/* Check, that this local address finally disappeared. */
-		if (inet_addr_type(&init_net, ifa->ifa_local) != RTN_LOCAL) {
+		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
 			/* And the last, but not the least thing.
 			   We must flush stray FIB entries.
 
@@ -802,7 +802,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 			   for stray nexthop entries, then ignite fib_flush.
 			*/
 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
-				fib_flush(&init_net);
+				fib_flush(dev->nd_net);
 		}
 	}
 #undef LOCAL_OK
@@ -897,7 +897,7 @@ static void nl_fib_lookup_exit(struct net *net)
 static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down(0, dev, force))
-		fib_flush(&init_net);
+		fib_flush(dev->nd_net);
 	rt_cache_flush(0);
 	arp_ifdown(dev);
 }
-- 
cgit v1.2.3


From 8cced9eff1d413c28efac9c5ac5a75793c9251cf Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:30:49 -0800
Subject: [NETNS]: Enable routing configuration in non-initial namespace.

I.e. remove the net != &init_net checks from the places, that now can
handle other-than-init net namespace.

Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 82109f13d57..64421c520a0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -572,9 +572,6 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	struct fib_table *tb;
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
 	if (err < 0)
 		goto errout;
@@ -597,9 +594,6 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
 	struct fib_table *tb;
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
 	if (err < 0)
 		goto errout;
@@ -625,9 +619,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	struct hlist_head *head;
 	int dumped = 0;
 
-	if (net != &init_net)
-		return 0;
-
 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 		return ip_rt_dump(skb, cb);
@@ -934,9 +925,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	struct net_device *dev = ptr;
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
 	if (event == NETDEV_UNREGISTER) {
 		fib_disable_ip(dev, 2);
 		return NOTIFY_DONE;
@@ -1016,10 +1004,6 @@ static int __net_init fib_net_init(struct net *net)
 {
 	int error;
 
-	error = 0;
-	if (net != &init_net)
-		goto out;
-
 	error = ip_fib_net_init(net);
 	if (error < 0)
 		goto out;
-- 
cgit v1.2.3


From e5d69b9f4a6ce17f0d09595da45e37b870fee5ae Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:51:41 -0800
Subject: [ATM]: Oops reading net/atm/arp

cat /proc/net/atm/arp causes the NULL pointer dereference in the
get_proc_net+0xc/0x3a. This happens as proc_get_net believes that the
parent proc dir entry contains struct net.

Fix this assumption for "net/atm" case.

The problem is introduced by the commit c0097b07abf5f92ab135d024dd41bd2aada1512f
from Eric W. Biederman/Daniel Lezcano.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/atm/proc.c b/net/atm/proc.c
index 5d9d5ffba14..565e75e62ca 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -476,7 +476,7 @@ static void atm_proc_dirs_remove(void)
 		if (e->dirent)
 			remove_proc_entry(e->name, atm_proc_root);
 	}
-	remove_proc_entry("atm", init_net.proc_net);
+	proc_net_remove(&init_net, "atm");
 }
 
 int __init atm_proc_init(void)
@@ -484,7 +484,7 @@ int __init atm_proc_init(void)
 	static struct atm_proc_entry *e;
 	int ret;
 
-	atm_proc_root = proc_mkdir("atm", init_net.proc_net);
+	atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net);
 	if (!atm_proc_root)
 		goto err_out;
 	for (e = atm_proc_ents; e->name; e++) {
-- 
cgit v1.2.3


From ae22120ad846399f6aa19c5b32f8d4c7bd068fd1 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:52:35 -0800
Subject: [ATM]: Simplify /proc/net/atm/arp opening

The iterator state->ns.neigh_sub_iter initialization is moved from
arp_seq_open to clip_seq_start for convinience. This should not be a
problem as the iterator will be used only after the seq_start
callback.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/clip.c | 42 +++++-------------------------------------
 1 file changed, 5 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/net/atm/clip.c b/net/atm/clip.c
index df7d218a682..45e08620c8c 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -903,6 +903,8 @@ static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
 
 static void *clip_seq_start(struct seq_file *seq, loff_t * pos)
 {
+	struct clip_seq_state *state = seq->private;
+	state->ns.neigh_sub_iter = clip_seq_sub_iter;
 	return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
 }
 
@@ -932,49 +934,15 @@ static const struct seq_operations arp_seq_ops = {
 
 static int arp_seq_open(struct inode *inode, struct file *file)
 {
-	struct clip_seq_state *state;
-	struct seq_file *seq;
-	int rc = -EAGAIN;
-
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (!state) {
-		rc = -ENOMEM;
-		goto out_kfree;
-	}
-	state->ns.neigh_sub_iter = clip_seq_sub_iter;
-
-	rc = seq_open(file, &arp_seq_ops);
-	if (rc)
-		goto out_kfree;
-
-	seq = file->private_data;
-	seq->private = state;
-	state->ns.net = get_proc_net(inode);
-	if (!state->ns.net) {
-		seq_release_private(inode, file);
-		rc = -ENXIO;
-	}
-out:
-	return rc;
-
-out_kfree:
-	kfree(state);
-	goto out;
-}
-
-static int arp_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct clip_seq_state *state = seq->private;
-	put_net(state->ns.net);
-	return seq_release_private(inode, file);
+	return seq_open_net(inode, file, &arp_seq_ops,
+			    sizeof(struct clip_seq_state));
 }
 
 static const struct file_operations arp_seq_fops = {
 	.open		= arp_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= arp_seq_release,
+	.release	= seq_release_net,
 	.owner		= THIS_MODULE
 };
 #endif
-- 
cgit v1.2.3


From 4250846146c04ac6f17bf92619ddfef6db2cf34f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jan 2008 03:53:12 -0800
Subject: [NEIGH]: Make /proc/net/arp opening consistent with seq_net_open
 semantics

seq_open_net requires that first field of the seq->private data to be
struct seq_net_private. In reality this is a single pointer to a
struct net for now. The patch makes code consistent.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 802493327a8..19c0dd12fd5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2142,7 +2142,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
 static struct neighbour *neigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
-	struct net *net = state->net;
+	struct net *net = state->p.net;
 	struct neigh_table *tbl = state->tbl;
 	struct neighbour *n = NULL;
 	int bucket = state->bucket;
@@ -2183,7 +2183,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
 					loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
-	struct net *net = state->net;
+	struct net *net = state->p.net;
 	struct neigh_table *tbl = state->tbl;
 
 	if (state->neigh_sub_iter) {
@@ -2243,7 +2243,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
-	struct net * net = state->net;
+	struct net * net = state->p.net;
 	struct neigh_table *tbl = state->tbl;
 	struct pneigh_entry *pn = NULL;
 	int bucket = state->bucket;
@@ -2266,7 +2266,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
 					    loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
-	struct net * net = state->net;
+	struct net * net = state->p.net;
 	struct neigh_table *tbl = state->tbl;
 
 	pn = pn->next;
-- 
cgit v1.2.3


From 408c4768cd0843f43a13a442c76215dd9cadf23d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Jan 2008 17:41:21 -0800
Subject: [NETNS]: Clean out the ipv6-related sysctls creation/destruction

The addrconf sysctls and neigh sysctls are registered and
unregistered always in pairs, so they can be joined into
one (well, two) functions, that accept the struct inet6_dev
and do all the job.

This also get rids of unneeded ifdefs inside the code.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 63 +++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6a48bb88f46..27b35ddeeab 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -102,7 +102,15 @@
 
 #ifdef CONFIG_SYSCTL
 static void addrconf_sysctl_register(struct inet6_dev *idev);
-static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+static void addrconf_sysctl_unregister(struct inet6_dev *idev);
+#else
+static inline void addrconf_sysctl_register(struct inet6_dev *idev)
+{
+}
+
+static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
+{
+}
 #endif
 
 #ifdef CONFIG_IPV6_PRIVACY
@@ -392,13 +400,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 
 	ipv6_mc_init_dev(ndev);
 	ndev->tstamp = jiffies;
-#ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
-			      NET_IPV6_NEIGH, "ipv6",
-			      &ndisc_ifinfo_sysctl_change,
-			      NULL);
 	addrconf_sysctl_register(ndev);
-#endif
 	/* protected by rtnl_lock */
 	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
@@ -2391,15 +2393,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	case NETDEV_CHANGENAME:
 		if (idev) {
 			snmp6_unregister_dev(idev);
-#ifdef CONFIG_SYSCTL
-			addrconf_sysctl_unregister(&idev->cnf);
-			neigh_sysctl_unregister(idev->nd_parms);
-			neigh_sysctl_register(dev, idev->nd_parms,
-					      NET_IPV6, NET_IPV6_NEIGH, "ipv6",
-					      &ndisc_ifinfo_sysctl_change,
-					      NULL);
+			addrconf_sysctl_unregister(idev);
 			addrconf_sysctl_register(idev);
-#endif
 			err = snmp6_register_dev(idev);
 			if (err)
 				return notifier_from_errno(err);
@@ -2523,10 +2518,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	/* Shot the device (if unregistered) */
 
 	if (how == 1) {
-#ifdef CONFIG_SYSCTL
-		addrconf_sysctl_unregister(&idev->cnf);
-		neigh_sysctl_unregister(idev->nd_parms);
-#endif
+		addrconf_sysctl_unregister(idev);
 		neigh_parms_release(&nd_tbl, idev->nd_parms);
 		neigh_ifdown(&nd_tbl, dev);
 		in6_dev_put(idev);
@@ -4106,21 +4098,34 @@ out:
 	return;
 }
 
+static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+{
+	struct addrconf_sysctl_table *t;
+
+	if (p->sysctl == NULL)
+		return;
+
+	t = p->sysctl;
+	p->sysctl = NULL;
+	unregister_sysctl_table(t->sysctl_header);
+	kfree(t->dev_name);
+	kfree(t);
+}
+
 static void addrconf_sysctl_register(struct inet6_dev *idev)
 {
+	neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
+			      NET_IPV6_NEIGH, "ipv6",
+			      &ndisc_ifinfo_sysctl_change,
+			      NULL);
 	__addrconf_sysctl_register(idev->dev->name, idev->dev->ifindex,
 			idev, &idev->cnf);
 }
 
-static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void addrconf_sysctl_unregister(struct inet6_dev *idev)
 {
-	if (p->sysctl) {
-		struct addrconf_sysctl_table *t = p->sysctl;
-		p->sysctl = NULL;
-		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->dev_name);
-		kfree(t);
-	}
+	__addrconf_sysctl_unregister(&idev->cnf);
+	neigh_sysctl_unregister(idev->nd_parms);
 }
 
 
@@ -4232,8 +4237,8 @@ void addrconf_cleanup(void)
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 
 #ifdef CONFIG_SYSCTL
-	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	addrconf_sysctl_unregister(&ipv6_devconf);
+	__addrconf_sysctl_unregister(&ipv6_devconf_dflt);
+	__addrconf_sysctl_unregister(&ipv6_devconf);
 #endif
 
 	rtnl_lock();
-- 
cgit v1.2.3


From 9589731220edfebeb6a05c52d0838a99dee20893 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Jan 2008 17:41:45 -0800
Subject: [NETNS]: Make the __addrconf_sysctl_register return an error

This error code will be needed to abort the namespace
creation if needed.

Probably, this is to be checked when a new device is
created (currently it is ignored).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 27b35ddeeab..18d43349013 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4044,7 +4044,7 @@ static struct addrconf_sysctl_table
 	},
 };
 
-static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
+static int __addrconf_sysctl_register(char *dev_name, int ctl_name,
 		struct inet6_dev *idev, struct ipv6_devconf *p)
 {
 	int i;
@@ -4088,14 +4088,14 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
 		goto free_procname;
 
 	p->sysctl = t;
-	return;
+	return 0;
 
 free_procname:
 	kfree(t->dev_name);
 free:
 	kfree(t);
 out:
-	return;
+	return -ENOBUFS;
 }
 
 static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
-- 
cgit v1.2.3


From bff16c2f991386883dc81ec969ba15eb270a0c7f Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Jan 2008 17:42:13 -0800
Subject: [NETNS]: Make the ctl-tables per-namespace

This includes passing the net to __addrconf_sysctl_register
and saving this on the ctl_table->extra2 to be used in
handlers (those, needing it).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 18d43349013..bde50c68672 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -456,13 +456,13 @@ static void dev_forward_change(struct inet6_dev *idev)
 }
 
 
-static void addrconf_forward_change(void)
+static void addrconf_forward_change(struct net *net)
 {
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
 	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		rcu_read_lock();
 		idev = __in6_dev_get(dev);
 		if (idev) {
@@ -478,12 +478,15 @@ static void addrconf_forward_change(void)
 
 static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 {
+	struct net *net;
+
+	net = (struct net *)table->extra2;
 	if (p == &ipv6_devconf_dflt.forwarding)
 		return;
 
 	if (p == &ipv6_devconf.forwarding) {
 		ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
-		addrconf_forward_change();
+		addrconf_forward_change(net);
 	} else if ((!*p) ^ (!old))
 		dev_forward_change((struct inet6_dev *)table->extra1);
 
@@ -4044,8 +4047,8 @@ static struct addrconf_sysctl_table
 	},
 };
 
-static int __addrconf_sysctl_register(char *dev_name, int ctl_name,
-		struct inet6_dev *idev, struct ipv6_devconf *p)
+static int __addrconf_sysctl_register(struct net *net, char *dev_name,
+		int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p)
 {
 	int i;
 	struct addrconf_sysctl_table *t;
@@ -4068,6 +4071,7 @@ static int __addrconf_sysctl_register(char *dev_name, int ctl_name,
 	for (i=0; t->addrconf_vars[i].data; i++) {
 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+		t->addrconf_vars[i].extra2 = net;
 	}
 
 	/*
@@ -4082,7 +4086,7 @@ static int __addrconf_sysctl_register(char *dev_name, int ctl_name,
 	addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
 	addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name;
 
-	t->sysctl_header = register_sysctl_paths(addrconf_ctl_path,
+	t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
 			t->addrconf_vars);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
@@ -4118,8 +4122,8 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
 			      NET_IPV6_NEIGH, "ipv6",
 			      &ndisc_ifinfo_sysctl_change,
 			      NULL);
-	__addrconf_sysctl_register(idev->dev->name, idev->dev->ifindex,
-			idev, &idev->cnf);
+	__addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name,
+			idev->dev->ifindex, idev, &idev->cnf);
 }
 
 static void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -4215,9 +4219,9 @@ int __init addrconf_init(void)
 	ipv6_addr_label_rtnl_register();
 
 #ifdef CONFIG_SYSCTL
-	__addrconf_sysctl_register("all", NET_PROTO_CONF_ALL,
+	__addrconf_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL,
 			NULL, &ipv6_devconf);
-	__addrconf_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+	__addrconf_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT,
 			NULL, &ipv6_devconf_dflt);
 #endif
 
-- 
cgit v1.2.3


From e0da5a480cafc7ca228d6b5a05dbd77344a6bd29 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Jan 2008 17:42:55 -0800
Subject: [NETNS]: Create ipv6 devconf-s for namespaces

This is the core. Declare and register the pernet subsys for
addrconf. The init callback the will create the devconf-s.

The init_net will reuse the existing statically declared confs,
so that accessing them from inside the ipv6 code will still
work.

The register_pernet_subsys() is moved above the ipv6_add_dev()
call for loopback, because this function will need the
net->devconf_dflt pointer to be already set.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 82 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bde50c68672..3ad081e9366 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4135,6 +4135,70 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev)
 
 #endif
 
+static int addrconf_init_net(struct net *net)
+{
+	int err;
+	struct ipv6_devconf *all, *dflt;
+
+	err = -ENOMEM;
+	all = &ipv6_devconf;
+	dflt = &ipv6_devconf_dflt;
+
+	if (net != &init_net) {
+		all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
+		if (all == NULL)
+			goto err_alloc_all;
+
+		dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+		if (dflt == NULL)
+			goto err_alloc_dflt;
+	}
+
+	net->ipv6.devconf_all = all;
+	net->ipv6.devconf_dflt = dflt;
+
+#ifdef CONFIG_SYSCTL
+	err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL,
+			NULL, all);
+	if (err < 0)
+		goto err_reg_all;
+
+	err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT,
+			NULL, dflt);
+	if (err < 0)
+		goto err_reg_dflt;
+#endif
+	return 0;
+
+#ifdef CONFIG_SYSCTL
+err_reg_dflt:
+	__addrconf_sysctl_unregister(all);
+err_reg_all:
+	kfree(dflt);
+#endif
+err_alloc_dflt:
+	kfree(all);
+err_alloc_all:
+	return err;
+}
+
+static void addrconf_exit_net(struct net *net)
+{
+#ifdef CONFIG_SYSCTL
+	__addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
+	__addrconf_sysctl_unregister(net->ipv6.devconf_all);
+#endif
+	if (net != &init_net) {
+		kfree(net->ipv6.devconf_dflt);
+		kfree(net->ipv6.devconf_all);
+	}
+}
+
+static struct pernet_operations addrconf_ops = {
+	.init = addrconf_init_net,
+	.exit = addrconf_exit_net,
+};
+
 /*
  *      Device notifier
  */
@@ -4167,6 +4231,8 @@ int __init addrconf_init(void)
 		return err;
 	}
 
+	register_pernet_subsys(&addrconf_ops);
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -4190,7 +4256,7 @@ int __init addrconf_init(void)
 		err = -ENOMEM;
 	rtnl_unlock();
 	if (err)
-		return err;
+		goto errlo;
 
 	ip6_null_entry.u.dst.dev = init_net.loopback_dev;
 	ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev);
@@ -4218,16 +4284,11 @@ int __init addrconf_init(void)
 
 	ipv6_addr_label_rtnl_register();
 
-#ifdef CONFIG_SYSCTL
-	__addrconf_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL,
-			NULL, &ipv6_devconf);
-	__addrconf_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT,
-			NULL, &ipv6_devconf_dflt);
-#endif
-
 	return 0;
 errout:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
+errlo:
+	unregister_pernet_subsys(&addrconf_ops);
 
 	return err;
 }
@@ -4240,10 +4301,7 @@ void addrconf_cleanup(void)
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 
-#ifdef CONFIG_SYSCTL
-	__addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	__addrconf_sysctl_unregister(&ipv6_devconf);
-#endif
+	unregister_pernet_subsys(&addrconf_ops);
 
 	rtnl_lock();
 
-- 
cgit v1.2.3


From 441fc2a2393a9b9ffbacb97f4427cce743579411 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Jan 2008 17:43:22 -0800
Subject: [NETNS]: Use the per-net ipv6_devconf_dflt

All its users are in net/ipv6/addrconf.c's sysctl handlers.
Since they already have the struct net to get from, the
per-net ipv6_devconf_dflt can already be used.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3ad081e9366..9b96de3ba5e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -334,7 +334,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 
 	rwlock_init(&ndev->lock);
 	ndev->dev = dev;
-	memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+	memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf));
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->cnf.sysctl = NULL;
 	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -481,11 +481,11 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 	struct net *net;
 
 	net = (struct net *)table->extra2;
-	if (p == &ipv6_devconf_dflt.forwarding)
+	if (p == &net->ipv6.devconf_dflt->forwarding)
 		return;
 
 	if (p == &ipv6_devconf.forwarding) {
-		ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+		net->ipv6.devconf_dflt->forwarding = ipv6_devconf.forwarding;
 		addrconf_forward_change(net);
 	} else if ((!*p) ^ (!old))
 		dev_forward_change((struct inet6_dev *)table->extra1);
-- 
cgit v1.2.3


From e186932b3d26bd975022a1e254407e95dddceae7 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Jan 2008 17:43:50 -0800
Subject: [NETNS]: Use the per-net ipv6_devconf(_all) in sysctl handlers

Actually the net->ipv6.devconf_all can be used in a few places,
but to keep the /proc/sys/net/ipv6/conf/ sysctls work consistently
in the namespace we should use the per-net devconf_all in the
sysctl "forwarding" handler.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9b96de3ba5e..cd90f9a6da3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -456,7 +456,7 @@ static void dev_forward_change(struct inet6_dev *idev)
 }
 
 
-static void addrconf_forward_change(struct net *net)
+static void addrconf_forward_change(struct net *net, __s32 newf)
 {
 	struct net_device *dev;
 	struct inet6_dev *idev;
@@ -466,8 +466,8 @@ static void addrconf_forward_change(struct net *net)
 		rcu_read_lock();
 		idev = __in6_dev_get(dev);
 		if (idev) {
-			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			int changed = (!idev->cnf.forwarding) ^ (!newf);
+			idev->cnf.forwarding = newf;
 			if (changed)
 				dev_forward_change(idev);
 		}
@@ -484,9 +484,10 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 	if (p == &net->ipv6.devconf_dflt->forwarding)
 		return;
 
-	if (p == &ipv6_devconf.forwarding) {
-		net->ipv6.devconf_dflt->forwarding = ipv6_devconf.forwarding;
-		addrconf_forward_change(net);
+	if (p == &net->ipv6.devconf_all->forwarding) {
+		__s32 newf = net->ipv6.devconf_all->forwarding;
+		net->ipv6.devconf_dflt->forwarding = newf;
+		addrconf_forward_change(net, newf);
 	} else if ((!*p) ^ (!old))
 		dev_forward_change((struct inet6_dev *)table->extra1);
 
-- 
cgit v1.2.3


From 9993e7d313e80bdc005d09c7def91903e0068f07 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 10 Jan 2008 21:56:38 -0800
Subject: [TCP]: Do not purge sk_forward_alloc entirely in tcp_delack_timer().

Otherwise we beat heavily on the global tcp_memory atomics
when all of the sockets in the system are slowly sending
perioding packet clumps.

Noticed and suggested by Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 17931be6d58..803d758a2b1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -186,7 +186,7 @@ static void tcp_delack_timer(unsigned long data)
 		goto out_unlock;
 	}
 
-	sk_mem_reclaim(sk);
+	sk_mem_reclaim_partial(sk);
 
 	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
 		goto out;
-- 
cgit v1.2.3


From 3c40090a0f5b69deecc5ca615f994957f949333d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 22:42:49 -0800
Subject: [NETNS][IPV6]: inet6_addr - isolate inet6 addresses from proc file

Make /proc/net/if_inet6 show only inet6 addresses belonging to the
namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cd90f9a6da3..d7b440343e9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2736,6 +2736,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) {
 
 #ifdef CONFIG_PROC_FS
 struct if6_iter_state {
+	struct seq_net_private p;
 	int bucket;
 };
 
@@ -2743,9 +2744,13 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct if6_iter_state *state = seq->private;
+	struct net *net = state->p.net;
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		ifa = inet6_addr_lst[state->bucket];
+
+		while (ifa && ifa->idev->dev->nd_net != net)
+			ifa = ifa->lst_next;
 		if (ifa)
 			break;
 	}
@@ -2755,13 +2760,22 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
 {
 	struct if6_iter_state *state = seq->private;
+	struct net *net = state->p.net;
 
 	ifa = ifa->lst_next;
 try_again:
+	if (ifa) {
+		if (ifa->idev->dev->nd_net != net) {
+			ifa = ifa->lst_next;
+			goto try_again;
+		}
+	}
+
 	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
 		ifa = inet6_addr_lst[state->bucket];
 		goto try_again;
 	}
+
 	return ifa;
 }
 
@@ -2818,8 +2832,8 @@ static const struct seq_operations if6_seq_ops = {
 
 static int if6_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &if6_seq_ops,
-			sizeof(struct if6_iter_state));
+	return seq_open_net(inode, file, &if6_seq_ops,
+			    sizeof(struct if6_iter_state));
 }
 
 static const struct file_operations if6_fops = {
@@ -2827,19 +2841,34 @@ static const struct file_operations if6_fops = {
 	.open		= if6_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
-int __init if6_proc_init(void)
+static int if6_proc_net_init(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops))
+	if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
 
+static void if6_proc_net_exit(struct net *net)
+{
+       proc_net_remove(net, "if_inet6");
+}
+
+static struct pernet_operations if6_proc_net_ops = {
+       .init = if6_proc_net_init,
+       .exit = if6_proc_net_exit,
+};
+
+int __init if6_proc_init(void)
+{
+	return register_pernet_subsys(&if6_proc_net_ops);
+}
+
 void if6_proc_exit(void)
 {
-	proc_net_remove(&init_net, "if_inet6");
+	unregister_pernet_subsys(&if6_proc_net_ops);
 }
 #endif	/* CONFIG_PROC_FS */
 
-- 
cgit v1.2.3


From bfeade087005278fc8cafe230b7658a4f40c5acb Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 22:43:18 -0800
Subject: [NETNS][IPV6]: inet6_addr - check ipv6 address per namespace

When a new address is added, we must check if the new address does not
already exists.  This patch makes this check to be aware of a network
namespace, so the check will look if the address already exists for
the specified network namespace. While the addresses are browsed, the
addresses which do not belong to the namespace are discarded.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c   | 6 ++++--
 net/ipv6/af_inet6.c   | 3 ++-
 net/ipv6/anycast.c    | 2 +-
 net/ipv6/datagram.c   | 3 ++-
 net/ipv6/icmp.c       | 2 +-
 net/ipv6/ip6_tunnel.c | 8 ++++----
 net/ipv6/ndisc.c      | 2 +-
 net/ipv6/raw.c        | 3 ++-
 net/sctp/ipv6.c       | 5 +++--
 9 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d7b440343e9..f35c3df410d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1206,13 +1206,16 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
 	return cnt;
 }
 
-int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
+int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
+		  struct net_device *dev, int strict)
 {
 	struct inet6_ifaddr * ifp;
 	u8 hash = ipv6_addr_hash(addr);
 
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+		if (ifp->idev->dev->nd_net != net)
+			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
 		    !(ifp->flags&IFA_F_TENTATIVE)) {
 			if (dev == NULL || ifp->idev->dev == dev ||
@@ -1223,7 +1226,6 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
 	read_unlock_bh(&addrconf_hash_lock);
 	return ifp != NULL;
 }
-
 EXPORT_SYMBOL(ipv6_chk_addr);
 
 static
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ac8772dd968..3150c4be3c0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -314,7 +314,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			 */
 			v4addr = LOOPBACK4_IPV6;
 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
-				if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+				if (!ipv6_chk_addr(&init_net, &addr->sin6_addr,
+						   dev, 0)) {
 					if (dev)
 						dev_put(dev);
 					err = -EADDRNOTAVAIL;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 5c4190060e7..9c7f83fbc3a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -89,7 +89,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		return -EPERM;
 	if (ipv6_addr_is_multicast(addr))
 		return -EINVAL;
-	if (ipv6_chk_addr(addr, NULL, 0))
+	if (ipv6_chk_addr(&init_net, addr, NULL, 0))
 		return -EINVAL;
 
 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f49a06aa97d..94fa6ae77cf 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -549,7 +549,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 						return -ENODEV;
 				}
 			}
-			if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
+			if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
+					   dev, 0)) {
 				if (dev)
 					dev_put(dev);
 				err = -EINVAL;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5395afe55ca..cbb5b9cf84a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -332,7 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	 */
 	addr_type = ipv6_addr_type(&hdr->daddr);
 
-	if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
+	if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
 		saddr = &hdr->daddr;
 
 	/*
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 29b5321e39c..425c9ae8b31 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -653,8 +653,8 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 			ldev = dev_get_by_index(&init_net, p->link);
 
 		if ((ipv6_addr_is_multicast(&p->laddr) ||
-		     likely(ipv6_chk_addr(&p->laddr, ldev, 0))) &&
-		    likely(!ipv6_chk_addr(&p->raddr, NULL, 0)))
+		     likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) &&
+		    likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
 			ret = 1;
 
 		if (ldev)
@@ -788,12 +788,12 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 		if (p->link)
 			ldev = dev_get_by_index(&init_net, p->link);
 
-		if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0)))
+		if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0)))
 			printk(KERN_WARNING
 			       "%s xmit: Local address not yet configured!\n",
 			       p->name);
 		else if (!ipv6_addr_is_multicast(&p->raddr) &&
-			 unlikely(ipv6_chk_addr(&p->raddr, NULL, 0)))
+			 unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
 			printk(KERN_WARNING
 			       "%s xmit: Routing loop! "
 			       "Remote address found on this node!\n",
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b66a1f81bd8..e217d3ff00f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -653,7 +653,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
 
-	if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1))
+	if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1))
 		saddr = &ipv6_hdr(skb)->saddr;
 
 	if ((probes -= neigh->parms->ucast_probes) < 0) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 45a580e843d..cb0b110a2ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -298,7 +298,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		v4addr = LOOPBACK4_IPV6;
 		if (!(addr_type & IPV6_ADDR_MULTICAST))	{
 			err = -EADDRNOTAVAIL;
-			if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+			if (!ipv6_chk_addr(&init_net, &addr->sin6_addr,
+					   dev, 0)) {
 				if (dev)
 					dev_put(dev);
 				goto out;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bd04aed673c..74f106a7a7e 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -556,7 +556,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
 	if (!(type & IPV6_ADDR_UNICAST))
 		return 0;
 
-	return ipv6_chk_addr(in6, NULL, 0);
+	return ipv6_chk_addr(&init_net, in6, NULL, 0);
 }
 
 /* This function checks if the address is a valid address to be used for
@@ -858,7 +858,8 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
 			dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
 			if (!dev)
 				return 0;
-			if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) {
+			if (!ipv6_chk_addr(&init_net, &addr->v6.sin6_addr,
+					   dev, 0)) {
 				dev_put(dev);
 				return 0;
 			}
-- 
cgit v1.2.3


From 06bfe655e7db7719c0eb51eb420fb9c2a6aa1e00 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 22:43:42 -0800
Subject: [NETNS][IPV6]: inet6_addr - ipv6_chk_same_addr namespace aware

This patch makes ipv6_chk_same_addr function to be aware of the
network namespace. The addresses not belonging to the network
namespace are discarded.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f35c3df410d..41cc31ee297 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -149,7 +149,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 
 static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 				struct prefix_info *pinfo);
-static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
+static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+			      struct net_device *dev);
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
@@ -560,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	write_lock(&addrconf_hash_lock);
 
 	/* Ignore adding duplicate addresses on an interface */
-	if (ipv6_chk_same_addr(addr, idev->dev)) {
+	if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) {
 		ADBG(("ipv6_add_addr: already assigned\n"));
 		err = -EEXIST;
 		goto out;
@@ -1229,12 +1230,15 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 EXPORT_SYMBOL(ipv6_chk_addr);
 
 static
-int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
+int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+		       struct net_device *dev)
 {
 	struct inet6_ifaddr * ifp;
 	u8 hash = ipv6_addr_hash(addr);
 
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+		if (ifp->idev->dev->nd_net != net)
+			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
 			if (dev == NULL || ifp->idev->dev == dev)
 				break;
-- 
cgit v1.2.3


From 1cab3da6be6c7659f62d0d297b389cc0e48b2178 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 22:44:09 -0800
Subject: [NETNS][IPV6]: inet6_addr - ipv6_get_ifaddr namespace aware

The inet6_addr_lst is browsed taking into account the network
namespace specified as parameter. If an address does not belong
to the specified namespace, it is ignored.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c   | 11 +++++++----
 net/ipv6/ip6_output.c |  3 ++-
 net/ipv6/ndisc.c      |  9 +++++----
 3 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 41cc31ee297..c4df6cdff65 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1247,13 +1247,16 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 	return ifp != NULL;
 }
 
-struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
+struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr,
+				     struct net_device *dev, int strict)
 {
 	struct inet6_ifaddr * ifp;
 	u8 hash = ipv6_addr_hash(addr);
 
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+		if (ifp->idev->dev->nd_net != net)
+			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
 			if (dev == NULL || ifp->idev->dev == dev ||
 			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@@ -1739,7 +1742,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 
 ok:
 
-		ifp = ipv6_get_ifaddr(&addr, dev, 1);
+		ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1);
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
@@ -3135,7 +3138,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	/* We ignore other flags so far. */
 	ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
 
-	ifa = ipv6_get_ifaddr(pfx, dev, 1);
+	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
 	if (ifa == NULL) {
 		/*
 		 * It would be best to check for !NLM_F_CREATE here but
@@ -3442,7 +3445,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	if (ifm->ifa_index)
 		dev = __dev_get_by_index(&init_net, ifm->ifa_index);
 
-	if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+	if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) {
 		err = -EADDRNOTAVAIL;
 		goto errout;
 	}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ba7c8aaf278..15c4f6cee3e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -936,7 +936,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 			struct flowi fl_gw;
 			int redirect;
 
-			ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
+			ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
+					      (*dst)->dev, 1);
 
 			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 			if (ifp)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e217d3ff00f..bdfc4ea6194 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -556,7 +556,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	};
 
 	/* for anycast or proxy, solicited_addr != src_addr */
-	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
+	ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1);
 	if (ifp) {
 		src_addr = solicited_addr;
 		if (ifp->flags & IFA_F_OPTIMISTIC)
@@ -616,7 +616,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 	 * suppress the inclusion of the sllao.
 	 */
 	if (send_sllao) {
-		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1);
+		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr,
+							   dev, 1);
 		if (ifp) {
 			if (ifp->flags & IFA_F_OPTIMISTIC)  {
 				send_sllao = 0;
@@ -741,7 +742,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
 	inc = ipv6_addr_is_multicast(daddr);
 
-	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
+	if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) {
 
 		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
 			if (dad) {
@@ -899,7 +900,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 			return;
 		}
 	}
-	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) {
+	if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) {
 		if (ifp->flags & IFA_F_TENTATIVE) {
 			addrconf_dad_failure(ifp);
 			return;
-- 
cgit v1.2.3


From 389f661224cdbdf178553fb09a52dc6c8bf86890 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Thu, 10 Jan 2008 22:44:40 -0800
Subject: [NETNS][IPV6]: inet6_addr - make ipv6_chk_home_addr namespace aware

Looks if the address is belonging to the network namespace, otherwise
discard the address for the check.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 +++-
 net/ipv6/exthdrs.c  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c4df6cdff65..803caf1a389 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2883,13 +2883,15 @@ void if6_proc_exit(void)
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 /* Check if address is a home address configured on any interface. */
-int ipv6_chk_home_addr(struct in6_addr *addr)
+int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 {
 	int ret = 0;
 	struct inet6_ifaddr * ifp;
 	u8 hash = ipv6_addr_hash(addr);
 	read_lock_bh(&addrconf_hash_lock);
 	for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+		if (ifp->idev->dev->nd_net != net)
+			continue;
 		if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
 		    (ifp->flags & IFA_F_HOMEADDRESS)) {
 			ret = 1;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 2df34ed276f..3cd1c993d52 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -445,7 +445,7 @@ looped_back:
 			kfree_skb(skb);
 			return -1;
 		}
-		if (!ipv6_chk_home_addr(addr)) {
+		if (!ipv6_chk_home_addr(&init_net, addr)) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
-- 
cgit v1.2.3


From 490d5046930276aae50dd16942649bfc626056f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:17:20 -0800
Subject: [TCP]: Uninline tcp_set_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/ipv4/tcp.c:
  tcp_close_state | -226
  tcp_done        | -145
  tcp_close       | -564
  tcp_disconnect  | -141
 4 functions changed, 1076 bytes removed, diff: -1076

net/ipv4/tcp_input.c:
  tcp_fin               |  -86
  tcp_rcv_state_process | -164
 2 functions changed, 250 bytes removed, diff: -250

net/ipv4/tcp_ipv4.c:
  tcp_v4_connect | -209
 1 function changed, 209 bytes removed, diff: -209

net/ipv4/arp.c:
  arp_ignore |   +5
 1 function changed, 5 bytes added, diff: +5

net/ipv6/tcp_ipv6.c:
  tcp_v6_connect | -158
 1 function changed, 158 bytes removed, diff: -158

net/sunrpc/xprtsock.c:
  xs_sendpages |   -2
 1 function changed, 2 bytes removed, diff: -2

net/dccp/ccids/ccid3.c:
  ccid3_update_send_interval |   +7
 1 function changed, 7 bytes added, diff: +7

net/ipv4/tcp.c:
  tcp_set_state | +238
 1 function changed, 238 bytes added, diff: +238

built-in.o:
 12 functions changed, 250 bytes added, 1695 bytes removed, diff: -1445

I've no explanation why some unrelated changes seem to occur
consistently as well (arp_ignore, ccid3_update_send_interval;
I checked the arp_ignore asm and it seems to be due to some
reordered of operation order causing some extra opcodes to be
generated). Still, the benefits are pretty obvious from the
codiff's results.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34085e3a409..a0d373bd906 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1652,6 +1652,41 @@ recv_urg:
 	goto out;
 }
 
+void tcp_set_state(struct sock *sk, int state)
+{
+	int oldstate = sk->sk_state;
+
+	switch (state) {
+	case TCP_ESTABLISHED:
+		if (oldstate != TCP_ESTABLISHED)
+			TCP_INC_STATS(TCP_MIB_CURRESTAB);
+		break;
+
+	case TCP_CLOSE:
+		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
+			TCP_INC_STATS(TCP_MIB_ESTABRESETS);
+
+		sk->sk_prot->unhash(sk);
+		if (inet_csk(sk)->icsk_bind_hash &&
+		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+			inet_put_port(&tcp_hashinfo, sk);
+		/* fall through */
+	default:
+		if (oldstate==TCP_ESTABLISHED)
+			TCP_DEC_STATS(TCP_MIB_CURRESTAB);
+	}
+
+	/* Change state AFTER socket is unhashed to avoid closed
+	 * socket sitting in hash tables.
+	 */
+	sk->sk_state = state;
+
+#ifdef STATE_TRACE
+	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
+#endif
+}
+EXPORT_SYMBOL_GPL(tcp_set_state);
+
 /*
  *	State processing on a close. This implements the state shift for
  *	sending our FIN frame. Note that we only send a FIN for some
-- 
cgit v1.2.3


From cea14e0ed6082bd3dff19203d01340de6e7ca067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:19:12 -0800
Subject: [TCP]: Uninline tcp_is_cwnd_limited
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/ipv4/tcp_cong.c:
  tcp_reno_cong_avoid |  -65
 1 function changed, 65 bytes removed, diff: -65

net/ipv4/arp.c:
  arp_ignore |   -5
 1 function changed, 5 bytes removed, diff: -5

net/ipv4/tcp_bic.c:
  bictcp_cong_avoid |  -57
 1 function changed, 57 bytes removed, diff: -57

net/ipv4/tcp_cubic.c:
  bictcp_cong_avoid |  -61
 1 function changed, 61 bytes removed, diff: -61

net/ipv4/tcp_highspeed.c:
  hstcp_cong_avoid |  -63
 1 function changed, 63 bytes removed, diff: -63

net/ipv4/tcp_hybla.c:
  hybla_cong_avoid |  -85
 1 function changed, 85 bytes removed, diff: -85

net/ipv4/tcp_htcp.c:
  htcp_cong_avoid |  -57
 1 function changed, 57 bytes removed, diff: -57

net/ipv4/tcp_veno.c:
  tcp_veno_cong_avoid |  -52
 1 function changed, 52 bytes removed, diff: -52

net/ipv4/tcp_scalable.c:
  tcp_scalable_cong_avoid |  -61
 1 function changed, 61 bytes removed, diff: -61

net/ipv4/tcp_yeah.c:
  tcp_yeah_cong_avoid |  -75
 1 function changed, 75 bytes removed, diff: -75

net/ipv4/tcp_illinois.c:
  tcp_illinois_cong_avoid |  -54
 1 function changed, 54 bytes removed, diff: -54

net/dccp/ccids/ccid3.c:
  ccid3_update_send_interval |   -7
  ccid3_hc_tx_packet_recv    |   +7
 2 functions changed, 7 bytes added, 7 bytes removed, diff: +0

net/ipv4/tcp_cong.c:
  tcp_is_cwnd_limited |  +88
 1 function changed, 88 bytes added, diff: +88

built-in.o:
 14 functions changed, 95 bytes added, 642 bytes removed, diff: -547

...Again some gcc artifacts visible as well.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4451750b478..3a6be23d222 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -274,6 +274,27 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
+/* RFC2861 Check whether we are limited by application or congestion window
+ * This is the inverse of cwnd check in tcp_tso_should_defer
+ */
+int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	u32 left;
+
+	if (in_flight >= tp->snd_cwnd)
+		return 1;
+
+	if (!sk_can_gso(sk))
+		return 0;
+
+	left = tp->snd_cwnd - in_flight;
+	if (sysctl_tcp_tso_win_divisor)
+		return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
+	else
+		return left <= tcp_max_burst(tp);
+}
+EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
 
 /*
  * Slow start is used when congestion window is less than slow start
-- 
cgit v1.2.3


From 1486cbd777316e55aa30aeb37e231ce618c29d2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:20:03 -0800
Subject: [XFRM] xfrm_policy: kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/xfrm/xfrm_policy.c:
  xfrm_audit_policy_delete | -692
  xfrm_audit_policy_add    | -692
 2 functions changed, 1384 bytes removed, diff: -1384

net/xfrm/xfrm_policy.c:
  xfrm_audit_common_policyinfo | +704
 1 function changed, 704 bytes added, diff: +704

net/xfrm/xfrm_policy.o:
 3 functions changed, 704 bytes added, 1384 bytes removed, diff: -680

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 534b29eb46f..47219f98053 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2367,8 +2367,8 @@ void __init xfrm_init(void)
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
-						struct audit_buffer *audit_buf)
+static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
+					 struct audit_buffer *audit_buf)
 {
 	struct xfrm_sec_ctx *ctx = xp->security;
 	struct xfrm_selector *sel = &xp->selector;
-- 
cgit v1.2.3


From 50eb431d6e98189eb40606fcd4d03ecd8e168afa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:21:00 -0800
Subject: [IPV6] route: kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/ipv6/route.c:
  ip6_pkt_prohibit_out | -130
  ip6_pkt_discard      | -261
  ip6_pkt_discard_out  | -130
  ip6_pkt_prohibit     | -261
 4 functions changed, 782 bytes removed, diff: -782

net/ipv6/route.c:
  ip6_pkt_drop | +300
 1 function changed, 300 bytes added, diff: +300

net/ipv6/route.o:
 5 functions changed, 300 bytes added, 782 bytes removed, diff: -482

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48c8d7cb902..a429900d16a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1763,8 +1763,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
  *	Drop the packet on the floor
  */
 
-static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
-			       int ipstats_mib_noroutes)
+static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
 {
 	int type;
 	switch (ipstats_mib_noroutes) {
-- 
cgit v1.2.3


From 3f25252675770e08d97bc112e52208e8c70ce0e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:21:50 -0800
Subject: [NETLINK] af_netlink: kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/netlink/af_netlink.c:
  netlink_realloc_groups        |  -46
  netlink_insert                |  -49
  netlink_autobind              |  -94
  netlink_clear_multicast_users |  -48
  netlink_bind                  |  -55
  netlink_setsockopt            |  -54
  netlink_release               |  -86
  netlink_kernel_create         |  -47
  netlink_change_ngroups        |  -56
 9 functions changed, 535 bytes removed, diff: -535

net/netlink/af_netlink.c:
  netlink_table_ungrab |  +53
 1 function changed, 53 bytes added, diff: +53

net/netlink/af_netlink.o:
 10 functions changed, 53 bytes added, 535 bytes removed, diff: -482

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index be07f1b45ee..21f9e30184e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -193,7 +193,7 @@ static void netlink_table_grab(void)
 	}
 }
 
-static inline void netlink_table_ungrab(void)
+static void netlink_table_ungrab(void)
 	__releases(nl_table_lock)
 {
 	write_unlock_irq(&nl_table_lock);
-- 
cgit v1.2.3


From 56e252c7484c3ebc058f3c22d4a75386b079c49c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:23:58 -0800
Subject: [PKTGEN]: Kill dead static inlines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 94 -------------------------------------------------------
 1 file changed, 94 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index ede1feacddf..ebfb1268ac3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -397,62 +397,6 @@ struct pktgen_thread {
 #define REMOVE 1
 #define FIND   0
 
-/*  This code works around the fact that do_div cannot handle two 64-bit
-    numbers, and regular 64-bit division doesn't work on x86 kernels.
-    --Ben
-*/
-
-#define PG_DIV 0
-
-/* This was emailed to LMKL by: Chris Caputo <ccaputo@alt.net>
- * Function copied/adapted/optimized from:
- *
- *  nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html
- *
- * Copyright 1994, University of Cambridge Computer Laboratory
- * All Rights Reserved.
- *
- */
-static inline s64 divremdi3(s64 x, s64 y, int type)
-{
-	u64 a = (x < 0) ? -x : x;
-	u64 b = (y < 0) ? -y : y;
-	u64 res = 0, d = 1;
-
-	if (b > 0) {
-		while (b < a) {
-			b <<= 1;
-			d <<= 1;
-		}
-	}
-
-	do {
-		if (a >= b) {
-			a -= b;
-			res += d;
-		}
-		b >>= 1;
-		d >>= 1;
-	}
-	while (d);
-
-	if (PG_DIV == type) {
-		return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res;
-	} else {
-		return ((x & (1ll << 63)) == 0) ? a : -(s64) a;
-	}
-}
-
-/* End of hacks to deal with 64-bit math on x86 */
-
-/** Convert to milliseconds */
-static inline __u64 tv_to_ms(const struct timeval *tv)
-{
-	__u64 ms = tv->tv_usec / 1000;
-	ms += (__u64) tv->tv_sec * (__u64) 1000;
-	return ms;
-}
-
 /** Convert to micro-seconds */
 static inline __u64 tv_to_us(const struct timeval *tv)
 {
@@ -461,39 +405,6 @@ static inline __u64 tv_to_us(const struct timeval *tv)
 	return us;
 }
 
-static inline __u64 pg_div(__u64 n, __u32 base)
-{
-	__u64 tmp = n;
-	do_div(tmp, base);
-	/* printk("pktgen: pg_div, n: %llu  base: %d  rv: %llu\n",
-	   n, base, tmp); */
-	return tmp;
-}
-
-static inline __u64 pg_div64(__u64 n, __u64 base)
-{
-	__u64 tmp = n;
-/*
- * How do we know if the architecture we are running on
- * supports division with 64 bit base?
- *
- */
-#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__)
-
-	do_div(tmp, base);
-#else
-	tmp = divremdi3(n, base, PG_DIV);
-#endif
-	return tmp;
-}
-
-static inline __u64 getCurMs(void)
-{
-	struct timeval tv;
-	do_gettimeofday(&tv);
-	return tv_to_ms(&tv);
-}
-
 static inline __u64 getCurUs(void)
 {
 	struct timeval tv;
@@ -501,11 +412,6 @@ static inline __u64 getCurUs(void)
 	return tv_to_us(&tv);
 }
 
-static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b)
-{
-	return tv_to_us(a) - tv_to_us(b);
-}
-
 /* old include end */
 
 static char version[] __initdata = VERSION;
-- 
cgit v1.2.3


From 6db105db95197c0fe93f8b3fb338eb6cf17440b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 03:25:00 -0800
Subject: [PKTGEN]: uninline getCurUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/core/pktgen.c:
  pktgen_stop_device   |  -50
  pktgen_run           | -105
  pktgen_if_show       |  -37
  pktgen_thread_worker | -702
 4 functions changed, 894 bytes removed, diff: -894

net/core/pktgen.c:
  getCurUs |  +36
 1 function changed, 36 bytes added, diff: +36

net/core/pktgen.o:
 5 functions changed, 36 bytes added, 894 bytes removed, diff: -858

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index ebfb1268ac3..d18fdb10236 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -405,7 +405,7 @@ static inline __u64 tv_to_us(const struct timeval *tv)
 	return us;
 }
 
-static inline __u64 getCurUs(void)
+static __u64 getCurUs(void)
 {
 	struct timeval tv;
 	do_gettimeofday(&tv);
-- 
cgit v1.2.3


From c28a1cf448e59019fa681741963c3acaeaeb6d27 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 20:49:13 -0800
Subject: [IPV4] fib_trie: Get rid of trie_init().

trie_init is worthless it is just zeroing stuff that is already zero!
Move the memset() down to make it obvious.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index aa9deb708dc..0179d245c81 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -876,19 +876,6 @@ nomem:
 	}
 }
 
-static void trie_init(struct trie *t)
-{
-	if (!t)
-		return;
-
-	t->size = 0;
-	rcu_assign_pointer(t->trie, NULL);
-	t->revision = 0;
-#ifdef CONFIG_IP_FIB_TRIE_STATS
-	memset(&t->stats, 0, sizeof(struct trie_use_stats));
-#endif
-}
-
 /* readside must use rcu_read_lock currently dump routines
  via get_fa_head and dump */
 
@@ -1980,11 +1967,9 @@ struct fib_table *fib_hash_init(u32 id)
 	tb->tb_flush = fn_trie_flush;
 	tb->tb_select_default = fn_trie_select_default;
 	tb->tb_dump = fn_trie_dump;
-	memset(tb->tb_data, 0, sizeof(struct trie));
 
 	t = (struct trie *) tb->tb_data;
-
-	trie_init(t);
+	memset(t, 0, sizeof(*t));
 
 	if (id == RT_TABLE_LOCAL)
 		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
-- 
cgit v1.2.3


From 93e4308b3bea445dc2d3e3c1897a93fe111eba17 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 20:50:23 -0800
Subject: [IPV4] fib_trie: Get rid of unused revision element.

The revision element must of been part of an earlier design, because
currently it is set but never used.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0179d245c81..95a62aae5f4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -153,7 +153,6 @@ struct trie {
 	struct trie_use_stats stats;
 #endif
 	int size;
-	unsigned int revision;
 };
 
 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
@@ -1046,7 +1045,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 
 		if (!li) {
 			*err = -ENOMEM;
-			goto err;
+			goto done;
 		}
 
 		fa_head = &li->falh;
@@ -1058,7 +1057,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 
 	if (!l) {
 		*err = -ENOMEM;
-		goto err;
+		goto done;
 	}
 
 	l->key = key;
@@ -1067,7 +1066,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 	if (!li) {
 		tnode_free((struct tnode *) l);
 		*err = -ENOMEM;
-		goto err;
+		goto done;
 	}
 
 	fa_head = &li->falh;
@@ -1104,7 +1103,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 			free_leaf_info(li);
 			tnode_free((struct tnode *) l);
 			*err = -ENOMEM;
-			goto err;
+			goto done;
 		}
 
 		node_set_parent((struct node *)tn, tp);
@@ -1130,8 +1129,6 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 
 	rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
 done:
-	t->revision++;
-err:
 	return fa_head;
 }
 
@@ -1546,7 +1543,6 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	 * Remove the leaf and rebalance the tree
 	 */
 
-	t->revision++;
 	t->size--;
 
 	tp = node_parent(n);
@@ -1752,8 +1748,6 @@ static int fn_trie_flush(struct fib_table *tb)
 	struct leaf *ll = NULL, *l = NULL;
 	int found = 0, h;
 
-	t->revision++;
-
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		found += trie_flush_leaf(t, l);
 
-- 
cgit v1.2.3


From 187b5188a78694fa6608fa1252d5197a7b3ab076 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 20:55:55 -0800
Subject: [IPV4] fib_trie: Use %u for unsigned printfs.

Use %u instead of %d when printing unsigned values.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 95a62aae5f4..f5c046b517e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2100,13 +2100,13 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	else
 		avdepth = 0;
 
-	seq_printf(seq, "\tAver depth:     %d.%02d\n", avdepth / 100, avdepth % 100 );
+	seq_printf(seq, "\tAver depth:     %u.%02d\n", avdepth / 100, avdepth % 100 );
 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
 
 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
 
 	bytes = sizeof(struct leaf) * stat->leaves;
-	seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
+	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
 	bytes += sizeof(struct tnode) * stat->tnodes;
 
 	max = MAX_STAT_DEPTH;
@@ -2116,15 +2116,15 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	pointers = 0;
 	for (i = 1; i <= max; i++)
 		if (stat->nodesizes[i] != 0) {
-			seq_printf(seq, "  %d: %d",  i, stat->nodesizes[i]);
+			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
 			pointers += (1<<i) * stat->nodesizes[i];
 		}
 	seq_putc(seq, '\n');
-	seq_printf(seq, "\tPointers: %d\n", pointers);
+	seq_printf(seq, "\tPointers: %u\n", pointers);
 
 	bytes += sizeof(struct node *) * pointers;
-	seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
-	seq_printf(seq, "Total size: %d  kB\n", (bytes + 1023) / 1024);
+	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
+	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	seq_printf(seq, "Counters:\n---------\n");
@@ -2321,7 +2321,7 @@ static inline const char *rtn_type(unsigned t)
 
 	if (t < __RTN_MAX && rtn_type_names[t])
 		return rtn_type_names[t];
-	snprintf(buf, sizeof(buf), "type %d", t);
+	snprintf(buf, sizeof(buf), "type %u", t);
 	return buf;
 }
 
-- 
cgit v1.2.3


From fea86ad8123df0d49188cbc1dd2f48da6ae49d65 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 20:57:07 -0800
Subject: [IPV4] fib_trie: fib_insert_node cleanup

The only error from fib_insert_node is if memory allocation fails, so
instead of passing by reference, just use the convention of returning
NULL.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f5c046b517e..e047de6873b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -980,8 +980,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
 
 /* only used from updater-side */
 
-static  struct list_head *
-fib_insert_node(struct trie *t, int *err, u32 key, int plen)
+static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 {
 	int pos, newpos;
 	struct tnode *tp = NULL, *tn = NULL;
@@ -1043,10 +1042,8 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 
 		li = leaf_info_new(plen);
 
-		if (!li) {
-			*err = -ENOMEM;
-			goto done;
-		}
+		if (!li)
+			return NULL;
 
 		fa_head = &li->falh;
 		insert_leaf_info(&l->list, li);
@@ -1055,18 +1052,15 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 	t->size++;
 	l = leaf_new();
 
-	if (!l) {
-		*err = -ENOMEM;
-		goto done;
-	}
+	if (!l)
+		return NULL;
 
 	l->key = key;
 	li = leaf_info_new(plen);
 
 	if (!li) {
 		tnode_free((struct tnode *) l);
-		*err = -ENOMEM;
-		goto done;
+		return NULL;
 	}
 
 	fa_head = &li->falh;
@@ -1102,8 +1096,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 		if (!tn) {
 			free_leaf_info(li);
 			tnode_free((struct tnode *) l);
-			*err = -ENOMEM;
-			goto done;
+			return NULL;
 		}
 
 		node_set_parent((struct node *)tn, tp);
@@ -1262,10 +1255,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 	 */
 
 	if (!fa_head) {
-		err = 0;
-		fa_head = fib_insert_node(t, &err, key, plen);
-		if (err)
+		fa_head = fib_insert_node(t, key, plen);
+		if (unlikely(!fa_head)) {
+			err = -ENOMEM;
 			goto out_free_new_fa;
+		}
 	}
 
 	list_add_tail_rcu(&new_fa->fa_list,
-- 
cgit v1.2.3


From a6db9010922f2c02db2bbea8c17c50e451be38d9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 20:58:35 -0800
Subject: [IPV4] FIB: printk related cleanups

printk related cleanups:
 * Get rid of unused printk wrappers.
 * Make bug checks into KERN_WARNING because KERN_DEBUG gets ignored
 * Turn one cryptic old message into something real
 * Make sure all messages have KERN_XXX

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c  | 6 ++----
 net/ipv4/fib_hash.c      | 3 ++-
 net/ipv4/fib_semantics.c | 7 +++----
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 64421c520a0..02b5ff73357 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -47,8 +47,6 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 
-#define FFprint(a...) printk(KERN_DEBUG a)
-
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
 static int __net_init fib4_rules_init(struct net *net)
@@ -706,7 +704,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
 		if (prim == NULL) {
-			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
+			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
 			return;
 		}
 	}
@@ -753,7 +751,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 	else {
 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 		if (prim == NULL) {
-			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
+			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
 			return;
 		}
 	}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a07300404e8..258214f57fb 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -168,7 +168,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
 	new_hashmask = (new_divisor - 1);
 
 #if RT_CACHE_DEBUG >= 2
-	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
+	printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
+	       fz->fz_order, old_divisor);
 #endif
 
 	ht = fz_hash_alloc(new_divisor);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0de6102020e..3ed920b92fb 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -47,8 +47,6 @@
 
 #include "fib_lookup.h"
 
-#define FSprintk(a...)
-
 static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
@@ -145,7 +143,7 @@ static const struct
 void free_fib_info(struct fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
-		printk("Freeing alive fib_info %p\n", fi);
+		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
 		return;
 	}
 	change_nexthops(fi) {
@@ -914,7 +912,8 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
 				continue;
 
 			default:
-				printk(KERN_DEBUG "impossible 102\n");
+				printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
+					fa->fa_type);
 				return -EINVAL;
 			}
 		}
-- 
cgit v1.2.3


From 66a2f7fd2fddee1ddc5d1d286cd832e50a97258e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 21:23:17 -0800
Subject: [IPV4] fib_trie: Add statistics.

The FIB TRIE code has a bunch of statistics, but the code is hidden
behind an ifdef that was never implemented. Since it was dead code, it
was broken as well.

This patch fixes that by making it a config option.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig    |  7 +++++++
 net/ipv4/fib_trie.c | 31 +++++++++++++++++++------------
 2 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9f9fd2c6f6e..24e2b7294bf 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -85,6 +85,13 @@ endchoice
 config IP_FIB_HASH
 	def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
 
+config IP_FIB_TRIE_STATS
+	bool "FIB TRIE statistics"
+	depends on IP_FIB_TRIE
+	---help---
+	  Keep track of statistics on structure of FIB TRIE table.
+	  Useful for testing and measuring TRIE performance.
+
 config IP_MULTIPLE_TABLES
 	bool "IP: policy routing"
 	depends on IP_ADVANCED_ROUTER
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e047de6873b..2075eea7eea 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -82,7 +82,6 @@
 #include <net/ip_fib.h>
 #include "fib_lookup.h"
 
-#undef CONFIG_IP_FIB_TRIE_STATS
 #define MAX_STAT_DEPTH 32
 
 #define KEYLENGTH (8*sizeof(t_key))
@@ -2119,20 +2118,22 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	bytes += sizeof(struct node *) * pointers;
 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
+}
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-	seq_printf(seq, "Counters:\n---------\n");
-	seq_printf(seq,"gets = %d\n", t->stats.gets);
-	seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
-	seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
-	seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
-	seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
-	seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
-#ifdef CLEAR_STATS
-	memset(&(t->stats), 0, sizeof(t->stats));
-#endif
-#endif /*  CONFIG_IP_FIB_TRIE_STATS */
+static void trie_show_usage(struct seq_file *seq,
+			    const struct trie_use_stats *stats)
+{
+	seq_printf(seq, "\nCounters:\n---------\n");
+	seq_printf(seq,"gets = %u\n", stats->gets);
+	seq_printf(seq,"backtracks = %u\n", stats->backtrack);
+	seq_printf(seq,"semantic match passed = %u\n", stats->semantic_match_passed);
+	seq_printf(seq,"semantic match miss = %u\n", stats->semantic_match_miss);
+	seq_printf(seq,"null node hit= %u\n", stats->null_node_hit);
+	seq_printf(seq,"skipped node resize = %u\n\n", stats->resize_node_skipped);
 }
+#endif /*  CONFIG_IP_FIB_TRIE_STATS */
+
 
 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 {
@@ -2163,12 +2164,18 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "Local:\n");
 		trie_collect_stats(trie_local, stat);
 		trie_show_stats(seq, stat);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+		trie_show_usage(seq, &trie_local->stats);
+#endif
 	}
 
 	if (trie_main) {
 		seq_printf(seq, "Main:\n");
 		trie_collect_stats(trie_main, stat);
 		trie_show_stats(seq, stat);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+		trie_show_usage(seq, &trie_main->stats);
+#endif
 	}
 	kfree(stat);
 
-- 
cgit v1.2.3


From c95aaf9af5a1f6dee56d1f2ab4915cd722d608da Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sat, 12 Jan 2008 21:25:02 -0800
Subject: [IPV4] fib_trie: Fix sparse warnings.

Make FIB TRIE go through sparse checker without warnings.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2075eea7eea..3e94a4dddb0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -653,7 +653,6 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 static struct tnode *inflate(struct trie *t, struct tnode *tn)
 {
-	struct tnode *inode;
 	struct tnode *oldtnode = tn;
 	int olen = tnode_child_length(tn);
 	int i;
@@ -701,6 +700,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
 	}
 
 	for (i = 0; i < olen; i++) {
+		struct tnode *inode;
 		struct node *node = tnode_get_child(oldtnode, i);
 		struct tnode *left, *right;
 		int size, j;
@@ -1037,8 +1037,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 	/* Case 1: n is a leaf. Compare prefixes */
 
 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
-		struct leaf *l = (struct leaf *) n;
-
+		l = (struct leaf *) n;
 		li = leaf_info_new(plen);
 
 		if (!li)
@@ -2234,6 +2233,7 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
 }
 
 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
 	struct fib_trie_iter *iter = seq->private;
 	struct fib_table *tb;
@@ -2276,6 +2276,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From b9aed45507b657abab0b88da2c9b509a9dc462b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 21:26:31 -0800
Subject: [NETFILTER] xt_policy.c: kill some bloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/netfilter/xt_policy.c:
  policy_mt | -906
 1 function changed, 906 bytes removed, diff: -906

net/netfilter/xt_policy.c:
  match_xfrm_state | +427
 1 function changed, 427 bytes added, diff: +427

net/netfilter/xt_policy.o:
 2 functions changed, 427 bytes added, 906 bytes removed, diff: -479

Alternatively, this could be done by combining identical
parts of the match_policy_in/out()

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 46ee7e81a7a..45731ca15b7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -33,7 +33,7 @@ xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m,
 	return false;
 }
 
-static inline bool
+static bool
 match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
 		 unsigned short family)
 {
-- 
cgit v1.2.3


From 112d8cfcbf4f5ef0cf669cb5864f1206972076d6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sat, 12 Jan 2008 21:27:41 -0800
Subject: [FIB]: Reduce text size of net/ipv4/fib_trie.o

In struct tnode, we use two fields of 5 bits for 'pos' and 'bits'.
Switching to plain 'unsigned char' (8 bits) take the same space
because of compiler alignments, and reduce text size by 435 bytes
on i386.

On i386 :
$ size net/ipv4/fib_trie.o.before_patch net/ipv4/fib_trie.o
    text    data     bss     dec     hex filename
   13714       4      64   13782    35d6 net/ipv4/fib_trie.o.before
   13279       4      64   13347    3423 net/ipv4/fib_trie.o

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3e94a4dddb0..350904c233e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -118,8 +118,8 @@ struct leaf_info {
 struct tnode {
 	t_key key;
 	unsigned long parent;
-	unsigned short pos:5;		/* 2log(KEYLENGTH) bits needed */
-	unsigned short bits:5;		/* 2log(KEYLENGTH) bits needed */
+	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
+	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
 	unsigned short full_children;	/* KEYLENGTH bits needed */
 	unsigned short empty_children;	/* KEYLENGTH bits needed */
 	struct rcu_head rcu;
-- 
cgit v1.2.3


From 8519660b98349fb922157fa2f5fb6e49eb17ad38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 21:28:37 -0800
Subject: [NET] core/utils.c: digit2bin is dead static inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/utils.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'net')

diff --git a/net/core/utils.c b/net/core/utils.c
index 34459c4c740..8031eb59054 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -91,17 +91,6 @@ EXPORT_SYMBOL(in_aton);
 #define IN6PTON_NULL		0x20000000	/* first/tail */
 #define IN6PTON_UNKNOWN		0x40000000
 
-static inline int digit2bin(char c, int delim)
-{
-	if (c == delim || c == '\0')
-		return IN6PTON_DELIM;
-	if (c == '.')
-		return IN6PTON_DOT;
-	if (c >= '0' && c <= '9')
-		return (IN6PTON_DIGIT | (c - '0'));
-	return IN6PTON_UNKNOWN;
-}
-
 static inline int xdigit2bin(char c, int delim)
 {
 	if (c == delim || c == '\0')
-- 
cgit v1.2.3


From d88c305a03c37a95c4b27e1a0c2e387bb7ce80df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sat, 12 Jan 2008 21:29:14 -0800
Subject: [PKT_SCHED] HTB: htb_classid is dead static inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 72beb66cec9..6a2352cd9c2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -214,10 +214,6 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
  * then finish and return direct queue.
  */
 #define HTB_DIRECT (struct htb_class*)-1
-static inline u32 htb_classid(struct htb_class *cl)
-{
-	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
-}
 
 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 				      int *qerr)
-- 
cgit v1.2.3


From 88ebc72f68974965b41ad7e8e441df57a530e386 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 12 Jan 2008 21:49:01 -0800
Subject: [IPV4] FIB: Include nexthop device indexes in fib_info hashfn.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3ed920b92fb..0e08df4d6f9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -194,6 +194,15 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
 	return 0;
 }
 
+static inline unsigned int fib_devindex_hashfn(unsigned int val)
+{
+	unsigned int mask = DEVINDEX_HASHSIZE - 1;
+
+	return (val ^
+		(val >> DEVINDEX_HASHBITS) ^
+		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+}
+
 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 {
 	unsigned int mask = (fib_hash_size - 1);
@@ -202,6 +211,9 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 	val ^= fi->fib_protocol;
 	val ^= (__force u32)fi->fib_prefsrc;
 	val ^= fi->fib_priority;
+	for_nexthops(fi) {
+		val ^= fib_devindex_hashfn(nh->nh_oif);
+	} endfor_nexthops(fi)
 
 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
 }
@@ -232,15 +244,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 	return NULL;
 }
 
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
-{
-	unsigned int mask = DEVINDEX_HASHSIZE - 1;
-
-	return (val ^
-		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
-}
-
 /* Check, that the gateway is already configured.
    Used only by redirect accept routine.
  */
-- 
cgit v1.2.3


From 4dde4610c4ab54e9d36a4afaa98c23b017f7f9e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 13 Jan 2008 00:43:22 -0800
Subject: [IPV4] fib_trie: removes a memset() call in tnode_new()

tnode_alloc() already clears allocated memory, using kcalloc() or
alloc_pages(GFP_KERNEL|__GFP_ZERO, ...)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 350904c233e..da6681ddc50 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -391,7 +391,6 @@ static struct tnode* tnode_new(t_key key, int pos, int bits)
 	struct tnode *tn = tnode_alloc(sz);
 
 	if (tn) {
-		memset(tn, 0, sz);
 		tn->parent = T_TNODE;
 		tn->pos = pos;
 		tn->bits = bits;
-- 
cgit v1.2.3


From 6bf1574ee33270e7c0b9d43103e8cedffd9f74db Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 13 Jan 2008 22:27:52 -0800
Subject: [X25]: Avoid divides and sparse warnings

   CHECK   net/x25/af_x25.c
net/x25/af_x25.c:117:46: warning: expensive signed divide
   CHECK   net/x25/x25_facilities.c
net/x25/x25_facilities.c:209:30: warning: expensive signed divide
   CHECK   net/x25/x25_in.c
net/x25/x25_in.c:250:26: warning: expensive signed divide
   CHECK   net/x25/x25_proc.c
net/x25/x25_proc.c:48:11: warning: context imbalance in 'x25_seq_route_start'
- wrong count at exit
net/x25/x25_proc.c:72:13: warning: context imbalance in 'x25_seq_route_stop' -
unexpected unlock
net/x25/x25_proc.c:112:11: warning: context imbalance in
'x25_seq_socket_start' - wrong count at exit
net/x25/x25_proc.c:129:13: warning: context imbalance in 'x25_seq_socket_stop'
- unexpected unlock
net/x25/x25_proc.c:190:11: warning: context imbalance in
'x25_seq_forward_start' - wrong count at exit
net/x25/x25_proc.c:215:13: warning: context imbalance in
'x25_seq_forward_stop' - unexpected unlock
   CHECK   net/x25/x25_subr.c
net/x25/x25_subr.c:362:57: warning: expensive signed divide

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c         | 4 ++--
 net/x25/x25_facilities.c | 4 +---
 net/x25/x25_in.c         | 2 +-
 net/x25/x25_proc.c       | 6 ++++++
 net/x25/x25_subr.c       | 2 +-
 5 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 92cfe8e3e0b..07fad7ccf83 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -83,9 +83,9 @@ struct compat_x25_subscrip_struct {
 int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr,
 		  struct x25_address *calling_addr)
 {
-	int called_len, calling_len;
+	unsigned int called_len, calling_len;
 	char *called, *calling;
-	int i;
+	unsigned int i;
 
 	called_len  = (*p >> 0) & 0x0F;
 	calling_len = (*p >> 4) & 0x0F;
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index dec404afa11..a21f6646eb3 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -205,9 +205,7 @@ int x25_create_facilities(unsigned char *buffer,
 	}
 
 	if (dte_facs->calling_len && (facil_mask & X25_MASK_CALLING_AE)) {
-		unsigned bytecount = (dte_facs->calling_len % 2) ?
-					dte_facs->calling_len / 2 + 1 :
-					dte_facs->calling_len / 2;
+		unsigned bytecount = (dte_facs->calling_len + 1) >> 1;
 		*p++ = X25_FAC_CALLING_AE;
 		*p++ = 1 + bytecount;
 		*p++ = dte_facs->calling_len;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 1c88762c279..7d7c3abf38b 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -247,7 +247,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 					break;
 				}
 				if (atomic_read(&sk->sk_rmem_alloc) >
-				    (sk->sk_rcvbuf / 2))
+				    (sk->sk_rcvbuf >> 1))
 					x25->condition |= X25_COND_OWN_RX_BUSY;
 			}
 			/*
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 7d55e50c936..78b05342606 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -41,6 +41,7 @@ found:
 }
 
 static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos)
+	__acquires(x25_route_list_lock)
 {
 	loff_t l = *pos;
 
@@ -70,6 +71,7 @@ out:
 }
 
 static void x25_seq_route_stop(struct seq_file *seq, void *v)
+	__releases(x25_route_list_lock)
 {
 	read_unlock_bh(&x25_route_list_lock);
 }
@@ -105,6 +107,7 @@ found:
 }
 
 static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos)
+	__acquires(x25_list_lock)
 {
 	loff_t l = *pos;
 
@@ -127,6 +130,7 @@ out:
 }
 
 static void x25_seq_socket_stop(struct seq_file *seq, void *v)
+	__releases(x25_list_lock)
 {
 	read_unlock_bh(&x25_list_lock);
 }
@@ -183,6 +187,7 @@ found:
 }
 
 static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos)
+	__acquires(x25_forward_list_lock)
 {
 	loff_t l = *pos;
 
@@ -213,6 +218,7 @@ out:
 }
 
 static void x25_seq_forward_stop(struct seq_file *seq, void *v)
+	__releases(x25_forward_list_lock)
 {
 	read_unlock_bh(&x25_forward_list_lock);
 }
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 8d6220aa5d0..511a5986af3 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -359,7 +359,7 @@ void x25_check_rbuf(struct sock *sk)
 {
 	struct x25_sock *x25 = x25_sk(sk);
 
-	if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
+	if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf >> 1) &&
 	    (x25->condition & X25_COND_OWN_RX_BUSY)) {
 		x25->condition &= ~X25_COND_OWN_RX_BUSY;
 		x25->condition &= ~X25_COND_ACK_PENDING;
-- 
cgit v1.2.3


From f16f3026db6fa63cbb0f4a37833562aa999c93e5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 13 Jan 2008 22:29:41 -0800
Subject: [AX25]: sparse cleanups

net/ax25/ax25_route.c:251:13: warning: context imbalance in
'ax25_rt_seq_start' - wrong count at exit
net/ax25/ax25_route.c:276:13: warning: context imbalance in 'ax25_rt_seq_stop'
- unexpected unlock
net/ax25/ax25_std_timer.c:65:25: warning: expensive signed divide
net/ax25/ax25_uid.c:46:1: warning: symbol 'ax25_uid_list' was not declared.
Should it be static?
net/ax25/ax25_uid.c:146:13: warning: context imbalance in 'ax25_uid_seq_start'
- wrong count at exit
net/ax25/ax25_uid.c:169:13: warning: context imbalance in 'ax25_uid_seq_stop'
- unexpected unlock
net/ax25/af_ax25.c:573:28: warning: expensive signed divide
net/ax25/af_ax25.c:1865:13: warning: context imbalance in 'ax25_info_start' -
wrong count at exit
net/ax25/af_ax25.c:1888:13: warning: context imbalance in 'ax25_info_stop' -
unexpected unlock
net/ax25/ax25_ds_timer.c:133:25: warning: expensive signed divide

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c        | 4 +++-
 net/ax25/ax25_ds_timer.c  | 2 +-
 net/ax25/ax25_route.c     | 2 ++
 net/ax25/ax25_std_timer.c | 4 ++--
 net/ax25/ax25_uid.c       | 6 ++++--
 5 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a028d37ba2e..1bc0e85f04a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -570,7 +570,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
 			res = -EINVAL;
 			break;
 		}
-		ax25->rtt = (opt * HZ) / 2;
+		ax25->rtt = (opt * HZ) >> 1;
 		ax25->t1  = opt * HZ;
 		break;
 
@@ -1863,6 +1863,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_PROC_FS
 
 static void *ax25_info_start(struct seq_file *seq, loff_t *pos)
+	__acquires(ax25_list_lock)
 {
 	struct ax25_cb *ax25;
 	struct hlist_node *node;
@@ -1886,6 +1887,7 @@ static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ax25_info_stop(struct seq_file *seq, void *v)
+	__releases(ax25_list_lock)
 {
 	spin_unlock_bh(&ax25_list_lock);
 }
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 4f44185955c..c4e3b025d21 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -130,7 +130,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 		 */
 		if (sk != NULL) {
 			if (atomic_read(&sk->sk_rmem_alloc) <
-			    (sk->sk_rcvbuf / 2) &&
+			    (sk->sk_rcvbuf >> 1) &&
 			    (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
 				ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
 				ax25->condition &= ~AX25_COND_ACK_PENDING;
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 9ecf6f1df86..38c7f3087ec 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -249,6 +249,7 @@ int ax25_rt_ioctl(unsigned int cmd, void __user *arg)
 #ifdef CONFIG_PROC_FS
 
 static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(ax25_route_lock)
 {
 	struct ax25_route *ax25_rt;
 	int i = 1;
@@ -274,6 +275,7 @@ static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ax25_rt_seq_stop(struct seq_file *seq, void *v)
+	__releases(ax25_route_lock)
 {
 	read_unlock(&ax25_route_lock);
 }
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index f2f6918ac9b..96e4b927325 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -32,7 +32,7 @@
 
 void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 {
-	struct sock *sk=ax25->sk;
+	struct sock *sk = ax25->sk;
 
 	if (sk)
 		bh_lock_sock(sk);
@@ -62,7 +62,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 		 */
 		if (sk != NULL) {
 			if (atomic_read(&sk->sk_rmem_alloc) <
-			    (sk->sk_rcvbuf / 2) &&
+			    (sk->sk_rcvbuf >> 1) &&
 			    (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
 				ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
 				ax25->condition &= ~AX25_COND_ACK_PENDING;
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index ce0b13d4438..5f4eb73fb9d 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -43,10 +43,10 @@
  *	Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
  */
 
-HLIST_HEAD(ax25_uid_list);
+static HLIST_HEAD(ax25_uid_list);
 static DEFINE_RWLOCK(ax25_uid_lock);
 
-int ax25_uid_policy = 0;
+int ax25_uid_policy;
 
 EXPORT_SYMBOL(ax25_uid_policy);
 
@@ -144,6 +144,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 #ifdef CONFIG_PROC_FS
 
 static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(ax25_uid_lock)
 {
 	struct ax25_uid_assoc *pt;
 	struct hlist_node *node;
@@ -167,6 +168,7 @@ static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
+	__releases(ax25_uid_lock)
 {
 	read_unlock(&ax25_uid_lock);
 }
-- 
cgit v1.2.3


From 8d96544475b236a0f319e492f4828aa8c0801c7f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 13 Jan 2008 22:31:44 -0800
Subject: [FIB]: full_children & empty_children should be uint, not ushort

If declared as unsigned short, these fields can overflow, and whole
trie logic is broken. I could not make the machine crash, but some
tnode can never be freed.

Note for 64 bit arches : By reordering t_key and parent in [node,
leaf, tnode] structures, we can use 32 bits hole after t_key so that
sizeof(struct tnode) doesnt change after this patch.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index da6681ddc50..18fb73958a4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -97,13 +97,13 @@ typedef unsigned int t_key;
 #define IS_LEAF(n) (n->parent & T_LEAF)
 
 struct node {
-	t_key key;
 	unsigned long parent;
+	t_key key;
 };
 
 struct leaf {
-	t_key key;
 	unsigned long parent;
+	t_key key;
 	struct hlist_head list;
 	struct rcu_head rcu;
 };
@@ -116,12 +116,12 @@ struct leaf_info {
 };
 
 struct tnode {
-	t_key key;
 	unsigned long parent;
+	t_key key;
 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
-	unsigned short full_children;	/* KEYLENGTH bits needed */
-	unsigned short empty_children;	/* KEYLENGTH bits needed */
+	unsigned int full_children;	/* KEYLENGTH bits needed */
+	unsigned int empty_children;	/* KEYLENGTH bits needed */
 	struct rcu_head rcu;
 	struct node *child[0];
 };
@@ -329,12 +329,12 @@ static inline void free_leaf_info(struct leaf_info *leaf)
 	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
 }
 
-static struct tnode *tnode_alloc(unsigned int size)
+static struct tnode *tnode_alloc(size_t size)
 {
 	struct page *pages;
 
 	if (size <= PAGE_SIZE)
-		return kcalloc(size, 1, GFP_KERNEL);
+		return kzalloc(size, GFP_KERNEL);
 
 	pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
 	if (!pages)
@@ -346,8 +346,8 @@ static struct tnode *tnode_alloc(unsigned int size)
 static void __tnode_free_rcu(struct rcu_head *head)
 {
 	struct tnode *tn = container_of(head, struct tnode, rcu);
-	unsigned int size = sizeof(struct tnode) +
-		(1 << tn->bits) * sizeof(struct node *);
+	size_t size = sizeof(struct tnode) +
+		      (sizeof(struct node *) << tn->bits);
 
 	if (size <= PAGE_SIZE)
 		kfree(tn);
@@ -386,8 +386,7 @@ static struct leaf_info *leaf_info_new(int plen)
 
 static struct tnode* tnode_new(t_key key, int pos, int bits)
 {
-	int nchildren = 1<<bits;
-	int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
+	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
 	struct tnode *tn = tnode_alloc(sz);
 
 	if (tn) {
@@ -399,8 +398,8 @@ static struct tnode* tnode_new(t_key key, int pos, int bits)
 		tn->empty_children = 1<<bits;
 	}
 
-	pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
-		 (unsigned int) (sizeof(struct node) * 1<<bits));
+	pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
+		 (unsigned long) (sizeof(struct node) << bits));
 	return tn;
 }
 
-- 
cgit v1.2.3


From be185884b31093555dc10aa32efe0b73c835312e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jan 2008 05:35:31 -0800
Subject: [NETNS][RAW]: Make ipv[46] raw sockets lookup namespaces aware.

This requires just to pass the appropriate struct net pointer
into __raw_v[46]_lookup and skip sockets that do not belong
to a needed namespace.

The proper net is get from skb->dev in all the cases.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 21 +++++++++++++--------
 net/ipv6/raw.c | 18 +++++++++++++-----
 2 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 747911a8241..a490a9d5471 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -116,16 +116,15 @@ static void raw_v4_unhash(struct sock *sk)
 	raw_unhash_sk(sk, &raw_v4_hashinfo);
 }
 
-static struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
-			     __be32 raddr, __be32 laddr,
-			     int dif)
+static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+		unsigned short num, __be32 raddr, __be32 laddr, int dif)
 {
 	struct hlist_node *node;
 
 	sk_for_each_from(sk, node) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == num 					&&
+		if (sk->sk_net == net && inet->num == num 		&&
 		    !(inet->daddr && inet->daddr != raddr) 		&&
 		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
 		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
@@ -169,12 +168,15 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 	struct sock *sk;
 	struct hlist_head *head;
 	int delivered = 0;
+	struct net *net;
 
 	read_lock(&raw_v4_hashinfo.lock);
 	head = &raw_v4_hashinfo.ht[hash];
 	if (hlist_empty(head))
 		goto out;
-	sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
+
+	net = skb->dev->nd_net;
+	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
 			     iph->saddr, iph->daddr,
 			     skb->dev->ifindex);
 
@@ -187,7 +189,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 			if (clone)
 				raw_rcv(sk, clone);
 		}
-		sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
+		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
 				     iph->saddr, iph->daddr,
 				     skb->dev->ifindex);
 	}
@@ -273,6 +275,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	int hash;
 	struct sock *raw_sk;
 	struct iphdr *iph;
+	struct net *net;
 
 	hash = protocol & (RAW_HTABLE_SIZE - 1);
 
@@ -280,8 +283,10 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
 	if (raw_sk != NULL) {
 		iph = (struct iphdr *)skb->data;
-		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
-						iph->saddr,
+		net = skb->dev->nd_net;
+
+		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
+						iph->daddr, iph->saddr,
 						skb->dev->ifindex)) != NULL) {
 			raw_err(raw_sk, skb, info);
 			raw_sk = sk_next(raw_sk);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index cb0b110a2ac..6f20086064b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -76,8 +76,9 @@ static void raw_v6_unhash(struct sock *sk)
 }
 
 
-static struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
-		struct in6_addr *loc_addr, struct in6_addr *rmt_addr, int dif)
+static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+		unsigned short num, struct in6_addr *loc_addr,
+		struct in6_addr *rmt_addr, int dif)
 {
 	struct hlist_node *node;
 	int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -86,6 +87,9 @@ static struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
 		if (inet_sk(sk)->num == num) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 
+			if (sk->sk_net != net)
+				continue;
+
 			if (!ipv6_addr_any(&np->daddr) &&
 			    !ipv6_addr_equal(&np->daddr, rmt_addr))
 				continue;
@@ -165,6 +169,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	struct sock *sk;
 	int delivered = 0;
 	__u8 hash;
+	struct net *net;
 
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
@@ -182,7 +187,8 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	if (sk == NULL)
 		goto out;
 
-	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+	net = skb->dev->nd_net;
+	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
 	while (sk) {
 		int filtered;
@@ -225,7 +231,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 				rawv6_rcv(sk, clone);
 			}
 		}
-		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
+		sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
 				     IP6CB(skb)->iif);
 	}
 out:
@@ -359,6 +365,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 	struct sock *sk;
 	int hash;
 	struct in6_addr *saddr, *daddr;
+	struct net *net;
 
 	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
 
@@ -367,8 +374,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 	if (sk != NULL) {
 		saddr = &ipv6_hdr(skb)->saddr;
 		daddr = &ipv6_hdr(skb)->daddr;
+		net = skb->dev->nd_net;
 
-		while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr,
+		while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
 						IP6CB(skb)->iif))) {
 			rawv6_err(sk, skb, NULL, type, code,
 					inner_offset, info);
-- 
cgit v1.2.3


From f51d599fbecf4881a36466f0355da6b0b346ea70 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jan 2008 05:35:57 -0800
Subject: [NETNS][RAW]: Make /proc/net/raw(6) show per-namespace socket list.

Pull the struct net pointer up to the showing functions
to filter the sockets depending on their namespaces.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 20 ++++++++++++--------
 net/ipv6/raw.c |  4 ++--
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a490a9d5471..4e95372a78e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -860,7 +860,8 @@ static struct sock *raw_get_first(struct seq_file *seq)
 		struct hlist_node *node;
 
 		sk_for_each(sk, node, &state->h->ht[state->bucket])
-			if (sk->sk_family == state->family)
+			if (sk->sk_net == state->p.net &&
+					sk->sk_family == state->family)
 				goto found;
 	}
 	sk = NULL;
@@ -876,7 +877,8 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
 		sk = sk_next(sk);
 try_again:
 		;
-	} while (sk && sk->sk_family != state->family);
+	} while (sk && sk->sk_net != state->p.net &&
+			sk->sk_family != state->family);
 
 	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
 		sk = sk_head(&state->h->ht[state->bucket]);
@@ -970,16 +972,18 @@ static const struct seq_operations raw_seq_ops = {
 	.show  = raw_seq_show,
 };
 
-int raw_seq_open(struct file *file, struct raw_hashinfo *h,
+int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h,
 		unsigned short family)
 {
+	int err;
 	struct raw_iter_state *i;
 
-	i = __seq_open_private(file, &raw_seq_ops,
+	err = seq_open_net(ino, file, &raw_seq_ops,
 			sizeof(struct raw_iter_state));
-	if (i == NULL)
-		return -ENOMEM;
+	if (err < 0)
+		return err;
 
+	i = raw_seq_private((struct seq_file *)file->private_data);
 	i->h = h;
 	i->family = family;
 	return 0;
@@ -988,7 +992,7 @@ EXPORT_SYMBOL_GPL(raw_seq_open);
 
 static int raw_v4_seq_open(struct inode *inode, struct file *file)
 {
-	return raw_seq_open(file, &raw_v4_hashinfo, PF_INET);
+	return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET);
 }
 
 static const struct file_operations raw_seq_fops = {
@@ -996,7 +1000,7 @@ static const struct file_operations raw_seq_fops = {
 	.open	 = raw_v4_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 int __init raw_proc_init(void)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 6f20086064b..026fa910cb7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1259,7 +1259,7 @@ static const struct seq_operations raw6_seq_ops = {
 
 static int raw6_seq_open(struct inode *inode, struct file *file)
 {
-	return raw_seq_open(file, &raw_v6_hashinfo, PF_INET6);
+	return raw_seq_open(inode, file, &raw_v6_hashinfo, PF_INET6);
 }
 
 static const struct file_operations raw6_seq_fops = {
@@ -1267,7 +1267,7 @@ static const struct file_operations raw6_seq_fops = {
 	.open =		raw6_seq_open,
 	.read =		seq_read,
 	.llseek =	seq_lseek,
-	.release =	seq_release_private,
+	.release =	seq_release_net,
 };
 
 int __init raw6_proc_init(void)
-- 
cgit v1.2.3


From e5ba31f11f6cae785e893d5d10abd612fef0b6bc Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jan 2008 05:36:27 -0800
Subject: [NETNS][RAW]: Eliminate explicit init_net references.

Happily, in all the rest places (->bind callbacks only), that require the
struct net, we have a socket, so get the net from it.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 2 +-
 net/ipv6/raw.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4e95372a78e..206c869db92 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -625,7 +625,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
 		goto out;
-	chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
+	chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr);
 	ret = -EADDRNOTAVAIL;
 	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 026fa910cb7..970529e4754 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,7 +291,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			if (!sk->sk_bound_dev_if)
 				goto out;
 
-			dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+			dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if);
 			if (!dev) {
 				err = -ENODEV;
 				goto out;
@@ -304,7 +304,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		v4addr = LOOPBACK4_IPV6;
 		if (!(addr_type & IPV6_ADDR_MULTICAST))	{
 			err = -EADDRNOTAVAIL;
-			if (!ipv6_chk_addr(&init_net, &addr->sin6_addr,
+			if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr,
 					   dev, 0)) {
 				if (dev)
 					dev_put(dev);
-- 
cgit v1.2.3


From a308da1627d711fd0c7542bfe892abc78d65d215 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jan 2008 05:36:50 -0800
Subject: [NETNS][RAW]: Create the /proc/net/raw(6) in each namespace.

To do so, just register the proper subsystem and create files in
->init callbacks.

No other special per-namespace handling for raw sockets is required.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 22 +++++++++++++++++++---
 net/ipv6/raw.c | 22 +++++++++++++++++++---
 2 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 206c869db92..91a52184351 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1003,15 +1003,31 @@ static const struct file_operations raw_seq_fops = {
 	.release = seq_release_net,
 };
 
-int __init raw_proc_init(void)
+static __net_init int raw_init_net(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "raw", S_IRUGO, &raw_seq_fops))
+	if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
 		return -ENOMEM;
+
 	return 0;
 }
 
+static __net_exit void raw_exit_net(struct net *net)
+{
+	proc_net_remove(net, "raw");
+}
+
+static __net_initdata struct pernet_operations raw_net_ops = {
+	.init = raw_init_net,
+	.exit = raw_exit_net,
+};
+
+int __init raw_proc_init(void)
+{
+	return register_pernet_subsys(&raw_net_ops);
+}
+
 void __init raw_proc_exit(void)
 {
-	proc_net_remove(&init_net, "raw");
+	unregister_pernet_subsys(&raw_net_ops);
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 970529e4754..4d880551fe6 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1270,16 +1270,32 @@ static const struct file_operations raw6_seq_fops = {
 	.release =	seq_release_net,
 };
 
-int __init raw6_proc_init(void)
+static int raw6_init_net(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops))
+	if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
 		return -ENOMEM;
+
 	return 0;
 }
 
+static void raw6_exit_net(struct net *net)
+{
+	proc_net_remove(net, "raw6");
+}
+
+static struct pernet_operations raw6_net_ops = {
+	.init = raw6_init_net,
+	.exit = raw6_exit_net,
+};
+
+int __init raw6_proc_init(void)
+{
+	return register_pernet_subsys(&raw6_net_ops);
+}
+
 void raw6_proc_exit(void)
 {
-	proc_net_remove(&init_net, "raw6");
+	unregister_pernet_subsys(&raw6_net_ops);
 }
 #endif	/* CONFIG_PROC_FS */
 
-- 
cgit v1.2.3


From a79878f00dad97d03a3e62a48b06227d55ae5fe4 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 22:56:01 -0800
Subject: [ARP]: Move inet_addr_type call after simple error checks in
 arp_contructor.

The neighbour entry will be destroyed in the case of error, so it is
pointless to perform constly routing table lookup in this case.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 3f0730ec0a2..d12f31b0c10 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -235,8 +235,6 @@ static int arp_constructor(struct neighbour *neigh)
 	struct in_device *in_dev;
 	struct neigh_parms *parms;
 
-	neigh->type = inet_addr_type(&init_net, addr);
-
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev == NULL) {
@@ -244,6 +242,8 @@ static int arp_constructor(struct neighbour *neigh)
 		return -EINVAL;
 	}
 
+	neigh->type = inet_addr_type(&init_net, addr);
+
 	parms = in_dev->arp_parms;
 	__neigh_parms_put(neigh->parms);
 	neigh->parms = neigh_parms_clone(parms);
-- 
cgit v1.2.3


From 2db82b534bd52b349f1b2ab3e63aa40ca0e466ab Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 22:58:55 -0800
Subject: [NETNS]: Make arp code network namespace consistent.

Some calls in the arp.c have network namespace as an argument. Getting
init_net inside these functions is simply inconsistent. Fix this.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d12f31b0c10..357e8987146 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -973,13 +973,13 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
 	if (mask && mask != htonl(0xFFFFFFFF))
 		return -EINVAL;
 	if (!dev && (r->arp_flags & ATF_COM)) {
-		dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family,
+		dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
 				r->arp_ha.sa_data);
 		if (!dev)
 			return -ENODEV;
 	}
 	if (mask) {
-		if (pneigh_lookup(&arp_tbl, &init_net, &ip, dev, 1) == NULL)
+		if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL)
 			return -ENOBUFS;
 		return 0;
 	}
@@ -1088,7 +1088,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
 	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
 
 	if (mask == htonl(0xFFFFFFFF))
-		return pneigh_delete(&arp_tbl, &init_net, &ip, dev);
+		return pneigh_delete(&arp_tbl, net, &ip, dev);
 
 	if (mask)
 		return -EINVAL;
@@ -1166,7 +1166,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	rtnl_lock();
 	if (r.arp_dev[0]) {
 		err = -ENODEV;
-		if ((dev = __dev_get_by_name(&init_net, r.arp_dev)) == NULL)
+		if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL)
 			goto out;
 
 		/* Mmmm... It is wrong... ARPHRD_NETROM==0 */
-- 
cgit v1.2.3


From 72132c1b6c7a7605cdfde86a9f58b6ca2b2195a6 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 22:59:30 -0800
Subject: [IPV4]: fib_rules_unregister is essentially void.

fib_rules_unregister is called only after successful register and the
return code is never checked.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index c5f78fed688..541728aa2ba 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -112,29 +112,16 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 }
 EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
-int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
+void fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
 {
-	int err = 0;
-	struct fib_rules_ops *o;
 
 	spin_lock(&net->rules_mod_lock);
-	list_for_each_entry(o, &net->rules_ops, list) {
-		if (o == ops) {
-			list_del_rcu(&o->list);
-			fib_rules_cleanup_ops(ops);
-			goto out;
-		}
-	}
-
-	err = -ENOENT;
-out:
+	list_del_rcu(&ops->list);
+	fib_rules_cleanup_ops(ops);
 	spin_unlock(&net->rules_mod_lock);
 
 	synchronize_rcu();
-	if (!err)
-		release_net(net);
-
-	return err;
+	release_net(net);
 }
 
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
-- 
cgit v1.2.3


From 486b51d3706c5493b6c50992eaaafc44e628a7ed Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 22:59:59 -0800
Subject: [ARP]: Remove overkill checks from neigh_param_alloc.

Valid network device is always passed into neigh_param_alloc, so
remove extra checking for dev == NULL. Additionally, cleanup bogus
netns assignment.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 19c0dd12fd5..62d47841c67 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1291,10 +1291,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 	struct neigh_parms *p, *ref;
 	struct net *net;
 
-	net = &init_net;
-	if (dev)
-		net = dev->nd_net;
-
+	net = dev->nd_net;
 	ref = lookup_neigh_params(tbl, net, 0);
 	if (!ref)
 		return NULL;
@@ -1306,15 +1303,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 		INIT_RCU_HEAD(&p->rcu_head);
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
-		if (dev) {
-			if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
-				kfree(p);
-				return NULL;
-			}
 
-			dev_hold(dev);
-			p->dev = dev;
+		if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+			kfree(p);
+			return NULL;
 		}
+
+		dev_hold(dev);
+		p->dev = dev;
 		p->net = hold_net(net);
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
-- 
cgit v1.2.3


From 14db4133d59e2c1bed122bf87393e2ded05e42dc Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 23:00:22 -0800
Subject: [ARP]: Remove forward declaration of neigh_changeaddr.

No need for this. It is declared in the neighbour.h

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 62d47841c67..a8be5ef1333 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -59,7 +59,6 @@ static void neigh_timer_handler(unsigned long arg);
 static void __neigh_notify(struct neighbour *n, int type, int flags);
 static void neigh_update_notify(struct neighbour *neigh);
 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
-void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 
 static struct neigh_table *neigh_tables;
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From 06f0511df1b3b32fc8e0840514d4b207150f1fa7 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 24 Jan 2008 00:30:58 -0800
Subject: [ARP]: neigh_parms_put(destroy) are essentially local to
 core/neighbour.c.

Make them static.

[ Moved the inline before, instead of after, call sites. -DaveM ]

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a8be5ef1333..a16cf1ec5e5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -577,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 	return -ENOENT;
 }
 
+static void neigh_parms_destroy(struct neigh_parms *parms);
+
+static inline void neigh_parms_put(struct neigh_parms *parms)
+{
+	if (atomic_dec_and_test(&parms->refcnt))
+		neigh_parms_destroy(parms);
+}
 
 /*
  *	neighbour must already be out of the table;
@@ -1350,7 +1357,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
 }
 
-void neigh_parms_destroy(struct neigh_parms *parms)
+static void neigh_parms_destroy(struct neigh_parms *parms)
 {
 	release_net(parms->net);
 	kfree(parms);
-- 
cgit v1.2.3


From 9bd85e32644d4d3744117b0a196ad4382f8acf35 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 23:05:55 -0800
Subject: [IPV4]: Remove extra argument from arp_ignore.

arp_ignore has two arguments: dev & in_dev. dev is used for
inet_confirm_addr calling only.

inet_confirm_addr, in turn, either gets in_dev from the device passed
or iterates over all network devices if the device passed is NULL. It
seems logical to directly pass in_dev into inet_confirm_addr.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c     | 11 +++++------
 net/ipv4/devinet.c | 17 ++++++-----------
 2 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 357e8987146..6f0827b2b15 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -382,8 +382,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 		read_unlock_bh(&neigh->lock);
 }
 
-static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
-		      __be32 sip, __be32 tip)
+static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 {
 	int scope;
 
@@ -403,7 +402,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
 	case 3:	/* Do not reply for scope host addresses */
 		sip = 0;
 		scope = RT_SCOPE_LINK;
-		dev = NULL;
+		in_dev = NULL;
 		break;
 	case 4:	/* Reserved */
 	case 5:
@@ -415,7 +414,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
 	default:
 		return 0;
 	}
-	return !inet_confirm_addr(dev, sip, tip, scope);
+	return !inet_confirm_addr(in_dev, sip, tip, scope);
 }
 
 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
@@ -807,7 +806,7 @@ static int arp_process(struct sk_buff *skb)
 	if (sip == 0) {
 		if (arp->ar_op == htons(ARPOP_REQUEST) &&
 		    inet_addr_type(&init_net, tip) == RTN_LOCAL &&
-		    !arp_ignore(in_dev,dev,sip,tip))
+		    !arp_ignore(in_dev, sip, tip))
 			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
 				 dev->dev_addr, sha);
 		goto out;
@@ -825,7 +824,7 @@ static int arp_process(struct sk_buff *skb)
 				int dont_send = 0;
 
 				if (!dont_send)
-					dont_send |= arp_ignore(in_dev,dev,sip,tip);
+					dont_send |= arp_ignore(in_dev,sip,tip);
 				if (!dont_send && IN_DEV_ARPFILTER(in_dev))
 					dont_send |= arp_filter(sip,tip,dev);
 				if (!dont_send)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 03db15b1030..dc1665a2b07 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -968,24 +968,19 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 
 /*
  * Confirm that local IP address exists using wildcards:
- * - dev: only on this interface, 0=any interface
+ * - in_dev: only on this interface, 0=any interface
  * - dst: only in the same subnet as dst, 0=any dst
  * - local: address, 0=autoselect the local address
  * - scope: maximum allowed scope value for the local address
  */
-__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
+__be32 inet_confirm_addr(struct in_device *in_dev,
+			 __be32 dst, __be32 local, int scope)
 {
 	__be32 addr = 0;
-	struct in_device *in_dev;
-
-	if (dev) {
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)))
-			addr = confirm_addr_indev(in_dev, dst, local, scope);
-		rcu_read_unlock();
+	struct net_device *dev;
 
-		return addr;
-	}
+	if (in_dev != NULL)
+		return confirm_addr_indev(in_dev, dst, local, scope);
 
 	read_lock(&dev_base_lock);
 	rcu_read_lock();
-- 
cgit v1.2.3


From 39a6d06300128d32f361f4f790beba0ca83730eb Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 23:06:19 -0800
Subject: [NETNS]: Process inet_confirm_addr in the correct namespace.

inet_confirm_addr can be called with NULL in_dev from arp_ignore iff
scope is RT_SCOPE_LINK.

Lets always pass the device and check for RT_SCOPE_LINK scope inside
inet_confirm_addr. This let us take network namespace from in_device a
need for an additional argument.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c     | 1 -
 net/ipv4/devinet.c | 6 ++++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 6f0827b2b15..b2c19cb1206 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -402,7 +402,6 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 	case 3:	/* Do not reply for scope host addresses */
 		sip = 0;
 		scope = RT_SCOPE_LINK;
-		in_dev = NULL;
 		break;
 	case 4:	/* Reserved */
 	case 5:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dc1665a2b07..e381edb19b2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -978,13 +978,15 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
 {
 	__be32 addr = 0;
 	struct net_device *dev;
+	struct net *net;
 
-	if (in_dev != NULL)
+	if (scope != RT_SCOPE_LINK)
 		return confirm_addr_indev(in_dev, dst, local, scope);
 
+	net = in_dev->dev->nd_net;
 	read_lock(&dev_base_lock);
 	rcu_read_lock();
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		if ((in_dev = __in_dev_get_rcu(dev))) {
 			addr = confirm_addr_indev(in_dev, dst, local, scope);
 			if (addr)
-- 
cgit v1.2.3


From 28d36e3702fcbed73c38e877bcf2a8f8946b7f3d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 14 Jan 2008 23:09:56 -0800
Subject: [FIB]: Avoid using static variables without proper locking

fib_trie_seq_show() uses two helper functions, rtn_scope() and
rtn_type() that can write to static storage without locking.

Just pass to them a temporary buffer to avoid potential corruption
(probably not triggerable but still...)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 18fb73958a4..72c78c2209d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2284,10 +2284,8 @@ static void seq_indent(struct seq_file *seq, int n)
 	while (n-- > 0) seq_puts(seq, "   ");
 }
 
-static inline const char *rtn_scope(enum rt_scope_t s)
+static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
 {
-	static char buf[32];
-
 	switch (s) {
 	case RT_SCOPE_UNIVERSE: return "universe";
 	case RT_SCOPE_SITE:	return "site";
@@ -2295,7 +2293,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
 	case RT_SCOPE_HOST:	return "host";
 	case RT_SCOPE_NOWHERE:	return "nowhere";
 	default:
-		snprintf(buf, sizeof(buf), "scope=%d", s);
+		snprintf(buf, len, "scope=%d", s);
 		return buf;
 	}
 }
@@ -2315,13 +2313,11 @@ static const char *rtn_type_names[__RTN_MAX] = {
 	[RTN_XRESOLVE] = "XRESOLVE",
 };
 
-static inline const char *rtn_type(unsigned t)
+static inline const char *rtn_type(char *buf, size_t len, unsigned t)
 {
-	static char buf[32];
-
 	if (t < __RTN_MAX && rtn_type_names[t])
 		return rtn_type_names[t];
-	snprintf(buf, sizeof(buf), "type %u", t);
+	snprintf(buf, len, "type %u", t);
 	return buf;
 }
 
@@ -2359,13 +2355,19 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
 		for (i = 32; i >= 0; i--) {
 			struct leaf_info *li = find_leaf_info(l, i);
+
 			if (li) {
 				struct fib_alias *fa;
+
 				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+					char buf1[32], buf2[32];
+
 					seq_indent(seq, iter->depth+1);
 					seq_printf(seq, "  /%d %s %s", i,
-						   rtn_scope(fa->fa_scope),
-						   rtn_type(fa->fa_type));
+						   rtn_scope(buf1, sizeof(buf1),
+							     fa->fa_scope),
+						   rtn_type(buf2, sizeof(buf2),
+							     fa->fa_type));
 					if (fa->fa_tos)
 						seq_printf(seq, "tos =%d\n",
 							   fa->fa_tos);
-- 
cgit v1.2.3


From d717a9a62049a03e85c3c2dd3399416eeb34a8be Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Mon, 14 Jan 2008 23:11:54 -0800
Subject: [IPV4] fib_trie: size and statistics

Show number of entries in trie, the size field was being set but never used,
but it only counted leaves, not all entries. Refactor the two cases in
fib_triestat_seq_show into a single routine.

Note: the stat structure was being malloc'd but the stack usage isn't so
high (288 bytes) that it is worth the additional complexity.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 55 +++++++++++++++++++----------------------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 72c78c2209d..a15cb0d2e11 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -148,10 +148,10 @@ struct trie_stat {
 
 struct trie {
 	struct node *trie;
+	unsigned int size;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	struct trie_use_stats stats;
 #endif
-	int size;
 };
 
 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
@@ -1045,7 +1045,6 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		insert_leaf_info(&l->list, li);
 		goto done;
 	}
-	t->size++;
 	l = leaf_new();
 
 	if (!l)
@@ -1261,6 +1260,8 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
+	t->size++;
+
 	rt_cache_flush(-1);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
@@ -2131,50 +2132,34 @@ static void trie_show_usage(struct seq_file *seq,
 }
 #endif /*  CONFIG_IP_FIB_TRIE_STATS */
 
+static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
+{
+	struct trie_stat stat;
+
+	seq_printf(seq, "%s: %d\n", name, trie->size);
+	trie_collect_stats(trie, &stat);
+	trie_show_stats(seq, &stat);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+	trie_show_usage(seq, &trie->stats);
+#endif
+}
 
 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
-	struct trie *trie_local, *trie_main;
-	struct trie_stat *stat;
 	struct fib_table *tb;
 
-	trie_local = NULL;
+	seq_printf(seq,
+		   "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
+		   sizeof(struct leaf), sizeof(struct tnode));
+
 	tb = fib_get_table(net, RT_TABLE_LOCAL);
 	if (tb)
-		trie_local = (struct trie *) tb->tb_data;
+		fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
 
-	trie_main = NULL;
 	tb = fib_get_table(net, RT_TABLE_MAIN);
 	if (tb)
-		trie_main = (struct trie *) tb->tb_data;
-
-
-	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
-	if (!stat)
-		return -ENOMEM;
-
-	seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
-		   sizeof(struct leaf), sizeof(struct tnode));
-
-	if (trie_local) {
-		seq_printf(seq, "Local:\n");
-		trie_collect_stats(trie_local, stat);
-		trie_show_stats(seq, stat);
-#ifdef CONFIG_IP_FIB_TRIE_STATS
-		trie_show_usage(seq, &trie_local->stats);
-#endif
-	}
-
-	if (trie_main) {
-		seq_printf(seq, "Main:\n");
-		trie_collect_stats(trie_main, stat);
-		trie_show_stats(seq, stat);
-#ifdef CONFIG_IP_FIB_TRIE_STATS
-		trie_show_usage(seq, &trie_main->stats);
-#endif
-	}
-	kfree(stat);
+		fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 7f9b80529b8a2ad8b3273b15fb444a0e34b760a9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Mon, 14 Jan 2008 23:14:20 -0800
Subject: [IPV4]: fib hash|trie initialization

Initialization of the slab cache's should be done when IP is
initialized to make sure of available memory, and that code can be
marked __init.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c |  9 ++++++---
 net/ipv4/fib_hash.c     | 24 +++++++++++-------------
 net/ipv4/fib_trie.c     | 16 ++++++++--------
 3 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 02b5ff73357..8ddcd3f91a1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -53,11 +53,11 @@ static int __net_init fib4_rules_init(struct net *net)
 {
 	struct fib_table *local_table, *main_table;
 
-	local_table = fib_hash_init(RT_TABLE_LOCAL);
+	local_table = fib_hash_table(RT_TABLE_LOCAL);
 	if (local_table == NULL)
 		return -ENOMEM;
 
-	main_table  = fib_hash_init(RT_TABLE_MAIN);
+	main_table  = fib_hash_table(RT_TABLE_MAIN);
 	if (main_table == NULL)
 		goto fail;
 
@@ -83,7 +83,8 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 	tb = fib_get_table(net, id);
 	if (tb)
 		return tb;
-	tb = fib_hash_init(id);
+
+	tb = fib_hash_table(id);
 	if (!tb)
 		return NULL;
 	h = id & (FIB_TABLE_HASHSZ - 1);
@@ -1042,6 +1043,8 @@ void __init ip_fib_init(void)
 	register_pernet_subsys(&fib_net_ops);
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
+
+	fib_hash_init();
 }
 
 EXPORT_SYMBOL(inet_addr_type);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 258214f57fb..499522f3b30 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -747,21 +747,19 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 	return skb->len;
 }
 
-struct fib_table *fib_hash_init(u32 id)
+void __init fib_hash_init(void)
 {
-	struct fib_table *tb;
+	fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
+					 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+
+	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
+					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+
+}
 
-	if (fn_hash_kmem == NULL)
-		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
-						 sizeof(struct fib_node),
-						 0, SLAB_HWCACHE_ALIGN,
-						 NULL);
-
-	if (fn_alias_kmem == NULL)
-		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
-						  sizeof(struct fib_alias),
-						  0, SLAB_HWCACHE_ALIGN,
-						  NULL);
+struct fib_table *fib_hash_table(u32 id)
+{
+	struct fib_table *tb;
 
 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
 		     GFP_KERNEL);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index a15cb0d2e11..fbc80d15827 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1926,19 +1926,19 @@ out:
 	return -1;
 }
 
-/* Fix more generic FIB names for init later */
+void __init fib_hash_init(void)
+{
+	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
+					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+}
 
-struct fib_table *fib_hash_init(u32 id)
+
+/* Fix more generic FIB names for init later */
+struct fib_table *fib_hash_table(u32 id)
 {
 	struct fib_table *tb;
 	struct trie *t;
 
-	if (fn_alias_kmem == NULL)
-		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
-						  sizeof(struct fib_alias),
-						  0, SLAB_HWCACHE_ALIGN,
-						  NULL);
-
 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
 		     GFP_KERNEL);
 	if (tb == NULL)
-- 
cgit v1.2.3


From b26e76b7ce35b673deafab4a40eb33db6f81036f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:30:56 -0800
Subject: [NETFILTER]: Hide a few more options under NETFILTER_ADVANCED

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 96dbe9f56bc..a711b423eb8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -378,6 +378,7 @@ config NETFILTER_XT_TARGET_NOTRACK
 config NETFILTER_XT_TARGET_RATEEST
 	tristate '"RATEEST" target support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `RATEEST' target, which allows to measure
 	  rates similar to TC estimators. The `rateest' match can be
@@ -676,6 +677,7 @@ config NETFILTER_XT_MATCH_QUOTA
 config NETFILTER_XT_MATCH_RATEEST
 	tristate '"rateest" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	select NETFILTER_XT_TARGET_RATEEST
 	help
 	  This option adds a `rateest' match, which allows to match on the
-- 
cgit v1.2.3


From 8ce22fcab432313717d393c96ad35f0aee016e83 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:31:36 -0800
Subject: [NETFILTER]: Remove some EXPERIMENTAL dependencies

Most of the netfilter modules are not considered experimental anymore,
the only ones I want to keep marked as EXPERIMENTAL are:

- TCPOPTSTRIP target, which is brand new.

- SANE helper, which is quite new.

- CLUSTERIP target, which I believe hasn't had much testing despite
  being in the kernel for quite a long time.

- SCTP match and conntrack protocol, which are a mess and need to
  be reviewed and cleaned up before I would trust them.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig |  4 ++--
 net/ipv6/netfilter/Kconfig | 12 ++++++------
 net/netfilter/Kconfig      | 24 ++++++++++++------------
 3 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index cface714edf..10ca307b849 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -229,8 +229,8 @@ config IP_NF_TARGET_NETMAP
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NF_NAT_SNMP_BASIC
-	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_NAT
+	tristate "Basic SNMP-ALG support"
+	depends on NF_NAT
 	depends on NETFILTER_ADVANCED
 	---help---
 
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a6b4a9a1053..4fc0b023cfd 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -2,12 +2,12 @@
 # IP netfilter configuration
 #
 
-menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
-	depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
+menu "IPv6: Netfilter Configuration"
+	depends on INET && IPV6 && NETFILTER
 
 config NF_CONNTRACK_IPV6
-	tristate "IPv6 connection tracking support (EXPERIMENTAL)"
-	depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK
+	tristate "IPv6 connection tracking support"
+	depends on INET && IPV6 && NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	---help---
 	  Connection tracking keeps a record of what packets have passed
@@ -22,7 +22,7 @@ config NF_CONNTRACK_IPV6
 
 config IP6_NF_QUEUE
 	tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
-	depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
+	depends on INET && IPV6 && NETFILTER
 	depends on NETFILTER_ADVANCED
 	---help---
 
@@ -44,7 +44,7 @@ config IP6_NF_QUEUE
 
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
-	depends on INET && IPV6 && EXPERIMENTAL
+	depends on INET && IPV6
 	select NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a711b423eb8..79d71437e31 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -76,8 +76,8 @@ config NF_CONNTRACK_SECMARK
 	  If unsure, say 'N'.
 
 config NF_CONNTRACK_EVENTS
-	bool "Connection tracking events (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	bool "Connection tracking events"
+	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  If this option is enabled, the connection tracking code will
@@ -102,8 +102,8 @@ config NF_CT_PROTO_SCTP
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
 config NF_CT_PROTO_UDPLITE
-	tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate 'UDP-Lite protocol connection tracking support'
+	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
@@ -144,8 +144,8 @@ config NF_CONNTRACK_FTP
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NF_CONNTRACK_H323
-	tristate "H.323 protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK && (IPV6 || IPV6=n)
+	tristate "H.323 protocol support"
+	depends on NF_CONNTRACK && (IPV6 || IPV6=n)
 	depends on NETFILTER_ADVANCED
 	help
 	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -180,8 +180,8 @@ config NF_CONNTRACK_IRC
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NF_CONNTRACK_NETBIOS_NS
-	tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate "NetBIOS name service protocol support"
+	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
@@ -234,8 +234,8 @@ config NF_CONNTRACK_SANE
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NF_CONNTRACK_SIP
-	tristate "SIP protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate "SIP protocol support"
+	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  SIP is an application-layer control protocol that can establish,
@@ -259,8 +259,8 @@ config NF_CONNTRACK_TFTP
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NF_CT_NETLINK
-	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate 'Connection tracking netlink interface'
+	depends on NF_CONNTRACK
 	select NETFILTER_NETLINK
 	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
-- 
cgit v1.2.3


From 11fa2aa362fa54b9eaa8adce9a89f9b467cc9214 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:32:13 -0800
Subject: [NETFILTER]: remove ipt_TOS.c

Commit 88c85d81f74f92371745158aebc5cbf490412002 forgot to remove the
old ipt_TOS file (whose code has been merged into xt_DSCP). Remove
it now.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_TOS.c | 82 --------------------------------------------
 1 file changed, 82 deletions(-)
 delete mode 100644 net/ipv4/netfilter/ipt_TOS.c

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
deleted file mode 100644
index 1a9244104b1..00000000000
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/* This is a module which is used for setting the TOS field of a packet. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_TOS.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables TOS mangling module");
-
-static unsigned int
-tos_tg(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
-{
-	const struct ipt_tos_target_info *tosinfo = targinfo;
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
-		__u8 oldtos;
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
-			return NF_DROP;
-		iph = ip_hdr(skb);
-		oldtos = iph->tos;
-		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
-		csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
-	}
-	return XT_CONTINUE;
-}
-
-static bool
-tos_tg_check(const char *tablename, const void *e_void,
-             const struct xt_target *target, void *targinfo,
-             unsigned int hook_mask)
-{
-	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
-
-	if (tos != IPTOS_LOWDELAY
-	    && tos != IPTOS_THROUGHPUT
-	    && tos != IPTOS_RELIABILITY
-	    && tos != IPTOS_MINCOST
-	    && tos != IPTOS_NORMALSVC) {
-		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
-		return false;
-	}
-	return true;
-}
-
-static struct xt_target tos_tg_reg __read_mostly = {
-	.name		= "TOS",
-	.family		= AF_INET,
-	.target		= tos_tg,
-	.targetsize	= sizeof(struct ipt_tos_target_info),
-	.table		= "mangle",
-	.checkentry	= tos_tg_check,
-	.me		= THIS_MODULE,
-};
-
-static int __init tos_tg_init(void)
-{
-	return xt_register_target(&tos_tg_reg);
-}
-
-static void __exit tos_tg_exit(void)
-{
-	xt_unregister_target(&tos_tg_reg);
-}
-
-module_init(tos_tg_init);
-module_exit(tos_tg_exit);
-- 
cgit v1.2.3


From 9bb268ed7c5f0ec76a5bd6824450a104231152ba Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:32:37 -0800
Subject: [NETFILTER]: xt_TOS: Change semantic of mask value

This patch changes the behavior of xt_TOS v1 so that the mask value
the user supplies means "zero out these bits" rather than "keep these
bits". This is more easy on the user, as (I would assume) people keep
more bits than zeroing, so, an example:

	Action:     Set bit 0x01.
    	before (&): iptables -j TOS --set-tos 0x01/0xFE
    	after (&~): iptables -j TOS --set-tos 0x01/0x01

This is not too "tragic" with xt_TOS, but where larger fields are used
(e.g. proposed xt_MARK v2), `--set-xmar 0x01/0x01` vs. `--set-xmark
0x01/0xFFFFFFFE` really makes a difference. Other target(!) modules,
such as xt_TPROXY also use &~ rather than &, so let's get to a common
ground.

(Since xt_TOS has not yet left the development tree en direction to
mainline, the semantic can be changed as proposed without breaking
iptables.)

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_DSCP.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index fd7500ecadf..9951e7f85e3 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -128,7 +128,7 @@ tos_tg(struct sk_buff *skb, const struct net_device *in,
 	u_int8_t orig, nv;
 
 	orig = ipv4_get_dsfield(iph);
-	nv   = (orig & info->tos_mask) ^ info->tos_value;
+	nv   = (orig & ~info->tos_mask) ^ info->tos_value;
 
 	if (orig != nv) {
 		if (!skb_make_writable(skb, sizeof(struct iphdr)))
-- 
cgit v1.2.3


From cdfe8b9797f1a47fe24a9bf9e98a351bae11ab99 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:32:54 -0800
Subject: [NETFILTER]: xt_TOS: Properly set the TOS field

Fix incorrect mask value passed to ipv4_change_dsfield/ipv6_change_dsfield.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_DSCP.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 9951e7f85e3..3d216d650c8 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -134,7 +134,7 @@ tos_tg(struct sk_buff *skb, const struct net_device *in,
 		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 		iph = ip_hdr(skb);
-		ipv4_change_dsfield(iph, ~0, nv);
+		ipv4_change_dsfield(iph, 0, nv);
 	}
 
 	return XT_CONTINUE;
@@ -156,7 +156,7 @@ tos_tg6(struct sk_buff *skb, const struct net_device *in,
 		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 		iph = ipv6_hdr(skb);
-		ipv6_change_dsfield(iph, ~0, nv);
+		ipv6_change_dsfield(iph, 0, nv);
 	}
 
 	return XT_CONTINUE;
-- 
cgit v1.2.3


From 0dc8c76029f4675c2345eefd947f123e64de1aae Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:38:34 -0800
Subject: [NETFILTER]: xt_CONNMARK target, revision 1

Introduces the xt_CONNMARK target revision 1. It uses fixed types, and
also uses the more expressive XOR logic. Futhermore, it allows to
selectively pick bits from both the ctmark and the nfmark in the SAVE
and RESTORE operations.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_CONNMARK.c | 117 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 97 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index ec2eb341d2e..b9bd7723346 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -1,8 +1,10 @@
-/* This kernel module is used to modify the connection mark values, or
- * to optionally restore the skb nfmark from the connection mark
+/*
+ *	xt_CONNMARK - Netfilter module to modify the connection mark values
  *
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
+ *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ *	by Henrik Nordstrom <hno@marasystems.com>
+ *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *	Jan Engelhardt <jengelh@computergmbh.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,9 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK");
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-connmark_tg(struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, unsigned int hooknum,
-            const struct xt_target *target, const void *targinfo)
+connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
+               const struct net_device *out, unsigned int hooknum,
+               const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
 	struct nf_conn *ct;
@@ -74,10 +76,50 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, unsigned int hooknum,
+            const struct xt_target *target, const void *targinfo)
+{
+	const struct xt_connmark_tginfo1 *info = targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u_int32_t newmark;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return XT_CONTINUE;
+
+	switch (info->mode) {
+	case XT_CONNMARK_SET:
+		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, skb);
+		}
+		break;
+	case XT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~info->ctmask) ^
+		          (skb->mark & info->nfmask);
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, skb);
+		}
+		break;
+	case XT_CONNMARK_RESTORE:
+		newmark = (skb->mark & ~info->nfmask) ^
+		          (ct->mark & info->ctmask);
+		skb->mark = newmark;
+		break;
+	}
+
+	return XT_CONTINUE;
+}
+
 static bool
-connmark_tg_check(const char *tablename, const void *entry,
-                  const struct xt_target *target, void *targinfo,
-                  unsigned int hook_mask)
+connmark_tg_check_v0(const char *tablename, const void *entry,
+                     const struct xt_target *target, void *targinfo,
+                     unsigned int hook_mask)
 {
 	const struct xt_connmark_target_info *matchinfo = targinfo;
 
@@ -101,6 +143,19 @@ connmark_tg_check(const char *tablename, const void *entry,
 	return true;
 }
 
+static bool
+connmark_tg_check(const char *tablename, const void *entry,
+                  const struct xt_target *target, void *targinfo,
+                  unsigned int hook_mask)
+{
+	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+		printk(KERN_WARNING "cannot load conntrack support for "
+		       "proto=%u\n", target->family);
+		return false;
+	}
+	return true;
+}
+
 static void
 connmark_tg_destroy(const struct xt_target *target, void *targinfo)
 {
@@ -115,7 +170,7 @@ struct compat_xt_connmark_target_info {
 	u_int16_t	__pad2;
 };
 
-static void connmark_tg_compat_from_user(void *dst, void *src)
+static void connmark_tg_compat_from_user_v0(void *dst, void *src)
 {
 	const struct compat_xt_connmark_target_info *cm = src;
 	struct xt_connmark_target_info m = {
@@ -126,7 +181,7 @@ static void connmark_tg_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int connmark_tg_compat_to_user(void __user *dst, void *src)
+static int connmark_tg_compat_to_user_v0(void __user *dst, void *src)
 {
 	const struct xt_connmark_target_info *m = src;
 	struct compat_xt_connmark_target_info cm = {
@@ -141,32 +196,54 @@ static int connmark_tg_compat_to_user(void __user *dst, void *src)
 static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
+		.revision	= 0,
 		.family		= AF_INET,
-		.checkentry	= connmark_tg_check,
+		.checkentry	= connmark_tg_check_v0,
 		.destroy	= connmark_tg_destroy,
-		.target		= connmark_tg,
+		.target		= connmark_tg_v0,
 		.targetsize	= sizeof(struct xt_connmark_target_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
-		.compat_from_user = connmark_tg_compat_from_user,
-		.compat_to_user	= connmark_tg_compat_to_user,
+		.compat_from_user = connmark_tg_compat_from_user_v0,
+		.compat_to_user	= connmark_tg_compat_to_user_v0,
 #endif
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "CONNMARK",
+		.revision	= 0,
 		.family		= AF_INET6,
-		.checkentry	= connmark_tg_check,
+		.checkentry	= connmark_tg_check_v0,
 		.destroy	= connmark_tg_destroy,
-		.target		= connmark_tg,
+		.target		= connmark_tg_v0,
 		.targetsize	= sizeof(struct xt_connmark_target_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
-		.compat_from_user = connmark_tg_compat_from_user,
-		.compat_to_user	= connmark_tg_compat_to_user,
+		.compat_from_user = connmark_tg_compat_from_user_v0,
+		.compat_to_user	= connmark_tg_compat_to_user_v0,
 #endif
 		.me		= THIS_MODULE
 	},
+	{
+		.name           = "CONNMARK",
+		.revision       = 1,
+		.family         = AF_INET,
+		.checkentry     = connmark_tg_check,
+		.target         = connmark_tg,
+		.targetsize     = sizeof(struct xt_connmark_tginfo1),
+		.destroy        = connmark_tg_destroy,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "CONNMARK",
+		.revision       = 1,
+		.family         = AF_INET6,
+		.checkentry     = connmark_tg_check,
+		.target         = connmark_tg,
+		.targetsize     = sizeof(struct xt_connmark_tginfo1),
+		.destroy        = connmark_tg_destroy,
+		.me             = THIS_MODULE,
+	},
 };
 
 static int __init connmark_tg_init(void)
-- 
cgit v1.2.3


From e0a812aea5cbf2085f7645bf2bfd9cba91c8a672 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:38:52 -0800
Subject: [NETFILTER]: xt_MARK target, revision 2

Introduces the xt_MARK target revision 2. It uses fixed types, and
also uses the more expressive XOR logic.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_MARK.c | 74 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 57c6d55e33d..1c3fb75adfd 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -1,10 +1,13 @@
-/* This is a module which is used for setting the NFMARK field of an skb. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+/*
+ *	xt_MARK - Netfilter module to modify the NFMARK field of an skb
+ *
+ *	(C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *	Jan Engelhardt <jengelh@computergmbh.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -33,9 +36,9 @@ mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-mark_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, unsigned int hooknum,
+           const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 	int mark = 0;
@@ -58,6 +61,17 @@ mark_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
+static unsigned int
+mark_tg(struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, unsigned int hooknum,
+        const struct xt_target *target, const void *targinfo)
+{
+	const struct xt_mark_tginfo2 *info = targinfo;
+
+	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
+	return XT_CONTINUE;
+}
+
 static bool
 mark_tg_check_v0(const char *tablename, const void *entry,
                  const struct xt_target *target, void *targinfo,
@@ -73,9 +87,9 @@ mark_tg_check_v0(const char *tablename, const void *entry,
 }
 
 static bool
-mark_tg_check(const char *tablename, const void *entry,
-              const struct xt_target *target, void *targinfo,
-              unsigned int hook_mask)
+mark_tg_check_v1(const char *tablename, const void *entry,
+                 const struct xt_target *target, void *targinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 
@@ -98,7 +112,7 @@ struct compat_xt_mark_target_info {
 	compat_ulong_t	mark;
 };
 
-static void mark_tg_compat_from_user(void *dst, void *src)
+static void mark_tg_compat_from_user_v0(void *dst, void *src)
 {
 	const struct compat_xt_mark_target_info *cm = src;
 	struct xt_mark_target_info m = {
@@ -107,7 +121,7 @@ static void mark_tg_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int mark_tg_compat_to_user(void __user *dst, void *src)
+static int mark_tg_compat_to_user_v0(void __user *dst, void *src)
 {
 	const struct xt_mark_target_info *m = src;
 	struct compat_xt_mark_target_info cm = {
@@ -154,8 +168,8 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.targetsize	= sizeof(struct xt_mark_target_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info),
-		.compat_from_user = mark_tg_compat_from_user,
-		.compat_to_user	= mark_tg_compat_to_user,
+		.compat_from_user = mark_tg_compat_from_user_v0,
+		.compat_to_user	= mark_tg_compat_to_user_v0,
 #endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -164,8 +178,8 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.name		= "MARK",
 		.family		= AF_INET,
 		.revision	= 1,
-		.checkentry	= mark_tg_check,
-		.target		= mark_tg,
+		.checkentry	= mark_tg_check_v1,
+		.target		= mark_tg_v1,
 		.targetsize	= sizeof(struct xt_mark_target_info_v1),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
@@ -184,8 +198,8 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.targetsize	= sizeof(struct xt_mark_target_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info),
-		.compat_from_user = mark_tg_compat_from_user,
-		.compat_to_user	= mark_tg_compat_to_user,
+		.compat_from_user = mark_tg_compat_from_user_v0,
+		.compat_to_user	= mark_tg_compat_to_user_v0,
 #endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
@@ -194,8 +208,8 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.name		= "MARK",
 		.family		= AF_INET6,
 		.revision	= 1,
-		.checkentry	= mark_tg_check,
-		.target		= mark_tg,
+		.checkentry	= mark_tg_check_v1,
+		.target		= mark_tg_v1,
 		.targetsize	= sizeof(struct xt_mark_target_info_v1),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
@@ -205,6 +219,22 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
+	{
+		.name           = "MARK",
+		.revision       = 2,
+		.family         = AF_INET,
+		.target         = mark_tg,
+		.targetsize     = sizeof(struct xt_mark_tginfo2),
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "MARK",
+		.revision       = 2,
+		.family         = AF_INET6,
+		.target         = mark_tg,
+		.targetsize     = sizeof(struct xt_mark_tginfo2),
+		.me             = THIS_MODULE,
+	},
 };
 
 static int __init mark_tg_init(void)
-- 
cgit v1.2.3


From 96e3227265852ffad332f911887c9cf26c85e40a Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:39:13 -0800
Subject: [NETFILTER]: xt_connmark match, revision 1

Introduces the xt_connmark match revision 1. It uses fixed types,
eventually obsoleting revision 0 some day (uses nonfixed types).
(Unfixed types like "unsigned long" do not play well with mixed
user-/kernelspace "bitness", e.g. 32/64, as is common on SPARC64,
and need extra compat code.)

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_connmark.c | 88 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 8ad875bc158..55c62350b1f 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,8 +1,10 @@
-/* This kernel module matches connection mark values set by the
- * CONNMARK target
+/*
+ *	xt_connmark - Netfilter module to match connection mark values
  *
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
+ *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ *	by Henrik Nordstrom <hno@marasystems.com>
+ *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *	Jan Engelhardt <jengelh@computergmbh.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,6 +38,23 @@ connmark_mt(const struct sk_buff *skb, const struct net_device *in,
             const struct net_device *out, const struct xt_match *match,
             const void *matchinfo, int offset, unsigned int protoff,
             bool *hotdrop)
+{
+	const struct xt_connmark_mtinfo1 *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return false;
+
+	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+}
+
+static bool
+connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+               const struct net_device *out, const struct xt_match *match,
+               const void *matchinfo, int offset, unsigned int protoff,
+               bool *hotdrop)
 {
 	const struct xt_connmark_info *info = matchinfo;
 	const struct nf_conn *ct;
@@ -49,9 +68,9 @@ connmark_mt(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-connmark_mt_check(const char *tablename, const void *ip,
-                  const struct xt_match *match, void *matchinfo,
-                  unsigned int hook_mask)
+connmark_mt_check_v0(const char *tablename, const void *ip,
+                     const struct xt_match *match, void *matchinfo,
+                     unsigned int hook_mask)
 {
 	const struct xt_connmark_info *cm = matchinfo;
 
@@ -67,6 +86,19 @@ connmark_mt_check(const char *tablename, const void *ip,
 	return true;
 }
 
+static bool
+connmark_mt_check(const char *tablename, const void *ip,
+                  const struct xt_match *match, void *matchinfo,
+                  unsigned int hook_mask)
+{
+	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+		printk(KERN_WARNING "cannot load conntrack support for "
+		       "proto=%u\n", match->family);
+		return false;
+	}
+	return true;
+}
+
 static void
 connmark_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
@@ -81,7 +113,7 @@ struct compat_xt_connmark_info {
 	u_int16_t	__pad2;
 };
 
-static void connmark_mt_compat_from_user(void *dst, void *src)
+static void connmark_mt_compat_from_user_v0(void *dst, void *src)
 {
 	const struct compat_xt_connmark_info *cm = src;
 	struct xt_connmark_info m = {
@@ -92,7 +124,7 @@ static void connmark_mt_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int connmark_mt_compat_to_user(void __user *dst, void *src)
+static int connmark_mt_compat_to_user_v0(void __user *dst, void *src)
 {
 	const struct xt_connmark_info *m = src;
 	struct compat_xt_connmark_info cm = {
@@ -107,32 +139,54 @@ static int connmark_mt_compat_to_user(void __user *dst, void *src)
 static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name		= "connmark",
+		.revision	= 0,
 		.family		= AF_INET,
-		.checkentry	= connmark_mt_check,
-		.match		= connmark_mt,
+		.checkentry	= connmark_mt_check_v0,
+		.match		= connmark_mt_v0,
 		.destroy	= connmark_mt_destroy,
 		.matchsize	= sizeof(struct xt_connmark_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_connmark_info),
-		.compat_from_user = connmark_mt_compat_from_user,
-		.compat_to_user	= connmark_mt_compat_to_user,
+		.compat_from_user = connmark_mt_compat_from_user_v0,
+		.compat_to_user	= connmark_mt_compat_to_user_v0,
 #endif
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "connmark",
+		.revision	= 0,
 		.family		= AF_INET6,
-		.checkentry	= connmark_mt_check,
-		.match		= connmark_mt,
+		.checkentry	= connmark_mt_check_v0,
+		.match		= connmark_mt_v0,
 		.destroy	= connmark_mt_destroy,
 		.matchsize	= sizeof(struct xt_connmark_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_connmark_info),
-		.compat_from_user = connmark_mt_compat_from_user,
-		.compat_to_user	= connmark_mt_compat_to_user,
+		.compat_from_user = connmark_mt_compat_from_user_v0,
+		.compat_to_user	= connmark_mt_compat_to_user_v0,
 #endif
 		.me		= THIS_MODULE
 	},
+	{
+		.name           = "connmark",
+		.revision       = 1,
+		.family         = AF_INET,
+		.checkentry     = connmark_mt_check,
+		.match          = connmark_mt,
+		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
+		.destroy        = connmark_mt_destroy,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "connmark",
+		.revision       = 1,
+		.family         = AF_INET6,
+		.checkentry     = connmark_mt_check,
+		.match          = connmark_mt,
+		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
+		.destroy        = connmark_mt_destroy,
+		.me             = THIS_MODULE,
+	},
 };
 
 static int __init connmark_mt_init(void)
-- 
cgit v1.2.3


From 64eb12f9972d45f3b9b0f0a33a966e311c3d5275 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:40:53 -0800
Subject: [NETFILTER]: xt_conntrack match, revision 1

Introduces the xt_conntrack match revision 1. It uses fixed types, the
new nf_inet_addr and comes with IPv6 support, thereby completely
superseding xt_state.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_conntrack.c | 207 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 182 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 3f8bfbaa9b1..dc9e7378686 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -1,15 +1,19 @@
-/* Kernel module to match connection tracking information.
- * Superset of Rusty's minimalistic state match.
+/*
+ *	xt_conntrack - Netfilter module to match connection tracking
+ *	information. (Superset of Rusty's minimalistic state match.)
  *
- * (C) 2001  Marc Boucher (marc@mbsi.ca).
+ *	(C) 2001  Marc Boucher (marc@mbsi.ca).
+ *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *	Jan Engelhardt <jengelh@computergmbh.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <net/ipv6.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_conntrack.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -18,12 +22,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
 MODULE_ALIAS("ipt_conntrack");
+MODULE_ALIAS("ip6t_conntrack");
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+                const struct net_device *out, const struct xt_match *match,
+                const void *matchinfo, int offset, unsigned int protoff,
+                bool *hotdrop)
 {
 	const struct xt_conntrack_info *sinfo = matchinfo;
 	const struct nf_conn *ct;
@@ -111,6 +116,134 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
 #undef FWINV
 }
 
+static bool
+conntrack_addrcmp(const union nf_inet_addr *kaddr,
+                  const union nf_inet_addr *uaddr,
+                  const union nf_inet_addr *umask, unsigned int l3proto)
+{
+	if (l3proto == AF_INET)
+		return (kaddr->ip & umask->ip) == uaddr->ip;
+	else if (l3proto == AF_INET6)
+		return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
+		       &uaddr->in6) == 0;
+	else
+		return false;
+}
+
+static inline bool
+conntrack_mt_origsrc(const struct nf_conn *ct,
+                     const struct xt_conntrack_mtinfo1 *info,
+                     unsigned int family)
+{
+	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+	       &info->origsrc_addr, &info->origsrc_mask, family);
+}
+
+static inline bool
+conntrack_mt_origdst(const struct nf_conn *ct,
+                     const struct xt_conntrack_mtinfo1 *info,
+                     unsigned int family)
+{
+	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
+	       &info->origdst_addr, &info->origdst_mask, family);
+}
+
+static inline bool
+conntrack_mt_replsrc(const struct nf_conn *ct,
+                     const struct xt_conntrack_mtinfo1 *info,
+                     unsigned int family)
+{
+	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
+	       &info->replsrc_addr, &info->replsrc_mask, family);
+}
+
+static inline bool
+conntrack_mt_repldst(const struct nf_conn *ct,
+                     const struct xt_conntrack_mtinfo1 *info,
+                     unsigned int family)
+{
+	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
+	       &info->repldst_addr, &info->repldst_mask, family);
+}
+
+static bool
+conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
+{
+	const struct xt_conntrack_mtinfo1 *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	unsigned int statebit;
+
+	ct = nf_ct_get(skb, &ctinfo);
+
+	if (ct == &nf_conntrack_untracked)
+		statebit = XT_CONNTRACK_STATE_UNTRACKED;
+	else if (ct != NULL)
+		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+	else
+		statebit = XT_CONNTRACK_STATE_INVALID;
+
+	if (info->match_flags & XT_CONNTRACK_STATE) {
+		if (ct != NULL) {
+			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
+				statebit |= XT_CONNTRACK_STATE_SNAT;
+			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
+				statebit |= XT_CONNTRACK_STATE_DNAT;
+		}
+		if ((info->state_mask & statebit) ^
+		    !(info->invert_flags & XT_CONNTRACK_STATE))
+			return false;
+	}
+
+	if (ct == NULL)
+		return info->match_flags & XT_CONNTRACK_STATE;
+
+	if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+	    ((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
+	    info->l4proto) ^ !(info->invert_flags & XT_CONNTRACK_PROTO)))
+		return false;
+
+	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
+		if (conntrack_mt_origsrc(ct, info, match->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
+			return false;
+
+	if (info->match_flags & XT_CONNTRACK_ORIGDST)
+		if (conntrack_mt_origdst(ct, info, match->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
+			return false;
+
+	if (info->match_flags & XT_CONNTRACK_REPLSRC)
+		if (conntrack_mt_replsrc(ct, info, match->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
+			return false;
+
+	if (info->match_flags & XT_CONNTRACK_REPLDST)
+		if (conntrack_mt_repldst(ct, info, match->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
+			return false;
+
+	if ((info->match_flags & XT_CONNTRACK_STATUS) &&
+	    (!!(info->status_mask & ct->status) ^
+	    !(info->invert_flags & XT_CONNTRACK_STATUS)))
+		return false;
+
+	if (info->match_flags & XT_CONNTRACK_EXPIRES) {
+		unsigned long expires = 0;
+
+		if (timer_pending(&ct->timeout))
+			expires = (ct->timeout.expires - jiffies) / HZ;
+		if ((expires >= info->expires_min &&
+		    expires <= info->expires_max) ^
+		    !(info->invert_flags & XT_CONNTRACK_EXPIRES))
+			return false;
+	}
+	return true;
+}
+
 static bool
 conntrack_mt_check(const char *tablename, const void *ip,
                    const struct xt_match *match, void *matchinfo,
@@ -144,7 +277,7 @@ struct compat_xt_conntrack_info
 	u_int8_t			invflags;
 };
 
-static void conntrack_mt_compat_from_user(void *dst, void *src)
+static void conntrack_mt_compat_from_user_v0(void *dst, void *src)
 {
 	const struct compat_xt_conntrack_info *cm = src;
 	struct xt_conntrack_info m = {
@@ -161,7 +294,7 @@ static void conntrack_mt_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int conntrack_mt_compat_to_user(void __user *dst, void *src)
+static int conntrack_mt_compat_to_user_v0(void __user *dst, void *src)
 {
 	const struct xt_conntrack_info *m = src;
 	struct compat_xt_conntrack_info cm = {
@@ -179,29 +312,53 @@ static int conntrack_mt_compat_to_user(void __user *dst, void *src)
 }
 #endif
 
-static struct xt_match conntrack_mt_reg __read_mostly = {
-	.name		= "conntrack",
-	.match		= conntrack_mt,
-	.checkentry	= conntrack_mt_check,
-	.destroy	= conntrack_mt_destroy,
-	.matchsize	= sizeof(struct xt_conntrack_info),
+static struct xt_match conntrack_mt_reg[] __read_mostly = {
+	{
+		.name       = "conntrack",
+		.revision   = 0,
+		.family     = AF_INET,
+		.match      = conntrack_mt_v0,
+		.checkentry = conntrack_mt_check,
+		.destroy    = conntrack_mt_destroy,
+		.matchsize  = sizeof(struct xt_conntrack_info),
+		.me         = THIS_MODULE,
 #ifdef CONFIG_COMPAT
-	.compatsize	= sizeof(struct compat_xt_conntrack_info),
-	.compat_from_user = conntrack_mt_compat_from_user,
-	.compat_to_user	= conntrack_mt_compat_to_user,
+		.compatsize       = sizeof(struct compat_xt_conntrack_info),
+		.compat_from_user = conntrack_mt_compat_from_user_v0,
+		.compat_to_user   = conntrack_mt_compat_to_user_v0,
 #endif
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
+	},
+	{
+		.name       = "conntrack",
+		.revision   = 1,
+		.family     = AF_INET,
+		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
+		.match      = conntrack_mt,
+		.checkentry = conntrack_mt_check,
+		.destroy    = conntrack_mt_destroy,
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "conntrack",
+		.revision   = 1,
+		.family     = AF_INET6,
+		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
+		.match      = conntrack_mt,
+		.checkentry = conntrack_mt_check,
+		.destroy    = conntrack_mt_destroy,
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init conntrack_mt_init(void)
 {
-	return xt_register_match(&conntrack_mt_reg);
+	return xt_register_matches(conntrack_mt_reg,
+	       ARRAY_SIZE(conntrack_mt_reg));
 }
 
 static void __exit conntrack_mt_exit(void)
 {
-	xt_unregister_match(&conntrack_mt_reg);
+	xt_unregister_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg));
 }
 
 module_init(conntrack_mt_init);
-- 
cgit v1.2.3


From 17b0d7ef658583842da75eebf8001dc617f0b52e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:41:11 -0800
Subject: [NETFILTER]: xt_mark match, revision 1

Introduces the xt_mark match revision 1. It uses fixed types,
eventually obsoleting revision 0 some day (uses nonfixed types).

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_mark.c | 72 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index ce8735e9762..5cc8cc57c72 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -1,10 +1,13 @@
-/* Kernel module to match NFMARK values. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+/*
+ *	xt_mark - Netfilter module to match NFMARK value
+ *
+ *	(C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *	Jan Engelhardt <jengelh@computergmbh.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -19,20 +22,31 @@ MODULE_DESCRIPTION("iptables mark matching module");
 MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
 
+static bool
+mark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
+{
+	const struct xt_mark_info *info = matchinfo;
+
+	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
+}
+
 static bool
 mark_mt(const struct sk_buff *skb, const struct net_device *in,
         const struct net_device *out, const struct xt_match *match,
         const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
-	const struct xt_mark_info *info = matchinfo;
+	const struct xt_mark_mtinfo1 *info = matchinfo;
 
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
 static bool
-mark_mt_check(const char *tablename, const void *entry,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+mark_mt_check_v0(const char *tablename, const void *entry,
+                 const struct xt_match *match, void *matchinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_mark_info *minfo = matchinfo;
 
@@ -51,7 +65,7 @@ struct compat_xt_mark_info {
 	u_int16_t	__pad2;
 };
 
-static void mark_mt_compat_from_user(void *dst, void *src)
+static void mark_mt_compat_from_user_v0(void *dst, void *src)
 {
 	const struct compat_xt_mark_info *cm = src;
 	struct xt_mark_info m = {
@@ -62,7 +76,7 @@ static void mark_mt_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int mark_mt_compat_to_user(void __user *dst, void *src)
+static int mark_mt_compat_to_user_v0(void __user *dst, void *src)
 {
 	const struct xt_mark_info *m = src;
 	struct compat_xt_mark_info cm = {
@@ -77,30 +91,48 @@ static int mark_mt_compat_to_user(void __user *dst, void *src)
 static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name		= "mark",
+		.revision	= 0,
 		.family		= AF_INET,
-		.checkentry	= mark_mt_check,
-		.match		= mark_mt,
+		.checkentry	= mark_mt_check_v0,
+		.match		= mark_mt_v0,
 		.matchsize	= sizeof(struct xt_mark_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_info),
-		.compat_from_user = mark_mt_compat_from_user,
-		.compat_to_user	= mark_mt_compat_to_user,
+		.compat_from_user = mark_mt_compat_from_user_v0,
+		.compat_to_user	= mark_mt_compat_to_user_v0,
 #endif
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "mark",
+		.revision	= 0,
 		.family		= AF_INET6,
-		.checkentry	= mark_mt_check,
-		.match		= mark_mt,
+		.checkentry	= mark_mt_check_v0,
+		.match		= mark_mt_v0,
 		.matchsize	= sizeof(struct xt_mark_info),
 #ifdef CONFIG_COMPAT
 		.compatsize	= sizeof(struct compat_xt_mark_info),
-		.compat_from_user = mark_mt_compat_from_user,
-		.compat_to_user	= mark_mt_compat_to_user,
+		.compat_from_user = mark_mt_compat_from_user_v0,
+		.compat_to_user	= mark_mt_compat_to_user_v0,
 #endif
 		.me		= THIS_MODULE,
 	},
+	{
+		.name           = "mark",
+		.revision       = 1,
+		.family         = AF_INET,
+		.match          = mark_mt,
+		.matchsize      = sizeof(struct xt_mark_mtinfo1),
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "mark",
+		.revision       = 1,
+		.family         = AF_INET6,
+		.match          = mark_mt,
+		.matchsize      = sizeof(struct xt_mark_mtinfo1),
+		.me             = THIS_MODULE,
+	},
 };
 
 static int __init mark_mt_init(void)
-- 
cgit v1.2.3


From 13b0e83b5b52d1a0ab87772ecc93fe91b2740386 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:41:34 -0800
Subject: [NETFILTER]: xt_pkttype: Add explicit check for IPv4

In the PACKET_LOOPBACK case, the skb data was always interpreted as
IPv4, but that is not valid for IPv6, obviously. Fix this by adding an
extra condition to check for AF_INET.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_pkttype.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 276244902ab..080f3246eee 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -31,7 +31,8 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
 	const struct xt_pkttype_info *info = matchinfo;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = ipv4_is_multicast(ip_hdr(skb)->daddr)
+		type = match->family == AF_INET &&
+		       ipv4_is_multicast(ip_hdr(skb)->daddr)
 			? PACKET_MULTICAST
 			: PACKET_BROADCAST;
 	else
-- 
cgit v1.2.3


From 57de0abbffa9724e2a89860a49725d805bfc07ca Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:41:50 -0800
Subject: [NETFILTER]: xt_pkttype: IPv6 multicast address recognition

Signed-off-by: Jan Engelhart <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_pkttype.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 080f3246eee..cbcb8ea3b8d 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -11,6 +11,7 @@
 #include <linux/if_packet.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 
 #include <linux/netfilter/xt_pkttype.h>
 #include <linux/netfilter/x_tables.h>
@@ -27,16 +28,19 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
            const void *matchinfo, int offset, unsigned int protoff,
            bool *hotdrop)
 {
-	u_int8_t type;
 	const struct xt_pkttype_info *info = matchinfo;
+	u_int8_t type;
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = match->family == AF_INET &&
-		       ipv4_is_multicast(ip_hdr(skb)->daddr)
-			? PACKET_MULTICAST
-			: PACKET_BROADCAST;
-	else
+	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
+	else if (match->family == AF_INET &&
+	    ipv4_is_multicast(ip_hdr(skb)->daddr))
+		type = PACKET_MULTICAST;
+	else if (match->family == AF_INET6 &&
+	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
+		type = PACKET_MULTICAST;
+	else
+		type = PACKET_BROADCAST;
 
 	return (type == info->pkttype) ^ info->invert;
 }
-- 
cgit v1.2.3


From 917b6fbd6e8dd952c64d1d7468897160467d2cc0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:42:06 -0800
Subject: [NETFILTER]: xt_policy: use the new union nf_inet_addr

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_policy.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 45731ca15b7..47c2e4328a1 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <net/xfrm.h>
 
+#include <linux/netfilter.h>
 #include <linux/netfilter/xt_policy.h>
 #include <linux/netfilter/x_tables.h>
 
@@ -21,14 +22,14 @@ MODULE_DESCRIPTION("Xtables IPsec policy matching module");
 MODULE_LICENSE("GPL");
 
 static inline bool
-xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m,
-	    const union xt_policy_addr *a2, unsigned short family)
+xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
+	    const union nf_inet_addr *a2, unsigned short family)
 {
 	switch (family) {
 	case AF_INET:
-		return !((a1->a4.s_addr ^ a2->a4.s_addr) & m->a4.s_addr);
+		return ((a1->ip ^ a2->ip) & m->ip) == 0;
 	case AF_INET6:
-		return !ipv6_masked_addr_cmp(&a1->a6, &m->a6, &a2->a6);
+		return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
 	}
 	return false;
 }
@@ -38,12 +39,12 @@ match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
 		 unsigned short family)
 {
 #define MATCH_ADDR(x,y,z)	(!e->match.x ||			       \
-				 (xt_addr_cmp(&e->x, &e->y, (z), family) \
+				 (xt_addr_cmp(&e->x, &e->y, (const union nf_inet_addr *)(z), family) \
 				  ^ e->invert.x))
 #define MATCH(x,y)		(!e->match.x || ((e->x == (y)) ^ e->invert.x))
 
-	return MATCH_ADDR(saddr, smask, (union xt_policy_addr *)&x->props.saddr) &&
-	       MATCH_ADDR(daddr, dmask, (union xt_policy_addr *)&x->id.daddr) &&
+	return MATCH_ADDR(saddr, smask, &x->props.saddr) &&
+	       MATCH_ADDR(daddr, dmask, &x->id.daddr) &&
 	       MATCH(proto, x->id.proto) &&
 	       MATCH(mode, x->props.mode) &&
 	       MATCH(spi, x->id.spi) &&
-- 
cgit v1.2.3


From 2ae15b64e6a1608c840c60df38e8e5eef7b2b8c3 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:42:28 -0800
Subject: [NETFILTER]: Update modules' descriptions

Updates the MODULE_DESCRIPTION() tags for all Netfilter modules,
actually describing what the module does and not just
"netfilter XYZ target".

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c   | 2 +-
 net/ipv4/netfilter/ipt_ECN.c         | 2 +-
 net/ipv4/netfilter/ipt_LOG.c         | 2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c  | 2 +-
 net/ipv4/netfilter/ipt_NETMAP.c      | 2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c    | 2 +-
 net/ipv4/netfilter/ipt_REJECT.c      | 2 +-
 net/ipv4/netfilter/ipt_TTL.c         | 2 +-
 net/ipv4/netfilter/ipt_ULOG.c        | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 2 +-
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 2 +-
 net/ipv4/netfilter/ipt_iprange.c     | 2 +-
 net/ipv4/netfilter/ipt_recent.c      | 2 +-
 net/ipv4/netfilter/ipt_ttl.c         | 2 +-
 net/ipv6/netfilter/ip6t_HL.c         | 2 +-
 net/ipv6/netfilter/ip6t_LOG.c        | 2 +-
 net/ipv6/netfilter/ip6t_REJECT.c     | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 2 +-
 net/ipv6/netfilter/ip6t_eui64.c      | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_hl.c         | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 2 +-
 net/ipv6/netfilter/ip6t_rt.c         | 2 +-
 net/netfilter/xt_CLASSIFY.c          | 2 +-
 net/netfilter/xt_CONNMARK.c          | 2 +-
 net/netfilter/xt_CONNSECMARK.c       | 2 +-
 net/netfilter/xt_DSCP.c              | 2 +-
 net/netfilter/xt_MARK.c              | 2 +-
 net/netfilter/xt_NFLOG.c             | 2 +-
 net/netfilter/xt_NFQUEUE.c           | 2 +-
 net/netfilter/xt_NOTRACK.c           | 1 +
 net/netfilter/xt_RATEEST.c           | 2 +-
 net/netfilter/xt_SECMARK.c           | 2 +-
 net/netfilter/xt_TCPMSS.c            | 2 +-
 net/netfilter/xt_TCPOPTSTRIP.c       | 2 +-
 net/netfilter/xt_TRACE.c             | 1 +
 net/netfilter/xt_comment.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 2 +-
 net/netfilter/xt_conntrack.c         | 2 +-
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 2 +-
 net/netfilter/xt_esp.c               | 2 +-
 net/netfilter/xt_hashlimit.c         | 2 +-
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_length.c            | 2 +-
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_mac.c               | 2 +-
 net/netfilter/xt_mark.c              | 2 +-
 net/netfilter/xt_multiport.c         | 2 +-
 net/netfilter/xt_owner.c             | 2 +-
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_pkttype.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 1 +
 net/netfilter/xt_realm.c             | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpmss.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 2 +-
 net/netfilter/xt_time.c              | 2 +-
 net/netfilter/xt_u32.c               | 2 +-
 67 files changed, 67 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index df39ca07fb1..1b31f7d14d4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -32,7 +32,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables target for CLUSTERIP");
+MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
 
 struct clusterip_config {
 	struct list_head list;			/* list of all configs */
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index ab417649161..21395bc2b27 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -21,7 +21,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables ECN modification module");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification");
 
 /* set ECT codepoint from IP header.
  * 	return false if there was an error. */
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 5acdddfa6ad..b38d7850f50 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -26,7 +26,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables syslog logging module");
+MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
 
 /* Use lock to serialize, so printks don't overlap */
 static DEFINE_SPINLOCK(log_lock);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 1cbff7b3084..d80fee8327e 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -25,7 +25,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables MASQUERADE target module");
+MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* Lock protects masq region inside conntrack */
 static DEFINE_RWLOCK(masq_lock);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 5b71ef4d848..6739abfd152 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
-MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
+MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
 static bool
 netmap_tg_check(const char *tablename, const void *e,
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 3d9ec5c34c5..5c6292449d1 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -23,7 +23,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables REDIRECT target module");
+MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 
 /* FIXME: Take multiple ranges --RR */
 static bool
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index a299cebbd89..e3c2ecc341e 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -29,7 +29,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables REJECT target module");
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
 
 /* Send RST reply */
 static void send_reset(struct sk_buff *oldskb, int hook)
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index fa13cf6162b..30eed65e733 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_ipv4/ipt_TTL.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IP tables TTL modification module");
+MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 1d8e146345e..fa24efaf3ea 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -50,7 +50,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("iptables userspace logging module");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
 MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 
 #define ULOG_NL_EVENT		111		/* Harald's favorite number */
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 8763902944b..49587a49722 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -21,7 +21,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("iptables addrtype match");
+MODULE_DESCRIPTION("Xtables: address type match for IPv4");
 
 static inline bool match_type(const struct net_device *dev, __be32 addr,
 			      u_int16_t mask)
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2b2fb26c12a..e977989629c 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -16,7 +16,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
-MODULE_DESCRIPTION("iptables AH SPI match module");
+MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
 
 #ifdef DEBUG_CONNTRACK
 #define duprintf(format, args...) printk(format , ## args)
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index ea1378460b0..749de8284ce 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -19,7 +19,7 @@
 #include <linux/netfilter_ipv4/ipt_ecn.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables ECN matching module");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
 MODULE_LICENSE("GPL");
 
 static inline bool match_ip(const struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 82208ed5f70..9a2aba816c9 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -15,7 +15,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("iptables arbitrary IP range match module");
+MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
 
 static bool
 iprange_mt(const struct sk_buff *skb, const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 4f3700d937a..e3154a99c08 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -30,7 +30,7 @@
 #include <linux/netfilter_ipv4/ipt_recent.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("IP tables recently seen matching module");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
 MODULE_LICENSE("GPL");
 
 static unsigned int ip_list_tot = 100;
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index b18d39162ff..e0b8caeb710 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IP tables TTL matching module");
+MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
 MODULE_LICENSE("GPL");
 
 static bool
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index cefb4253711..d5f8fd5f29d 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter_ipv6/ip6t_HL.h>
 
 MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
-MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
+MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 474c2b12621..86a613810b6 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -26,7 +26,7 @@
 #include <net/netfilter/nf_log.h>
 
 MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
-MODULE_DESCRIPTION("IP6 tables LOG target module");
+MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
 MODULE_LICENSE("GPL");
 
 struct in_device;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index a951c2cb6de..b23baa635fe 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -31,7 +31,7 @@
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
 
 MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
-MODULE_DESCRIPTION("IP6 tables REJECT target module");
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
 MODULE_LICENSE("GPL");
 
 /* Send RST reply */
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index f5d08a8c011..429629fd63b 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -20,7 +20,7 @@
 #include <linux/netfilter_ipv6/ip6t_ah.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 AH match");
+MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index dd9e67df914..8f331f12b2e 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-MODULE_DESCRIPTION("IPv6 EUI64 address checking match");
+MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index ae8c714a80d..e2bbc63dba5 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -19,7 +19,7 @@
 #include <linux/netfilter_ipv6/ip6t_frag.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 FRAG match");
+MODULE_DESCRIPTION("Xtables: IPv6 fragment match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 /* Returns 1 if the id is matched by the range, 0 otherwise */
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index b76e27dc73d..62e39ace058 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter_ipv6/ip6t_opts.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 opts match");
+MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 MODULE_ALIAS("ip6t_dst");
 
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 8f2d7d0ab40..34567167384 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
-MODULE_DESCRIPTION("IP tables Hop Limit matching module");
+MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match");
 MODULE_LICENSE("GPL");
 
 static bool
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index ae497e7ac11..3a940171f82 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -23,7 +23,7 @@
 #include <linux/netfilter_ipv6/ip6t_ipv6header.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 headers match");
+MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 618e6b94b03..e06678d07ec 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6t_mh.h>
 
-MODULE_DESCRIPTION("ip6t_tables match for MH");
+MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
 MODULE_LICENSE("GPL");
 
 #ifdef DEBUG_IP_FIREWALL_USER
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 038cea6407d..12a9efe9886 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter_ipv6/ip6t_rt.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 RT match");
+MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 /* Returns 1 if the id is matched by the range, 0 otherwise */
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 8e83dd4c13a..77a52bf8322 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -22,7 +22,7 @@
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables qdisc classification target module");
+MODULE_DESCRIPTION("Xtables: Qdisc classification");
 MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index b9bd7723346..5fecfb4794b 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -26,7 +26,7 @@
 #include <net/checksum.h>
 
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
-MODULE_DESCRIPTION("IP tables CONNMARK matching module");
+MODULE_DESCRIPTION("Xtables: connection mark modification");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_CONNMARK");
 MODULE_ALIAS("ip6t_CONNMARK");
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 024106bdd37..1faa9136195 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -26,7 +26,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
-MODULE_DESCRIPTION("ip[6]tables CONNSECMARK module");
+MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
 MODULE_ALIAS("ipt_CONNSECMARK");
 MODULE_ALIAS("ip6t_CONNSECMARK");
 
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 3d216d650c8..97efd74c04f 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter_ipv4/ipt_TOS.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("x_tables DSCP modification module");
+MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_DSCP");
 MODULE_ALIAS("ip6t_DSCP");
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 1c3fb75adfd..f9ce20b5898 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("ip[6]tables MARK modification module");
+MODULE_DESCRIPTION("Xtables: packet mark modification");
 MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 866facfa4f4..19ae8efae65 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -15,7 +15,7 @@
 #include <net/netfilter/nf_log.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("x_tables NFLOG target");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 16b57c23f35..beb24d19a56 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter/xt_NFQUEUE.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("[ip,ip6,arp]_tables NFQUEUE target");
+MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_NFQUEUE");
 MODULE_ALIAS("ip6t_NFQUEUE");
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 95712e4e997..6c9de611eb8 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -7,6 +7,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_conntrack.h>
 
+MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index c0088839fde..c5ba525dc32 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -197,7 +197,7 @@ static void __exit xt_rateest_tg_fini(void)
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xtables rate estimator");
+MODULE_DESCRIPTION("Xtables: packet rate estimator");
 MODULE_ALIAS("ipt_RATEEST");
 MODULE_ALIAS("ip6t_RATEEST");
 module_init(xt_rateest_tg_init);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 7d5439c9f18..b11b3ecbb39 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
-MODULE_DESCRIPTION("ip[6]tables SECMARK modification module");
+MODULE_DESCRIPTION("Xtables: packet security mark modification");
 MODULE_ALIAS("ipt_SECMARK");
 MODULE_ALIAS("ip6t_SECMARK");
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index a1bc77fcd68..60e3767cc71 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -24,7 +24,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("x_tables TCP MSS modification module");
+MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
 MODULE_ALIAS("ipt_TCPMSS");
 MODULE_ALIAS("ip6t_TCPMSS");
 
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 43d6ac224f5..3b2aa56833b 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -141,7 +141,7 @@ static void __exit tcpoptstrip_tg_exit(void)
 module_init(tcpoptstrip_tg_init);
 module_exit(tcpoptstrip_tg_exit);
 MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>");
-MODULE_DESCRIPTION("netfilter \"TCPOPTSTRIP\" target module");
+MODULE_DESCRIPTION("Xtables: TCP option stripping");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_TCPOPTSTRIP");
 MODULE_ALIAS("ip6t_TCPOPTSTRIP");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 219b9d2445c..30dab79a343 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -5,6 +5,7 @@
 
 #include <linux/netfilter/x_tables.h>
 
+MODULE_DESCRIPTION("Xtables: packet flow tracing");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 4539d435ec7..89f47364e84 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -10,7 +10,7 @@
 #include <linux/netfilter/xt_comment.h>
 
 MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
-MODULE_DESCRIPTION("iptables comment match module");
+MODULE_DESCRIPTION("Xtables: No-op match which can be tagged with a comment");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 7d4940adc3c..b15e7e2fa14 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -12,7 +12,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
+MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
 MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 6a9e2a35718..e00ecd974fa 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -309,7 +309,7 @@ static void __exit connlimit_mt_exit(void)
 module_init(connlimit_mt_init);
 module_exit(connlimit_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
-MODULE_DESCRIPTION("netfilter xt_connlimit match module");
+MODULE_DESCRIPTION("Xtables: Number of connections matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connlimit");
 MODULE_ALIAS("ip6t_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 55c62350b1f..aaa1b96691f 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -28,7 +28,7 @@
 #include <linux/netfilter/xt_connmark.h>
 
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
-MODULE_DESCRIPTION("IP tables connmark match module");
+MODULE_DESCRIPTION("Xtables: connection mark match");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index dc9e7378686..e92190eafcc 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables connection tracking match module");
+MODULE_DESCRIPTION("Xtables: connection tracking state match");
 MODULE_ALIAS("ipt_conntrack");
 MODULE_ALIAS("ip6t_conntrack");
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index ab2f7e9f75a..667f45e72cd 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -22,7 +22,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Match for DCCP protocol packets");
+MODULE_DESCRIPTION("Xtables: DCCP protocol packet match");
 MODULE_ALIAS("ipt_dccp");
 MODULE_ALIAS("ip6t_dccp");
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 834e4372031..26f4aab9c42 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter_ipv4/ipt_tos.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("x_tables DSCP/tos matching module");
+MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_dscp");
 MODULE_ALIAS("ip6t_dscp");
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index d7c90ac393d..71c7c378526 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
-MODULE_DESCRIPTION("x_tables ESP SPI match module");
+MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
 MODULE_ALIAS("ipt_esp");
 MODULE_ALIAS("ip6t_esp");
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index c35d220a7ae..d479ca98011 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -35,7 +35,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
+MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
 MODULE_ALIAS("ipt_hashlimit");
 MODULE_ALIAS("ip6t_hashlimit");
 
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 5d063e51f88..dada2905d66 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -18,7 +18,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
-MODULE_DESCRIPTION("iptables helper match module");
+MODULE_DESCRIPTION("Xtables: Related connection matching");
 MODULE_ALIAS("ipt_helper");
 MODULE_ALIAS("ip6t_helper");
 
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index ea545785965..b8640f97295 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("IP tables packet length matching module");
+MODULE_DESCRIPTION("Xtables: Packet length (Layer3,4,5) match");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 2ef0dbf6392..aad9ab8d204 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -16,7 +16,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
-MODULE_DESCRIPTION("iptables rate limit match");
+MODULE_DESCRIPTION("Xtables: rate-limit match");
 MODULE_ALIAS("ipt_limit");
 MODULE_ALIAS("ip6t_limit");
 
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 7d89863a78b..b3e96a0ec17 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables mac matching module");
+MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 5cc8cc57c72..9f78f6120fb 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -18,7 +18,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables mark matching module");
+MODULE_DESCRIPTION("Xtables: packet mark match");
 MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index d03cc376811..31daa819242 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -22,7 +22,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("x_tables multiple port match module");
+MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP and DCCP");
 MODULE_ALIAS("ipt_multiport");
 MODULE_ALIAS("ip6t_multiport");
 
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 4222fa2c1b1..d382f9cc38b 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -205,7 +205,7 @@ static void __exit owner_mt_exit(void)
 module_init(owner_mt_init);
 module_exit(owner_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
-MODULE_DESCRIPTION("netfilter \"owner\" match module");
+MODULE_DESCRIPTION("Xtables: socket owner matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_owner");
 MODULE_ALIAS("ip6t_owner");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 678b6833a80..4ec1094bda9 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -16,7 +16,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("iptables bridge physical device match module");
+MODULE_DESCRIPTION("Xtables: Bridge physical device match");
 MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index cbcb8ea3b8d..7936f7e2325 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -18,7 +18,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
-MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
+MODULE_DESCRIPTION("Xtables: link layer packet type match");
 MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 47c2e4328a1..9e918add228 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables IPsec policy matching module");
+MODULE_DESCRIPTION("Xtables: IPsec policy match");
 MODULE_LICENSE("GPL");
 
 static inline bool
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 887874b5684..3b021d0c522 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,6 +11,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_DESCRIPTION("Xtables: countdown quota match");
 MODULE_ALIAS("ipt_quota");
 MODULE_ALIAS("ip6t_quota");
 
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 63289b40c8d..7df1627c536 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -18,7 +18,7 @@
 
 MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("X_tables realm match");
+MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 1c8a4ee9cd0..b718ec64333 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -11,7 +11,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Match for SCTP protocol packets");
+MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
 MODULE_ALIAS("ipt_sctp");
 MODULE_ALIAS("ip6t_sctp");
 
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index fb166483acd..43133080da7 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -18,7 +18,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("xtables statistical match module");
+MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
 MODULE_ALIAS("ipt_statistic");
 MODULE_ALIAS("ip6t_statistic");
 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index aff7a116055..72f694d947f 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -16,7 +16,7 @@
 #include <linux/textsearch.h>
 
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
-MODULE_DESCRIPTION("IP tables string match module");
+MODULE_DESCRIPTION("Xtables: string-based matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 2a3e4c30e50..d7a5b27fe81 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -20,7 +20,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables TCP MSS match module");
+MODULE_DESCRIPTION("Xtables: TCP MSS match");
 MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 6be5f2df5a5..4fa3b669f69 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -10,7 +10,7 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-MODULE_DESCRIPTION("x_tables match for TCP and UDP(-Lite), supports IPv4 and IPv6");
+MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("xt_tcp");
 MODULE_ALIAS("xt_udp");
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 96da93c9ece..e9a8794bc3a 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -264,7 +264,7 @@ static void __exit time_mt_exit(void)
 module_init(time_mt_init);
 module_exit(time_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
-MODULE_DESCRIPTION("netfilter time match");
+MODULE_DESCRIPTION("Xtables: time-based matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_time");
 MODULE_ALIAS("ip6t_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 3d8f5b3986a..9b8ed390a8e 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -130,7 +130,7 @@ static void __exit u32_mt_exit(void)
 module_init(u32_mt_init);
 module_exit(u32_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
-MODULE_DESCRIPTION("netfilter u32 match module");
+MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_u32");
 MODULE_ALIAS("ip6t_u32");
-- 
cgit v1.2.3


From f72e25a897c7edda03a0e1f767925d98772684da Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:42:47 -0800
Subject: [NETFILTER]: Rename ipt_iprange to xt_iprange

This patch moves ipt_iprange to xt_iprange, in preparation for adding
IPv6 support to xt_iprange.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig       | 10 ------
 net/ipv4/netfilter/Makefile      |  1 -
 net/ipv4/netfilter/ipt_iprange.c | 77 ----------------------------------------
 net/netfilter/Kconfig            | 11 ++++++
 net/netfilter/Makefile           |  1 +
 net/netfilter/xt_iprange.c       | 76 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 88 insertions(+), 88 deletions(-)
 delete mode 100644 net/ipv4/netfilter/ipt_iprange.c
 create mode 100644 net/netfilter/xt_iprange.c

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 10ca307b849..9a077cb2479 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -57,16 +57,6 @@ config IP_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The matches.
-config IP_NF_MATCH_IPRANGE
-	tristate '"iprange" match support'
-	depends on IP_NF_IPTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  This option makes possible to match IP addresses against IP address
-	  ranges.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_RECENT
 	tristate '"recent" match support'
 	depends on IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index fd7d4a5b436..0c7dc78a62e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -44,7 +44,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
deleted file mode 100644
index 9a2aba816c9..00000000000
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * iptables module to match IP address ranges
- *
- * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_iprange.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
-
-static bool
-iprange_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
-{
-	const struct ipt_iprange_info *info = matchinfo;
-	const struct iphdr *iph = ip_hdr(skb);
-
-	if (info->flags & IPRANGE_SRC) {
-		if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
-			  || ntohl(iph->saddr) > ntohl(info->src.max_ip))
-			 ^ !!(info->flags & IPRANGE_SRC_INV)) {
-			pr_debug("src IP %u.%u.%u.%u NOT in range %s"
-				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
-				 NIPQUAD(iph->saddr),
-				 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
-				 NIPQUAD(info->src.min_ip),
-				 NIPQUAD(info->src.max_ip));
-			return false;
-		}
-	}
-	if (info->flags & IPRANGE_DST) {
-		if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
-			  || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
-			 ^ !!(info->flags & IPRANGE_DST_INV)) {
-			pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
-				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
-				 NIPQUAD(iph->daddr),
-				 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
-				 NIPQUAD(info->dst.min_ip),
-				 NIPQUAD(info->dst.max_ip));
-			return false;
-		}
-	}
-	return true;
-}
-
-static struct xt_match iprange_mt_reg __read_mostly = {
-	.name		= "iprange",
-	.family		= AF_INET,
-	.match		= iprange_mt,
-	.matchsize	= sizeof(struct ipt_iprange_info),
-	.me		= THIS_MODULE
-};
-
-static int __init iprange_mt_init(void)
-{
-	return xt_register_match(&iprange_mt_reg);
-}
-
-static void __exit iprange_mt_exit(void)
-{
-	xt_unregister_match(&iprange_mt_reg);
-}
-
-module_init(iprange_mt_init);
-module_exit(iprange_mt_exit);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 79d71437e31..daf5b881064 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -567,6 +567,17 @@ config NETFILTER_XT_MATCH_HELPER
 
 	  To compile it as a module, choose M here.  If unsure, say Y.
 
+config NETFILTER_XT_MATCH_IPRANGE
+	tristate '"iprange" address range match support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds a "iprange" match, which allows you to match based on
+	an IP address range. (Normal iptables only matches on single addresses
+	with an optional mask.)
+
+	If unsure, say M.
+
 config NETFILTER_XT_MATCH_LENGTH
 	tristate '"length" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3b9ea8fb3a0..c910caee0d4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
new file mode 100644
index 00000000000..c57a6cf8a08
--- /dev/null
+++ b/net/netfilter/xt_iprange.c
@@ -0,0 +1,76 @@
+/*
+ *	xt_iprange - Netfilter module to match IP address ranges
+ *
+ *	(C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_iprange.h>
+
+static bool
+iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+              const struct net_device *out, const struct xt_match *match,
+              const void *matchinfo, int offset, unsigned int protoff,
+              bool *hotdrop)
+{
+	const struct ipt_iprange_info *info = matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+
+	if (info->flags & IPRANGE_SRC) {
+		if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
+			  || ntohl(iph->saddr) > ntohl(info->src.max_ip))
+			 ^ !!(info->flags & IPRANGE_SRC_INV)) {
+			pr_debug("src IP %u.%u.%u.%u NOT in range %s"
+				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				 NIPQUAD(iph->saddr),
+				 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
+				 NIPQUAD(info->src.min_ip),
+				 NIPQUAD(info->src.max_ip));
+			return false;
+		}
+	}
+	if (info->flags & IPRANGE_DST) {
+		if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
+			  || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
+			 ^ !!(info->flags & IPRANGE_DST_INV)) {
+			pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
+				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				 NIPQUAD(iph->daddr),
+				 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
+				 NIPQUAD(info->dst.min_ip),
+				 NIPQUAD(info->dst.max_ip));
+			return false;
+		}
+	}
+	return true;
+}
+
+static struct xt_match iprange_mt_reg __read_mostly = {
+	.name		= "iprange",
+	.family		= AF_INET,
+	.match		= iprange_mt_v0,
+	.matchsize	= sizeof(struct ipt_iprange_info),
+	.me		= THIS_MODULE
+};
+
+static int __init iprange_mt_init(void)
+{
+	return xt_register_match(&iprange_mt_reg);
+}
+
+static void __exit iprange_mt_exit(void)
+{
+	xt_unregister_match(&iprange_mt_reg);
+}
+
+module_init(iprange_mt_init);
+module_exit(iprange_mt_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
-- 
cgit v1.2.3


From 1a50c5a1fe20864f1f902ef76862519a65d14723 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 14 Jan 2008 23:43:03 -0800
Subject: [NETFILTER]: xt_iprange match, revision 1

Adds IPv6 support to xt_iprange, making it possible to match on IPv6
address ranges with ip6tables.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_iprange.c | 122 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 113 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index c57a6cf8a08..dbea0e0893f 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -2,6 +2,7 @@
  *	xt_iprange - Netfilter module to match IP address ranges
  *
  *	(C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *	(C) CC Computer Consultants GmbH, 2008
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2 as
@@ -10,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_iprange.h>
 
@@ -51,26 +53,128 @@ iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 	return true;
 }
 
-static struct xt_match iprange_mt_reg __read_mostly = {
-	.name		= "iprange",
-	.family		= AF_INET,
-	.match		= iprange_mt_v0,
-	.matchsize	= sizeof(struct ipt_iprange_info),
-	.me		= THIS_MODULE
+static bool
+iprange_mt4(const struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, const struct xt_match *match,
+            const void *matchinfo, int offset, unsigned int protoff,
+            bool *hotdrop)
+{
+	const struct xt_iprange_mtinfo *info = matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	bool m;
+
+	if (info->flags & IPRANGE_SRC) {
+		m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
+		m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
+		m ^= info->flags & IPRANGE_SRC_INV;
+		if (m) {
+			pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
+			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
+			         NIPQUAD(iph->saddr),
+			         (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
+			         NIPQUAD(info->src_max.ip),
+			         NIPQUAD(info->src_max.ip));
+			return false;
+		}
+	}
+	if (info->flags & IPRANGE_DST) {
+		m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
+		m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
+		m ^= info->flags & IPRANGE_DST_INV;
+		if (m) {
+			pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
+			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
+			         NIPQUAD(iph->daddr),
+			         (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
+			         NIPQUAD(info->dst_min.ip),
+			         NIPQUAD(info->dst_max.ip));
+			return false;
+		}
+	}
+	return true;
+}
+
+static inline int
+iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
+{
+	unsigned int i;
+	int r;
+
+	for (i = 0; i < 4; ++i) {
+		r = a->s6_addr32[i] - b->s6_addr32[i];
+		if (r != 0)
+			return r;
+	}
+
+	return 0;
+}
+
+static bool
+iprange_mt6(const struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, const struct xt_match *match,
+            const void *matchinfo, int offset, unsigned int protoff,
+            bool *hotdrop)
+{
+	const struct xt_iprange_mtinfo *info = matchinfo;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	bool m;
+
+	if (info->flags & IPRANGE_SRC) {
+		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
+		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
+		m ^= info->flags & IPRANGE_SRC_INV;
+		if (m)
+			return false;
+	}
+	if (info->flags & IPRANGE_DST) {
+		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
+		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
+		m ^= info->flags & IPRANGE_DST_INV;
+		if (m)
+			return false;
+	}
+	return true;
+}
+
+static struct xt_match iprange_mt_reg[] __read_mostly = {
+	{
+		.name      = "iprange",
+		.revision  = 0,
+		.family    = AF_INET,
+		.match     = iprange_mt_v0,
+		.matchsize = sizeof(struct ipt_iprange_info),
+		.me        = THIS_MODULE,
+	},
+	{
+		.name      = "iprange",
+		.revision  = 1,
+		.family    = AF_INET6,
+		.match     = iprange_mt4,
+		.matchsize = sizeof(struct xt_iprange_mtinfo),
+		.me        = THIS_MODULE,
+	},
+	{
+		.name      = "iprange",
+		.revision  = 1,
+		.family    = AF_INET6,
+		.match     = iprange_mt6,
+		.matchsize = sizeof(struct xt_iprange_mtinfo),
+		.me        = THIS_MODULE,
+	},
 };
 
 static int __init iprange_mt_init(void)
 {
-	return xt_register_match(&iprange_mt_reg);
+	return xt_register_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
 }
 
 static void __exit iprange_mt_exit(void)
 {
-	xt_unregister_match(&iprange_mt_reg);
+	xt_unregister_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
 }
 
 module_init(iprange_mt_init);
 module_exit(iprange_mt_exit);
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>");
 MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
-- 
cgit v1.2.3


From 022748a9357c4c1a0113ec1ce5612f383b80156f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 14 Jan 2008 23:44:05 -0800
Subject: [NETFILTER]: {ip,ip6}_tables: remove some inlines

This patch removes inlines except those which are used
by packet matching code and thus are performance-critical.

Before:

$ size */*/*/ip*tables*.o
   text    data     bss     dec     hex filename
   6402     500      16    6918    1b06 net/ipv4/netfilter/ip_tables.o
   7130     500      16    7646    1dde net/ipv6/netfilter/ip6_tables.o

After:

$ size */*/*/ip*tables*.o
   text    data     bss     dec     hex filename
   6307     500      16    6823    1aa7 net/ipv4/netfilter/ip_tables.o
   7010     500      16    7526    1d66 net/ipv6/netfilter/ip6_tables.o

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c  | 47 +++++++++++++++++++++++------------------
 net/ipv6/netfilter/ip6_tables.c | 42 +++++++++++++++++++-----------------
 2 files changed, 50 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f5b66ec18b0..982b7f98629 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -75,6 +75,7 @@ do {								\
    Hence the start of any table is given by get_table() below.  */
 
 /* Returns whether matches rule or not. */
+/* Performance critical - called for every packet */
 static inline bool
 ip_packet_match(const struct iphdr *ip,
 		const char *indev,
@@ -153,7 +154,7 @@ ip_packet_match(const struct iphdr *ip,
 	return true;
 }
 
-static inline bool
+static bool
 ip_checkentry(const struct ipt_ip *ip)
 {
 	if (ip->flags & ~IPT_F_MASK) {
@@ -183,8 +184,9 @@ ipt_error(struct sk_buff *skb,
 	return NF_DROP;
 }
 
-static inline
-bool do_match(struct ipt_entry_match *m,
+/* Performance critical - called for every packet */
+static inline bool
+do_match(struct ipt_entry_match *m,
 	      const struct sk_buff *skb,
 	      const struct net_device *in,
 	      const struct net_device *out,
@@ -199,6 +201,7 @@ bool do_match(struct ipt_entry_match *m,
 		return false;
 }
 
+/* Performance critical */
 static inline struct ipt_entry *
 get_entry(void *base, unsigned int offset)
 {
@@ -206,6 +209,7 @@ get_entry(void *base, unsigned int offset)
 }
 
 /* All zeroes == unconditional rule. */
+/* Mildly perf critical (only if packet tracing is on) */
 static inline int
 unconditional(const struct ipt_ip *ip)
 {
@@ -221,7 +225,7 @@ unconditional(const struct ipt_ip *ip)
 
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-static const char *hooknames[] = {
+static const char *const hooknames[] = {
 	[NF_INET_PRE_ROUTING]		= "PREROUTING",
 	[NF_INET_LOCAL_IN]		= "INPUT",
 	[NF_INET_FORWARD]		= "FORWARD",
@@ -235,7 +239,7 @@ enum nf_ip_trace_comments {
 	NF_IP_TRACE_COMMENT_POLICY,
 };
 
-static const char *comments[] = {
+static const char *const comments[] = {
 	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
 	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
 	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
@@ -251,6 +255,7 @@ static struct nf_loginfo trace_loginfo = {
 	},
 };
 
+/* Mildly perf critical (only if packet tracing is on) */
 static inline int
 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
 		      char *hookname, char **chainname,
@@ -567,7 +572,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 	return 1;
 }
 
-static inline int
+static int
 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 {
 	if (i && (*i)-- == 0)
@@ -579,7 +584,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 	return 0;
 }
 
-static inline int
+static int
 check_entry(struct ipt_entry *e, const char *name)
 {
 	struct ipt_entry_target *t;
@@ -600,7 +605,8 @@ check_entry(struct ipt_entry *e, const char *name)
 	return 0;
 }
 
-static inline int check_match(struct ipt_entry_match *m, const char *name,
+static int
+check_match(struct ipt_entry_match *m, const char *name,
 			      const struct ipt_ip *ip,
 			      unsigned int hookmask, unsigned int *i)
 {
@@ -623,7 +629,7 @@ static inline int check_match(struct ipt_entry_match *m, const char *name,
 	return ret;
 }
 
-static inline int
+static int
 find_check_match(struct ipt_entry_match *m,
 		 const char *name,
 		 const struct ipt_ip *ip,
@@ -652,7 +658,7 @@ err:
 	return ret;
 }
 
-static inline int check_target(struct ipt_entry *e, const char *name)
+static int check_target(struct ipt_entry *e, const char *name)
 {
 	struct ipt_entry_target *t;
 	struct xt_target *target;
@@ -673,7 +679,7 @@ static inline int check_target(struct ipt_entry *e, const char *name)
 	return ret;
 }
 
-static inline int
+static int
 find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 		 unsigned int *i)
 {
@@ -717,7 +723,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	return ret;
 }
 
-static inline int
+static int
 check_entry_size_and_hooks(struct ipt_entry *e,
 			   struct xt_table_info *newinfo,
 			   unsigned char *base,
@@ -760,7 +766,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 	return 0;
 }
 
-static inline int
+static int
 cleanup_entry(struct ipt_entry *e, unsigned int *i)
 {
 	struct ipt_entry_target *t;
@@ -916,7 +922,7 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
-static inline struct xt_counters * alloc_counters(struct xt_table *table)
+static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
@@ -1304,7 +1310,7 @@ do_replace(void __user *user, unsigned int len)
 
 /* We're lazy, and add to the first CPU; overflow works its fey magic
  * and everything is OK. */
-static inline int
+static int
 add_counter_to_entry(struct ipt_entry *e,
 		     const struct xt_counters addme[],
 		     unsigned int *i)
@@ -1465,7 +1471,7 @@ out:
 	return ret;
 }
 
-static inline int
+static int
 compat_find_calc_match(struct ipt_entry_match *m,
 		       const char *name,
 		       const struct ipt_ip *ip,
@@ -1489,7 +1495,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
 	return 0;
 }
 
-static inline int
+static int
 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
 {
 	if (i && (*i)-- == 0)
@@ -1499,7 +1505,7 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i)
 	return 0;
 }
 
-static inline int
+static int
 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
 {
 	struct ipt_entry_target *t;
@@ -1514,7 +1520,7 @@ compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
 	return 0;
 }
 
-static inline int
+static int
 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 				  struct xt_table_info *newinfo,
 				  unsigned int *size,
@@ -1637,7 +1643,8 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 	return ret;
 }
 
-static inline int compat_check_entry(struct ipt_entry *e, const char *name,
+static int
+compat_check_entry(struct ipt_entry *e, const char *name,
 				     unsigned int *i)
 {
 	int j, ret;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4ed16d254b9..dd7860fea61 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -90,6 +90,7 @@ ip6t_ext_hdr(u8 nexthdr)
 }
 
 /* Returns whether matches rule or not. */
+/* Performance critical - called for every packet */
 static inline bool
 ip6_packet_match(const struct sk_buff *skb,
 		 const char *indev,
@@ -182,7 +183,7 @@ ip6_packet_match(const struct sk_buff *skb,
 }
 
 /* should be ip6 safe */
-static inline bool
+static bool
 ip6_checkentry(const struct ip6t_ip6 *ipv6)
 {
 	if (ipv6->flags & ~IP6T_F_MASK) {
@@ -212,8 +213,9 @@ ip6t_error(struct sk_buff *skb,
 	return NF_DROP;
 }
 
-static inline
-bool do_match(struct ip6t_entry_match *m,
+/* Performance critical - called for every packet */
+static inline bool
+do_match(struct ip6t_entry_match *m,
 	      const struct sk_buff *skb,
 	      const struct net_device *in,
 	      const struct net_device *out,
@@ -236,6 +238,7 @@ get_entry(void *base, unsigned int offset)
 }
 
 /* All zeroes == unconditional rule. */
+/* Mildly perf critical (only if packet tracing is on) */
 static inline int
 unconditional(const struct ip6t_ip6 *ipv6)
 {
@@ -251,7 +254,7 @@ unconditional(const struct ip6t_ip6 *ipv6)
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 /* This cries for unification! */
-static const char *hooknames[] = {
+static const char *const hooknames[] = {
 	[NF_INET_PRE_ROUTING]		= "PREROUTING",
 	[NF_INET_LOCAL_IN]		= "INPUT",
 	[NF_INET_FORWARD]		= "FORWARD",
@@ -265,7 +268,7 @@ enum nf_ip_trace_comments {
 	NF_IP6_TRACE_COMMENT_POLICY,
 };
 
-static const char *comments[] = {
+static const char *const comments[] = {
 	[NF_IP6_TRACE_COMMENT_RULE]	= "rule",
 	[NF_IP6_TRACE_COMMENT_RETURN]	= "return",
 	[NF_IP6_TRACE_COMMENT_POLICY]	= "policy",
@@ -281,6 +284,7 @@ static struct nf_loginfo trace_loginfo = {
 	},
 };
 
+/* Mildly perf critical (only if packet tracing is on) */
 static inline int
 get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
 		      char *hookname, char **chainname,
@@ -595,7 +599,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 	return 1;
 }
 
-static inline int
+static int
 cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 {
 	if (i && (*i)-- == 0)
@@ -607,7 +611,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 	return 0;
 }
 
-static inline int
+static int
 check_entry(struct ip6t_entry *e, const char *name)
 {
 	struct ip6t_entry_target *t;
@@ -628,7 +632,7 @@ check_entry(struct ip6t_entry *e, const char *name)
 	return 0;
 }
 
-static inline int check_match(struct ip6t_entry_match *m, const char *name,
+static int check_match(struct ip6t_entry_match *m, const char *name,
 			      const struct ip6t_ip6 *ipv6,
 			      unsigned int hookmask, unsigned int *i)
 {
@@ -651,7 +655,7 @@ static inline int check_match(struct ip6t_entry_match *m, const char *name,
 	return ret;
 }
 
-static inline int
+static int
 find_check_match(struct ip6t_entry_match *m,
 		 const char *name,
 		 const struct ip6t_ip6 *ipv6,
@@ -680,7 +684,7 @@ err:
 	return ret;
 }
 
-static inline int check_target(struct ip6t_entry *e, const char *name)
+static int check_target(struct ip6t_entry *e, const char *name)
 {
 	struct ip6t_entry_target *t;
 	struct xt_target *target;
@@ -701,7 +705,7 @@ static inline int check_target(struct ip6t_entry *e, const char *name)
 	return ret;
 }
 
-static inline int
+static int
 find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 		 unsigned int *i)
 {
@@ -745,7 +749,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	return ret;
 }
 
-static inline int
+static int
 check_entry_size_and_hooks(struct ip6t_entry *e,
 			   struct xt_table_info *newinfo,
 			   unsigned char *base,
@@ -788,7 +792,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 	return 0;
 }
 
-static inline int
+static int
 cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 {
 	struct ip6t_entry_target *t;
@@ -944,7 +948,7 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
-static inline struct xt_counters *alloc_counters(struct xt_table *table)
+static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
@@ -1494,7 +1498,7 @@ out:
 	return ret;
 }
 
-static inline int
+static int
 compat_find_calc_match(struct ip6t_entry_match *m,
 		       const char *name,
 		       const struct ip6t_ip6 *ipv6,
@@ -1518,7 +1522,7 @@ compat_find_calc_match(struct ip6t_entry_match *m,
 	return 0;
 }
 
-static inline int
+static int
 compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
 {
 	if (i && (*i)-- == 0)
@@ -1528,7 +1532,7 @@ compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
 	return 0;
 }
 
-static inline int
+static int
 compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i)
 {
 	struct ip6t_entry_target *t;
@@ -1543,7 +1547,7 @@ compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i)
 	return 0;
 }
 
-static inline int
+static int
 check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
 				  struct xt_table_info *newinfo,
 				  unsigned int *size,
@@ -1666,7 +1670,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
 	return ret;
 }
 
-static inline int compat_check_entry(struct ip6t_entry *e, const char *name,
+static int compat_check_entry(struct ip6t_entry *e, const char *name,
 				     unsigned int *i)
 {
 	int j, ret;
-- 
cgit v1.2.3


From 9ba99b0d3f45d0aedeafce1cfa4f720b19d04477 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 14 Jan 2008 23:44:26 -0800
Subject: [NETFILTER]: ipt_REJECT: properly handle IP options

The current TCP RST construction reuses the old packet and can't
deal with IP options as a consequence of that. Construct the
RST from scratch instead.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_REJECT.c | 102 +++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 65 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index e3c2ecc341e..22606e2baa1 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -35,11 +35,8 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
 	struct sk_buff *nskb;
-	struct iphdr *niph;
+	struct iphdr *oiph, *niph;
 	struct tcphdr _otcph, *oth, *tcph;
-	__be16 tmp_port;
-	__be32 tmp_addr;
-	int needs_ack;
 	unsigned int addr_type;
 
 	/* IP header checks: fragment. */
@@ -58,69 +55,47 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* Check checksum */
 	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
 		return;
+	oiph = ip_hdr(oldskb);
 
-	/* We need a linear, writeable skb.  We also need to expand
-	   headroom in case hh_len of incoming interface < hh_len of
-	   outgoing interface */
-	nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
-			       GFP_ATOMIC);
+	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+			 LL_MAX_HEADER, GFP_ATOMIC);
 	if (!nskb)
 		return;
 
-	/* This packet will not be the same as the other: clear nf fields */
-	nf_reset(nskb);
-	nskb->mark = 0;
-	skb_init_secmark(nskb);
-
-	skb_shinfo(nskb)->gso_size = 0;
-	skb_shinfo(nskb)->gso_segs = 0;
-	skb_shinfo(nskb)->gso_type = 0;
-
-	tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
-
-	/* Swap source and dest */
-	niph = ip_hdr(nskb);
-	tmp_addr = niph->saddr;
-	niph->saddr = niph->daddr;
-	niph->daddr = tmp_addr;
-	tmp_port = tcph->source;
-	tcph->source = tcph->dest;
-	tcph->dest = tmp_port;
-
-	/* Truncate to length (no data) */
-	tcph->doff = sizeof(struct tcphdr)/4;
-	skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
-
-	if (tcph->ack) {
-		needs_ack = 0;
+	skb_reserve(nskb, LL_MAX_HEADER);
+
+	skb_reset_network_header(nskb);
+	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+	niph->version	= 4;
+	niph->ihl	= sizeof(struct iphdr) / 4;
+	niph->tos	= 0;
+	niph->id	= 0;
+	niph->frag_off	= htons(IP_DF);
+	niph->protocol	= IPPROTO_TCP;
+	niph->check	= 0;
+	niph->saddr	= oiph->daddr;
+	niph->daddr	= oiph->saddr;
+
+	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+	memset(tcph, 0, sizeof(*tcph));
+	tcph->source	= oth->dest;
+	tcph->dest	= oth->source;
+	tcph->doff	= sizeof(struct tcphdr) / 4;
+
+	if (oth->ack)
 		tcph->seq = oth->ack_seq;
-		tcph->ack_seq = 0;
-	} else {
-		needs_ack = 1;
+	else {
 		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
 				      oldskb->len - ip_hdrlen(oldskb) -
 				      (oth->doff << 2));
-		tcph->seq = 0;
+		tcph->ack = 1;
 	}
 
-	/* Reset flags */
-	((u_int8_t *)tcph)[13] = 0;
-	tcph->rst = 1;
-	tcph->ack = needs_ack;
-
-	tcph->window = 0;
-	tcph->urg_ptr = 0;
-
-	/* Adjust TCP checksum */
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(sizeof(struct tcphdr),
-				   niph->saddr, niph->daddr,
-				   csum_partial(tcph,
-						sizeof(struct tcphdr), 0));
-
-	/* Set DF, id = 0 */
-	niph->frag_off = htons(IP_DF);
-	niph->id = 0;
+	tcph->rst	= 1;
+	tcph->check	= tcp_v4_check(sizeof(struct tcphdr),
+				       niph->saddr, niph->daddr,
+				       csum_partial(tcph,
+						    sizeof(struct tcphdr), 0));
 
 	addr_type = RTN_UNSPEC;
 	if (hook != NF_INET_FORWARD
@@ -130,14 +105,16 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	   )
 		addr_type = RTN_LOCAL;
 
+	/* ip_route_me_harder expects skb->dst to be set */
+	dst_hold(oldskb->dst);
+	nskb->dst = oldskb->dst;
+
 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
+	niph->ttl	= dst_metric(nskb->dst, RTAX_HOPLIMIT);
 	nskb->ip_summed = CHECKSUM_NONE;
 
-	/* Adjust IP TTL */
-	niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
-
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(nskb->dst))
 		goto free_nskb;
@@ -163,11 +140,6 @@ reject_tg(struct sk_buff *skb, const struct net_device *in,
 {
 	const struct ipt_reject_info *reject = targinfo;
 
-	/* Our naive response construction doesn't deal with IP
-	   options, and probably shouldn't try. */
-	if (ip_hdrlen(skb) != sizeof(struct iphdr))
-		return NF_DROP;
-
 	/* WARNING: This code causes reentry within iptables.
 	   This means that the iptables jump stack is now crap.  We
 	   must return an absolute verdict. --RR */
-- 
cgit v1.2.3


From 77e2420b85573f57c2774775ead81ebb500b803d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:44:49 -0800
Subject: [NETFILTER]: nf_conntrack_{tcp,sctp}: mark state table const

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 2 +-
 net/netfilter/nf_conntrack_proto_tcp.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 9296fd28189..47c1dbead84 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -110,7 +110,7 @@ cookie echoed to closed.
 */
 
 /* SCTP conntrack state transitions */
-static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
 	{
 /*	ORIGINAL	*/
 /*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1d496b912bd..be19bd94ddf 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -149,7 +149,7 @@ enum tcp_bit_set {
  *	if they are invalid
  *	or we do not support the request (simultaneous open)
  */
-static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	{
 /* ORIGINAL */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-- 
cgit v1.2.3


From a5e73c29d9243cc2e889a9d7155f331923eee655 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:45:11 -0800
Subject: [NETFILTER]: nf_conntrack_{tcp,sctp}: shrink state table

The TCP and SCTP conntrack state transition tables only holds
small numbers, but gcc uses 4 byte per entry for the enum. Switching
to an u8 reduces the size from 480 to 120 bytes for TCP and from
576 to 144 bytes for SCTP.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 2 +-
 net/netfilter/nf_conntrack_proto_tcp.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 47c1dbead84..9a5ec53b442 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -110,7 +110,7 @@ cookie echoed to closed.
 */
 
 /* SCTP conntrack state transitions */
-static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
 	{
 /*	ORIGINAL	*/
 /*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index be19bd94ddf..dbd8f84fa19 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -149,7 +149,7 @@ enum tcp_bit_set {
  *	if they are invalid
  *	or we do not support the request (simultaneous open)
  */
-static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	{
 /* ORIGINAL */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-- 
cgit v1.2.3


From 2d6462869f8c2edca9dbb53ca3b661a52fc4c144 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:45:32 -0800
Subject: [NETFILTER]: nf_conntrack_tcp: remove timeout indirection

Instead of keeping pointers to the timeout values in a table, simply
put the timeout values in the table directly.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 67 ++++++++++++++--------------------
 1 file changed, 28 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index dbd8f84fa19..64c9b910419 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -64,32 +64,21 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
-static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
-static unsigned int nf_ct_tcp_timeout_established __read_mostly =   5 DAYS;
-static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
-static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
-static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
-static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
-static unsigned int nf_ct_tcp_timeout_close __read_mostly =        10 SECS;
-
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds
    to ~13-30min depending on RTO. */
 static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
 
-static unsigned int * tcp_timeouts[] = {
-    NULL,                              /* TCP_CONNTRACK_NONE */
-    &nf_ct_tcp_timeout_syn_sent,       /* TCP_CONNTRACK_SYN_SENT, */
-    &nf_ct_tcp_timeout_syn_recv,       /* TCP_CONNTRACK_SYN_RECV, */
-    &nf_ct_tcp_timeout_established,    /* TCP_CONNTRACK_ESTABLISHED, */
-    &nf_ct_tcp_timeout_fin_wait,       /* TCP_CONNTRACK_FIN_WAIT, */
-    &nf_ct_tcp_timeout_close_wait,     /* TCP_CONNTRACK_CLOSE_WAIT, */
-    &nf_ct_tcp_timeout_last_ack,       /* TCP_CONNTRACK_LAST_ACK, */
-    &nf_ct_tcp_timeout_time_wait,      /* TCP_CONNTRACK_TIME_WAIT, */
-    &nf_ct_tcp_timeout_close,          /* TCP_CONNTRACK_CLOSE, */
-    NULL,                              /* TCP_CONNTRACK_LISTEN */
- };
+static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
+	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
+	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
+	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
+	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
+	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
+	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
+	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
+	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
+};
 
 #define sNO TCP_CONNTRACK_NONE
 #define sSS TCP_CONNTRACK_SYN_SENT
@@ -941,8 +930,8 @@ static int tcp_packet(struct nf_conn *conntrack,
 		|| new_state == TCP_CONNTRACK_CLOSE))
 		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 	timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
-		  ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+		  && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
+		  ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1163,56 +1152,56 @@ static struct ctl_table_header *tcp_sysctl_header;
 static struct ctl_table tcp_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
-		.data		= &nf_ct_tcp_timeout_syn_sent,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
-		.data		= &nf_ct_tcp_timeout_syn_recv,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_established",
-		.data		= &nf_ct_tcp_timeout_established,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
-		.data		= &nf_ct_tcp_timeout_fin_wait,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_close_wait",
-		.data		= &nf_ct_tcp_timeout_close_wait,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_last_ack",
-		.data		= &nf_ct_tcp_timeout_last_ack,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_time_wait",
-		.data		= &nf_ct_tcp_timeout_time_wait,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_tcp_timeout_close",
-		.data		= &nf_ct_tcp_timeout_close,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -1257,56 +1246,56 @@ static struct ctl_table tcp_sysctl_table[] = {
 static struct ctl_table tcp_compat_sysctl_table[] = {
 	{
 		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
-		.data		= &nf_ct_tcp_timeout_syn_sent,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
-		.data		= &nf_ct_tcp_timeout_syn_recv,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_established",
-		.data		= &nf_ct_tcp_timeout_established,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
-		.data		= &nf_ct_tcp_timeout_fin_wait,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_close_wait",
-		.data		= &nf_ct_tcp_timeout_close_wait,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_last_ack",
-		.data		= &nf_ct_tcp_timeout_last_ack,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_time_wait",
-		.data		= &nf_ct_tcp_timeout_time_wait,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_close",
-		.data		= &nf_ct_tcp_timeout_close,
+		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
-- 
cgit v1.2.3


From 5447d4777c48218e90844fa6f6e544119075faad Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:45:48 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: basic cleanups

Reindent switch cases properly, get rid of weird constructs like "!(x == y)",
put logical operations on the end of the line instead of the next line, get
rid of superfluous braces.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 140 ++++++++++++++++----------------
 1 file changed, 72 insertions(+), 68 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 9a5ec53b442..689cc505bf6 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -206,28 +206,26 @@ static int do_basic_checks(struct nf_conn *conntrack,
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		pr_debug("Chunk Num: %d  Type: %d\n", count, sch->type);
 
-		if (sch->type == SCTP_CID_INIT
-			|| sch->type == SCTP_CID_INIT_ACK
-			|| sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+		if (sch->type == SCTP_CID_INIT ||
+		    sch->type == SCTP_CID_INIT_ACK ||
+		    sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
 			flag = 1;
-		}
 
 		/*
 		 * Cookie Ack/Echo chunks not the first OR
 		 * Init / Init Ack / Shutdown compl chunks not the only chunks
 		 * OR zero-length.
 		 */
-		if (((sch->type == SCTP_CID_COOKIE_ACK
-			|| sch->type == SCTP_CID_COOKIE_ECHO
-			|| flag)
-		      && count !=0) || !sch->length) {
+		if (((sch->type == SCTP_CID_COOKIE_ACK ||
+		      sch->type == SCTP_CID_COOKIE_ECHO ||
+		      flag) &&
+		     count != 0) || !sch->length) {
 			pr_debug("Basic checks failed\n");
 			return 1;
 		}
 
-		if (map) {
+		if (map)
 			set_bit(sch->type, (void *)map);
-		}
 	}
 
 	pr_debug("Basic checks passed\n");
@@ -243,39 +241,48 @@ static int new_state(enum ip_conntrack_dir dir,
 	pr_debug("Chunk type: %d\n", chunk_type);
 
 	switch (chunk_type) {
-		case SCTP_CID_INIT:
-			pr_debug("SCTP_CID_INIT\n");
-			i = 0; break;
-		case SCTP_CID_INIT_ACK:
-			pr_debug("SCTP_CID_INIT_ACK\n");
-			i = 1; break;
-		case SCTP_CID_ABORT:
-			pr_debug("SCTP_CID_ABORT\n");
-			i = 2; break;
-		case SCTP_CID_SHUTDOWN:
-			pr_debug("SCTP_CID_SHUTDOWN\n");
-			i = 3; break;
-		case SCTP_CID_SHUTDOWN_ACK:
-			pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
-			i = 4; break;
-		case SCTP_CID_ERROR:
-			pr_debug("SCTP_CID_ERROR\n");
-			i = 5; break;
-		case SCTP_CID_COOKIE_ECHO:
-			pr_debug("SCTP_CID_COOKIE_ECHO\n");
-			i = 6; break;
-		case SCTP_CID_COOKIE_ACK:
-			pr_debug("SCTP_CID_COOKIE_ACK\n");
-			i = 7; break;
-		case SCTP_CID_SHUTDOWN_COMPLETE:
-			pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
-			i = 8; break;
-		default:
-			/* Other chunks like DATA, SACK, HEARTBEAT and
-			its ACK do not cause a change in state */
-			pr_debug("Unknown chunk type, Will stay in %s\n",
-				 sctp_conntrack_names[cur_state]);
-			return cur_state;
+	case SCTP_CID_INIT:
+		pr_debug("SCTP_CID_INIT\n");
+		i = 0;
+		break;
+	case SCTP_CID_INIT_ACK:
+		pr_debug("SCTP_CID_INIT_ACK\n");
+		i = 1;
+		break;
+	case SCTP_CID_ABORT:
+		pr_debug("SCTP_CID_ABORT\n");
+		i = 2;
+		break;
+	case SCTP_CID_SHUTDOWN:
+		pr_debug("SCTP_CID_SHUTDOWN\n");
+		i = 3;
+		break;
+	case SCTP_CID_SHUTDOWN_ACK:
+		pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
+		i = 4;
+		break;
+	case SCTP_CID_ERROR:
+		pr_debug("SCTP_CID_ERROR\n");
+		i = 5;
+		break;
+	case SCTP_CID_COOKIE_ECHO:
+		pr_debug("SCTP_CID_COOKIE_ECHO\n");
+		i = 6;
+		break;
+	case SCTP_CID_COOKIE_ACK:
+		pr_debug("SCTP_CID_COOKIE_ACK\n");
+		i = 7;
+		break;
+	case SCTP_CID_SHUTDOWN_COMPLETE:
+		pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
+		i = 8;
+		break;
+	default:
+		/* Other chunks like DATA, SACK, HEARTBEAT and
+		its ACK do not cause a change in state */
+		pr_debug("Unknown chunk type, Will stay in %s\n",
+			 sctp_conntrack_names[cur_state]);
+		return cur_state;
 	}
 
 	pr_debug("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n",
@@ -307,12 +314,12 @@ static int sctp_packet(struct nf_conn *conntrack,
 		return -1;
 
 	/* Check the verification tag (Sec 8.5) */
-	if (!test_bit(SCTP_CID_INIT, (void *)map)
-		&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
-		&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
-		&& !test_bit(SCTP_CID_ABORT, (void *)map)
-		&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
-		&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+	if (!test_bit(SCTP_CID_INIT, (void *)map) &&
+	    !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) &&
+	    !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) &&
+	    !test_bit(SCTP_CID_ABORT, (void *)map) &&
+	    !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) &&
+	    sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
 		pr_debug("Verification tag check failed\n");
 		return -1;
 	}
@@ -330,24 +337,22 @@ static int sctp_packet(struct nf_conn *conntrack,
 			}
 		} else if (sch->type == SCTP_CID_ABORT) {
 			/* Sec 8.5.1 (B) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-				&& !(sh->vtag == conntrack->proto.sctp.vtag
-							[1 - CTINFO2DIR(ctinfo)])) {
+			if (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
+			    sh->vtag != conntrack->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)]) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
 			/* Sec 8.5.1 (C) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-				&& !(sh->vtag == conntrack->proto.sctp.vtag
-							[1 - CTINFO2DIR(ctinfo)]
-					&& (sch->flags & 1))) {
+			if (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
+			    sh->vtag != conntrack->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)] &&
+			    (sch->flags & 1)) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
 			/* Sec 8.5.1 (D) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+			if (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
@@ -366,15 +371,15 @@ static int sctp_packet(struct nf_conn *conntrack,
 		}
 
 		/* If it is an INIT or an INIT ACK note down the vtag */
-		if (sch->type == SCTP_CID_INIT
-			|| sch->type == SCTP_CID_INIT_ACK) {
+		if (sch->type == SCTP_CID_INIT ||
+		    sch->type == SCTP_CID_INIT_ACK) {
 			sctp_inithdr_t _inithdr, *ih;
 
 			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
 						sizeof(_inithdr), &_inithdr);
 			if (ih == NULL) {
-					write_unlock_bh(&sctp_lock);
-					return -1;
+				write_unlock_bh(&sctp_lock);
+				return -1;
 			}
 			pr_debug("Setting vtag %x for dir %d\n",
 				 ih->init_tag, !CTINFO2DIR(ctinfo));
@@ -389,9 +394,9 @@ static int sctp_packet(struct nf_conn *conntrack,
 
 	nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
 
-	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
-		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
-		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED &&
+	    CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY &&
+	    newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &conntrack->status);
 		nf_conntrack_event_cache(IPCT_STATUS, skb);
@@ -418,11 +423,10 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 		return 0;
 
 	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-	if ((test_bit (SCTP_CID_ABORT, (void *)map))
-		|| (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
-		|| (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
+	if (test_bit (SCTP_CID_ABORT, (void *)map) ||
+	    test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) ||
+	    test_bit (SCTP_CID_COOKIE_ACK, (void *)map))
 		return 0;
-	}
 
 	newconntrack = SCTP_CONNTRACK_MAX;
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
-- 
cgit v1.2.3


From 35c6d3cbe1b97b860087f6082e764ac8da2a12b2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:46:05 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: use proper types for bitops

Use unsigned long instead of char for the bitmap and removed lots
of casts.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 689cc505bf6..5166bb388ff 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -195,7 +195,7 @@ for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
 static int do_basic_checks(struct nf_conn *conntrack,
 			   const struct sk_buff *skb,
 			   unsigned int dataoff,
-			   char *map)
+			   unsigned long *map)
 {
 	u_int32_t offset, count;
 	sctp_chunkhdr_t _sch, *sch;
@@ -225,7 +225,7 @@ static int do_basic_checks(struct nf_conn *conntrack,
 		}
 
 		if (map)
-			set_bit(sch->type, (void *)map);
+			set_bit(sch->type, map);
 	}
 
 	pr_debug("Basic checks passed\n");
@@ -304,7 +304,7 @@ static int sctp_packet(struct nf_conn *conntrack,
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
+	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
@@ -314,11 +314,11 @@ static int sctp_packet(struct nf_conn *conntrack,
 		return -1;
 
 	/* Check the verification tag (Sec 8.5) */
-	if (!test_bit(SCTP_CID_INIT, (void *)map) &&
-	    !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) &&
-	    !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) &&
-	    !test_bit(SCTP_CID_ABORT, (void *)map) &&
-	    !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) &&
+	if (!test_bit(SCTP_CID_INIT, map) &&
+	    !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+	    !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+	    !test_bit(SCTP_CID_ABORT, map) &&
+	    !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
 	    sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
 		pr_debug("Verification tag check failed\n");
 		return -1;
@@ -413,7 +413,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
+	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
@@ -423,9 +423,9 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 		return 0;
 
 	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-	if (test_bit (SCTP_CID_ABORT, (void *)map) ||
-	    test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) ||
-	    test_bit (SCTP_CID_COOKIE_ACK, (void *)map))
+	if (test_bit(SCTP_CID_ABORT, map) ||
+	    test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+	    test_bit(SCTP_CID_COOKIE_ACK, map))
 		return 0;
 
 	newconntrack = SCTP_CONNTRACK_MAX;
-- 
cgit v1.2.3


From 112f35c9c164e06e038d506dd3eb15e76829ef8a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:46:20 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: reduce line length

Reduce the length of some overly long lines by renaming all
"conntrack" variables to "ct".

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 43 ++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 5166bb388ff..84e37e92b76 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -173,13 +173,12 @@ static int sctp_print_tuple(struct seq_file *s,
 }
 
 /* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
+static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
 {
 	enum sctp_conntrack state;
 
 	read_lock_bh(&sctp_lock);
-	state = conntrack->proto.sctp.state;
+	state = ct->proto.sctp.state;
 	read_unlock_bh(&sctp_lock);
 
 	return seq_printf(s, "%s ", sctp_conntrack_names[state]);
@@ -192,7 +191,7 @@ for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
 	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
 
 /* Some validity checks to make sure the chunks are fine */
-static int do_basic_checks(struct nf_conn *conntrack,
+static int do_basic_checks(struct nf_conn *ct,
 			   const struct sk_buff *skb,
 			   unsigned int dataoff,
 			   unsigned long *map)
@@ -293,7 +292,7 @@ static int new_state(enum ip_conntrack_dir dir,
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int sctp_packet(struct nf_conn *conntrack,
+static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
@@ -310,7 +309,7 @@ static int sctp_packet(struct nf_conn *conntrack,
 	if (sh == NULL)
 		return -1;
 
-	if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
+	if (do_basic_checks(ct, skb, dataoff, map) != 0)
 		return -1;
 
 	/* Check the verification tag (Sec 8.5) */
@@ -319,7 +318,7 @@ static int sctp_packet(struct nf_conn *conntrack,
 	    !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
 	    !test_bit(SCTP_CID_ABORT, map) &&
 	    !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
-	    sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
+	    sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
 		pr_debug("Verification tag check failed\n");
 		return -1;
 	}
@@ -337,28 +336,28 @@ static int sctp_packet(struct nf_conn *conntrack,
 			}
 		} else if (sch->type == SCTP_CID_ABORT) {
 			/* Sec 8.5.1 (B) */
-			if (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
-			    sh->vtag != conntrack->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)]) {
+			if (sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
+			    sh->vtag != ct->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)]) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
 			/* Sec 8.5.1 (C) */
-			if (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
-			    sh->vtag != conntrack->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)] &&
+			if (sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
+			    sh->vtag != ct->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)] &&
 			    (sch->flags & 1)) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
 			/* Sec 8.5.1 (D) */
-			if (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
+			if (sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		}
 
-		oldsctpstate = conntrack->proto.sctp.state;
+		oldsctpstate = ct->proto.sctp.state;
 		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
 
 		/* Invalid */
@@ -383,22 +382,22 @@ static int sctp_packet(struct nf_conn *conntrack,
 			}
 			pr_debug("Setting vtag %x for dir %d\n",
 				 ih->init_tag, !CTINFO2DIR(ctinfo));
-			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
+			ct->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
 		}
 
-		conntrack->proto.sctp.state = newconntrack;
+		ct->proto.sctp.state = newconntrack;
 		if (oldsctpstate != newconntrack)
 			nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
 		write_unlock_bh(&sctp_lock);
 	}
 
-	nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
+	nf_ct_refresh_acct(ct, ctinfo, skb, *sctp_timeouts[newconntrack]);
 
 	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED &&
 	    CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY &&
 	    newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+		set_bit(IPS_ASSURED_BIT, &ct->status);
 		nf_conntrack_event_cache(IPCT_STATUS, skb);
 	}
 
@@ -406,7 +405,7 @@ static int sctp_packet(struct nf_conn *conntrack,
 }
 
 /* Called when a new connection for this protocol found. */
-static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
+static int sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		    unsigned int dataoff)
 {
 	enum sctp_conntrack newconntrack;
@@ -419,7 +418,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 	if (sh == NULL)
 		return 0;
 
-	if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
+	if (do_basic_checks(ct, skb, dataoff, map) != 0)
 		return 0;
 
 	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
@@ -454,7 +453,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 				pr_debug("Setting vtag %x for new conn\n",
 					 ih->init_tag);
 
-				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+				ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
 								ih->init_tag;
 			} else {
 				/* Sec 8.5.1 (A) */
@@ -466,10 +465,10 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 		else {
 			pr_debug("Setting vtag %x for new conn OOTB\n",
 				 sh->vtag);
-			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
 		}
 
-		conntrack->proto.sctp.state = newconntrack;
+		ct->proto.sctp.state = newconntrack;
 	}
 
 	return 1;
-- 
cgit v1.2.3


From 8528819adc613e0b4bc3e5cb4123b4b33d2b03c4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:46:37 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: reduce line length further

Eliminate a few lines over 80 characters by using a local variable to
hold the conntrack direction instead of using CTINFO2DIR everywhere.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 84e37e92b76..fdabef56bf1 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -300,6 +300,7 @@ static int sctp_packet(struct nf_conn *ct,
 		       unsigned int hooknum)
 {
 	enum sctp_conntrack newconntrack, oldsctpstate;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
@@ -318,7 +319,7 @@ static int sctp_packet(struct nf_conn *ct,
 	    !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
 	    !test_bit(SCTP_CID_ABORT, map) &&
 	    !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
-	    sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
+	    sh->vtag != ct->proto.sctp.vtag[dir]) {
 		pr_debug("Verification tag check failed\n");
 		return -1;
 	}
@@ -336,35 +337,35 @@ static int sctp_packet(struct nf_conn *ct,
 			}
 		} else if (sch->type == SCTP_CID_ABORT) {
 			/* Sec 8.5.1 (B) */
-			if (sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
-			    sh->vtag != ct->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)]) {
+			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+			    sh->vtag != ct->proto.sctp.vtag[!dir]) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
 			/* Sec 8.5.1 (C) */
-			if (sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)] &&
-			    sh->vtag != ct->proto.sctp.vtag[1 - CTINFO2DIR(ctinfo)] &&
+			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+			    sh->vtag != ct->proto.sctp.vtag[!dir] &&
 			    (sch->flags & 1)) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
 			/* Sec 8.5.1 (D) */
-			if (sh->vtag != ct->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) {
+			if (sh->vtag != ct->proto.sctp.vtag[dir]) {
 				write_unlock_bh(&sctp_lock);
 				return -1;
 			}
 		}
 
 		oldsctpstate = ct->proto.sctp.state;
-		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
+		newconntrack = new_state(dir, oldsctpstate, sch->type);
 
 		/* Invalid */
 		if (newconntrack == SCTP_CONNTRACK_MAX) {
 			pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
 				 "conntrack=%u\n",
-				 CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
+				 dir, sch->type, oldsctpstate);
 			write_unlock_bh(&sctp_lock);
 			return -1;
 		}
@@ -381,8 +382,8 @@ static int sctp_packet(struct nf_conn *ct,
 				return -1;
 			}
 			pr_debug("Setting vtag %x for dir %d\n",
-				 ih->init_tag, !CTINFO2DIR(ctinfo));
-			ct->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
+				 ih->init_tag, !dir);
+			ct->proto.sctp.vtag[!dir] = ih->init_tag;
 		}
 
 		ct->proto.sctp.state = newconntrack;
@@ -394,7 +395,7 @@ static int sctp_packet(struct nf_conn *ct,
 	nf_ct_refresh_acct(ct, ctinfo, skb, *sctp_timeouts[newconntrack]);
 
 	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED &&
-	    CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY &&
+	    dir == IP_CT_DIR_REPLY &&
 	    newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-- 
cgit v1.2.3


From b37e933ac7bdad2d587a6048babb8db2718460de Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:46:52 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: consolidate sctp_packet() error paths

Consolidate error paths and use proper symbolic return value instead
of magic values.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 46 ++++++++++++++-------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fdabef56bf1..e52b6b95b30 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -291,7 +291,7 @@ static int new_state(enum ip_conntrack_dir dir,
 	return sctp_conntracks[dir][i][cur_state];
 }
 
-/* Returns verdict for packet, or -1 for invalid. */
+/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
 static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
@@ -308,10 +308,10 @@ static int sctp_packet(struct nf_conn *ct,
 
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
-		return -1;
+		goto out;
 
 	if (do_basic_checks(ct, skb, dataoff, map) != 0)
-		return -1;
+		goto out;
 
 	/* Check the verification tag (Sec 8.5) */
 	if (!test_bit(SCTP_CID_INIT, map) &&
@@ -321,7 +321,7 @@ static int sctp_packet(struct nf_conn *ct,
 	    !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
 	    sh->vtag != ct->proto.sctp.vtag[dir]) {
 		pr_debug("Verification tag check failed\n");
-		return -1;
+		goto out;
 	}
 
 	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
@@ -331,31 +331,23 @@ static int sctp_packet(struct nf_conn *ct,
 		/* Special cases of Verification tag check (Sec 8.5.1) */
 		if (sch->type == SCTP_CID_INIT) {
 			/* Sec 8.5.1 (A) */
-			if (sh->vtag != 0) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (sh->vtag != 0)
+				goto out_unlock;
 		} else if (sch->type == SCTP_CID_ABORT) {
 			/* Sec 8.5.1 (B) */
 			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
-			    sh->vtag != ct->proto.sctp.vtag[!dir]) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			    sh->vtag != ct->proto.sctp.vtag[!dir])
+				goto out_unlock;
 		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
 			/* Sec 8.5.1 (C) */
 			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
 			    sh->vtag != ct->proto.sctp.vtag[!dir] &&
-			    (sch->flags & 1)) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			    (sch->flags & 1))
+				goto out_unlock;
 		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
 			/* Sec 8.5.1 (D) */
-			if (sh->vtag != ct->proto.sctp.vtag[dir]) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (sh->vtag != ct->proto.sctp.vtag[dir])
+				goto out_unlock;
 		}
 
 		oldsctpstate = ct->proto.sctp.state;
@@ -366,8 +358,7 @@ static int sctp_packet(struct nf_conn *ct,
 			pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
 				 "conntrack=%u\n",
 				 dir, sch->type, oldsctpstate);
-			write_unlock_bh(&sctp_lock);
-			return -1;
+			goto out_unlock;
 		}
 
 		/* If it is an INIT or an INIT ACK note down the vtag */
@@ -377,10 +368,8 @@ static int sctp_packet(struct nf_conn *ct,
 
 			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
 						sizeof(_inithdr), &_inithdr);
-			if (ih == NULL) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (ih == NULL)
+				goto out_unlock;
 			pr_debug("Setting vtag %x for dir %d\n",
 				 ih->init_tag, !dir);
 			ct->proto.sctp.vtag[!dir] = ih->init_tag;
@@ -403,6 +392,11 @@ static int sctp_packet(struct nf_conn *ct,
 	}
 
 	return NF_ACCEPT;
+
+out_unlock:
+	write_unlock_bh(&sctp_lock);
+out:
+	return -NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
-- 
cgit v1.2.3


From efe9f68afeaf2ce12636990a8c811fb1cc23b12b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:47:09 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: rename "newconntrack" variable

The name is misleading, it holds the new connection state, so rename it
to "newstate". Also rename "oldsctpstate" to "oldstate" for consistency.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 42 ++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index e52b6b95b30..7baabc04534 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -231,9 +231,9 @@ static int do_basic_checks(struct nf_conn *ct,
 	return count == 0;
 }
 
-static int new_state(enum ip_conntrack_dir dir,
-		     enum sctp_conntrack cur_state,
-		     int chunk_type)
+static int sctp_new_state(enum ip_conntrack_dir dir,
+			  enum sctp_conntrack cur_state,
+			  int chunk_type)
 {
 	int i;
 
@@ -299,7 +299,7 @@ static int sctp_packet(struct nf_conn *ct,
 		       int pf,
 		       unsigned int hooknum)
 {
-	enum sctp_conntrack newconntrack, oldsctpstate;
+	enum sctp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
@@ -324,7 +324,7 @@ static int sctp_packet(struct nf_conn *ct,
 		goto out;
 	}
 
-	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
+	old_state = new_state = SCTP_CONNTRACK_MAX;
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		write_lock_bh(&sctp_lock);
 
@@ -350,14 +350,14 @@ static int sctp_packet(struct nf_conn *ct,
 				goto out_unlock;
 		}
 
-		oldsctpstate = ct->proto.sctp.state;
-		newconntrack = new_state(dir, oldsctpstate, sch->type);
+		old_state = ct->proto.sctp.state;
+		new_state = sctp_new_state(dir, old_state, sch->type);
 
 		/* Invalid */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
+		if (new_state == SCTP_CONNTRACK_MAX) {
 			pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
 				 "conntrack=%u\n",
-				 dir, sch->type, oldsctpstate);
+				 dir, sch->type, old_state);
 			goto out_unlock;
 		}
 
@@ -375,17 +375,17 @@ static int sctp_packet(struct nf_conn *ct,
 			ct->proto.sctp.vtag[!dir] = ih->init_tag;
 		}
 
-		ct->proto.sctp.state = newconntrack;
-		if (oldsctpstate != newconntrack)
+		ct->proto.sctp.state = new_state;
+		if (old_state != new_state)
 			nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
 		write_unlock_bh(&sctp_lock);
 	}
 
-	nf_ct_refresh_acct(ct, ctinfo, skb, *sctp_timeouts[newconntrack]);
+	nf_ct_refresh_acct(ct, ctinfo, skb, *sctp_timeouts[new_state]);
 
-	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED &&
+	if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
 	    dir == IP_CT_DIR_REPLY &&
-	    newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+	    new_state == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &ct->status);
 		nf_conntrack_event_cache(IPCT_STATUS, skb);
@@ -403,7 +403,7 @@ out:
 static int sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		    unsigned int dataoff)
 {
-	enum sctp_conntrack newconntrack;
+	enum sctp_conntrack new_state;
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
@@ -422,15 +422,15 @@ static int sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	    test_bit(SCTP_CID_COOKIE_ACK, map))
 		return 0;
 
-	newconntrack = SCTP_CONNTRACK_MAX;
+	new_state = SCTP_CONNTRACK_MAX;
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		/* Don't need lock here: this conntrack not in circulation yet */
-		newconntrack = new_state(IP_CT_DIR_ORIGINAL,
-					 SCTP_CONNTRACK_NONE, sch->type);
+		new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+					   SCTP_CONNTRACK_NONE, sch->type);
 
 		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_NONE ||
-		    newconntrack == SCTP_CONNTRACK_MAX) {
+		if (new_state == SCTP_CONNTRACK_NONE ||
+		    new_state == SCTP_CONNTRACK_MAX) {
 			pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
 			return 0;
 		}
@@ -463,7 +463,7 @@ static int sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
 		}
 
-		ct->proto.sctp.state = newconntrack;
+		ct->proto.sctp.state = new_state;
 	}
 
 	return 1;
-- 
cgit v1.2.3


From 4a64830af0fd4dbec908cfbab117def5086acd4a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:47:25 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: don't take sctp_lock once per chunk

Don't take and release the lock once per SCTP chunk, simply hold it
the entire time while iterating through the chunks.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 7baabc04534..177e8f60aa1 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -325,9 +325,8 @@ static int sctp_packet(struct nf_conn *ct,
 	}
 
 	old_state = new_state = SCTP_CONNTRACK_MAX;
+	write_lock_bh(&sctp_lock);
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
-		write_lock_bh(&sctp_lock);
-
 		/* Special cases of Verification tag check (Sec 8.5.1) */
 		if (sch->type == SCTP_CID_INIT) {
 			/* Sec 8.5.1 (A) */
@@ -378,8 +377,8 @@ static int sctp_packet(struct nf_conn *ct,
 		ct->proto.sctp.state = new_state;
 		if (old_state != new_state)
 			nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
-		write_unlock_bh(&sctp_lock);
 	}
+	write_unlock_bh(&sctp_lock);
 
 	nf_ct_refresh_acct(ct, ctinfo, skb, *sctp_timeouts[new_state]);
 
-- 
cgit v1.2.3


From 9b1c2cfd7a8b3840cf5c99d0560e641ff4a3425b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:48:02 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: replace magic value by symbolic
 constant

Use SCTP_CHUNK_FLAG_T instead of 0x1.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 177e8f60aa1..14b1c02c477 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -341,7 +341,7 @@ static int sctp_packet(struct nf_conn *ct,
 			/* Sec 8.5.1 (C) */
 			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
 			    sh->vtag != ct->proto.sctp.vtag[!dir] &&
-			    (sch->flags & 1))
+			    sch->flags & SCTP_CHUNK_FLAG_T)
 				goto out_unlock;
 		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
 			/* Sec 8.5.1 (D) */
-- 
cgit v1.2.3


From 86c0bf4095b35b978540aa43b12840d138a0b376 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:48:17 -0800
Subject: [NETFILTER]: nf_conntrack_sctp: remove timeout indirection

Instead of keeping pointers to the timeout values in a table, simply
put the timeout values in the table directly.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 57 ++++++++++++++-------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 14b1c02c477..21d29e782ba 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -49,24 +49,15 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned int nf_ct_sctp_timeout_closed __read_mostly          =  10 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly     =   3 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly   =   3 SECS;
-static unsigned int nf_ct_sctp_timeout_established __read_mostly     =   5 DAYS;
-static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly   = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly   = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
-
-static unsigned int * sctp_timeouts[]
-= { NULL,                                  /* SCTP_CONNTRACK_NONE  */
-    &nf_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
-    &nf_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
-    &nf_ct_sctp_timeout_cookie_echoed,     /* SCTP_CONNTRACK_COOKIE_ECHOED */
-    &nf_ct_sctp_timeout_established,       /* SCTP_CONNTRACK_ESTABLISHED */
-    &nf_ct_sctp_timeout_shutdown_sent,     /* SCTP_CONNTRACK_SHUTDOWN_SENT */
-    &nf_ct_sctp_timeout_shutdown_recd,     /* SCTP_CONNTRACK_SHUTDOWN_RECD */
-    &nf_ct_sctp_timeout_shutdown_ack_sent  /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
- };
+static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
+	[SCTP_CONNTRACK_CLOSED]			= 10 SECS,
+	[SCTP_CONNTRACK_COOKIE_WAIT]		= 3 SECS,
+	[SCTP_CONNTRACK_COOKIE_ECHOED]		= 3 SECS,
+	[SCTP_CONNTRACK_ESTABLISHED]		= 5 DAYS,
+	[SCTP_CONNTRACK_SHUTDOWN_SENT]		= 300 SECS / 1000,
+	[SCTP_CONNTRACK_SHUTDOWN_RECD]		= 300 SECS / 1000,
+	[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]	= 3 SECS,
+};
 
 #define sNO SCTP_CONNTRACK_NONE
 #define	sCL SCTP_CONNTRACK_CLOSED
@@ -380,7 +371,7 @@ static int sctp_packet(struct nf_conn *ct,
 	}
 	write_unlock_bh(&sctp_lock);
 
-	nf_ct_refresh_acct(ct, ctinfo, skb, *sctp_timeouts[new_state]);
+	nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
 
 	if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
 	    dir == IP_CT_DIR_REPLY &&
@@ -474,49 +465,49 @@ static struct ctl_table_header *sctp_sysctl_header;
 static struct ctl_table sctp_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_sctp_timeout_closed",
-		.data		= &nf_ct_sctp_timeout_closed,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_sctp_timeout_cookie_wait",
-		.data		= &nf_ct_sctp_timeout_cookie_wait,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_sctp_timeout_cookie_echoed",
-		.data		= &nf_ct_sctp_timeout_cookie_echoed,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_sctp_timeout_established",
-		.data		= &nf_ct_sctp_timeout_established,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_sctp_timeout_shutdown_sent",
-		.data		= &nf_ct_sctp_timeout_shutdown_sent,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_sctp_timeout_shutdown_recd",
-		.data		= &nf_ct_sctp_timeout_shutdown_recd,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_sctp_timeout_shutdown_ack_sent",
-		.data		= &nf_ct_sctp_timeout_shutdown_ack_sent,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -530,49 +521,49 @@ static struct ctl_table sctp_sysctl_table[] = {
 static struct ctl_table sctp_compat_sysctl_table[] = {
 	{
 		.procname	= "ip_conntrack_sctp_timeout_closed",
-		.data		= &nf_ct_sctp_timeout_closed,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_sctp_timeout_cookie_wait",
-		.data		= &nf_ct_sctp_timeout_cookie_wait,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_sctp_timeout_cookie_echoed",
-		.data		= &nf_ct_sctp_timeout_cookie_echoed,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_sctp_timeout_established",
-		.data		= &nf_ct_sctp_timeout_established,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_sctp_timeout_shutdown_sent",
-		.data		= &nf_ct_sctp_timeout_shutdown_sent,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_sctp_timeout_shutdown_recd",
-		.data		= &nf_ct_sctp_timeout_shutdown_recd,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ip_conntrack_sctp_timeout_shutdown_ack_sent",
-		.data		= &nf_ct_sctp_timeout_shutdown_ack_sent,
+		.data		= &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
-- 
cgit v1.2.3


From 4f536522dae9d5326ad1872cd254ee84681cf563 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:48:39 -0800
Subject: [NETFILTER]: kill nf_sysctl.c

Since there now is generic support for shared sysctl paths, the only
remains are the net/netfilter and net/ipv4/netfilter paths. Move them
to net/netfilter/core.c and net/ipv4/netfilter.c and kill nf_sysctl.c.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter.c      | 10 ++++++++++
 net/netfilter/Makefile    |  1 -
 net/netfilter/core.c      |  9 +++++++++
 net/netfilter/nf_sysctl.c | 25 -------------------------
 4 files changed, 19 insertions(+), 26 deletions(-)
 delete mode 100644 net/netfilter/nf_sysctl.c

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 0ed843ed420..63221553d26 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -211,3 +211,13 @@ static void ipv4_netfilter_fini(void)
 
 module_init(ipv4_netfilter_init);
 module_exit(ipv4_netfilter_fini);
+
+#ifdef CONFIG_SYSCTL
+struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, },
+	{ }
+};
+EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
+#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c910caee0d4..ea7508387f9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -4,7 +4,6 @@ nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
-obj-$(CONFIG_SYSCTL) += nf_sysctl.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e0263445484..c4065b8f9a9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -272,3 +272,12 @@ void __init netfilter_init(void)
 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
 }
+
+#ifdef CONFIG_SYSCTL
+struct ctl_path nf_net_netfilter_sysctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "netfilter", .ctl_name = NET_NETFILTER, },
+	{ }
+};
+EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
+#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_sysctl.c b/net/netfilter/nf_sysctl.c
deleted file mode 100644
index d9fcc893301..00000000000
--- a/net/netfilter/nf_sysctl.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* nf_sysctl.c	netfilter sysctl registration/unregistation
- *
- * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
- */
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-
-/* net/netfilter */
-struct ctl_path nf_net_netfilter_sysctl_path[] = {
-	{ .procname = "net", .ctl_name = CTL_NET, },
-	{ .procname = "netfilter", .ctl_name = NET_NETFILTER, },
-	{ }
-};
-EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
-
-/* net/ipv4/netfilter */
-struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
-	{ .procname = "net", .ctl_name = CTL_NET, },
-	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
-	{ .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, },
-	{ }
-};
-EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
-- 
cgit v1.2.3


From b334aadc3c5cd4dae2a44f3dac09b3ef718ccde1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:48:57 -0800
Subject: [NETFILTER]: nf_conntrack: clean up a few header files

- Remove declarations of non-existing variables and functions
- Move helper init/cleanup function declarations to nf_conntrack_helper.h
- Remove unneeded __nf_conntrack_attach declaration and make it static

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 25564073ee2..078fff0335a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -861,7 +861,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
 #endif
 
 /* Used by ipt_REJECT and ip6t_REJECT. */
-void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -878,7 +878,6 @@ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 	nskb->nfctinfo = ctinfo;
 	nf_conntrack_get(nskb->nfct);
 }
-EXPORT_SYMBOL_GPL(__nf_conntrack_attach);
 
 static inline int
 do_iter(const struct nf_conntrack_tuple_hash *i,
@@ -1122,7 +1121,7 @@ int __init nf_conntrack_init(void)
 		goto out_fini_expect;
 
 	/* For use by REJECT target */
-	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
+	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
 	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
 
 	/* Set up fake conntrack:
-- 
cgit v1.2.3


From c56cc9c07b049acc9d2ca97be0b540978c0c80bf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:49:17 -0800
Subject: [NETFILTER]: nf_conntrack: remove print_conntrack function from
 l3protos

Its unused and unlikely to ever be used.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c        | 7 -------
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 3 ---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c        | 7 -------
 net/netfilter/nf_conntrack_l3proto_generic.c          | 7 -------
 net/netfilter/nf_conntrack_standalone.c               | 3 ---
 5 files changed, 27 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 78e64951137..ac3d61d8026 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -56,12 +56,6 @@ static int ipv4_print_tuple(struct seq_file *s,
 			  NIPQUAD(tuple->dst.u3.ip));
 }
 
-static int ipv4_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns new sk_buff, or NULL */
 static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
@@ -403,7 +397,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
 	.print_tuple	 = ipv4_print_tuple,
-	.print_conntrack = ipv4_print_conntrack,
 	.get_l4proto	 = ipv4_get_l4proto,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 741f3dfaa5a..acde9952cad 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -121,9 +121,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		      ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
 		return -ENOSPC;
 
-	if (l3proto->print_conntrack(s, ct))
-		return -ENOSPC;
-
 	if (l4proto->print_conntrack(s, ct))
 		return -ENOSPC;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 97a553036dd..cf42f5cfc33 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -60,12 +60,6 @@ static int ipv6_print_tuple(struct seq_file *s,
 			  NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
 }
 
-static int ipv6_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /*
  * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
  *
@@ -376,7 +370,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
 	.pkt_to_tuple		= ipv6_pkt_to_tuple,
 	.invert_tuple		= ipv6_invert_tuple,
 	.print_tuple		= ipv6_print_tuple,
-	.print_conntrack	= ipv6_print_conntrack,
 	.get_l4proto		= ipv6_get_l4proto,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.tuple_to_nlattr	= ipv6_tuple_to_nlattr,
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 991c52c9a28..8e914e5ffea 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -55,12 +55,6 @@ static int generic_print_tuple(struct seq_file *s,
 	return 0;
 }
 
-static int generic_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			       unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -75,7 +69,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
 	.pkt_to_tuple	 = generic_pkt_to_tuple,
 	.invert_tuple	 = generic_invert_tuple,
 	.print_tuple	 = generic_print_tuple,
-	.print_conntrack = generic_print_conntrack,
 	.get_l4proto	 = generic_get_l4proto,
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2ad49331302..d54ca78502c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -142,9 +142,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		       ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
 		return -ENOSPC;
 
-	if (l3proto->print_conntrack(s, conntrack))
-		return -ENOSPC;
-
 	if (l4proto->print_conntrack(s, conntrack))
 		return -ENOSPC;
 
-- 
cgit v1.2.3


From c71e9167081a6a0d2a076cda674b696b89bb31c2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jan 2008 23:49:37 -0800
Subject: [NETFILTER]: nf_conntrack: make print_conntrack function optional for
 l4protos

Allows to remove five empty implementations.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c          | 8 --------
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c        | 8 --------
 net/netfilter/nf_conntrack_proto_generic.c            | 8 --------
 net/netfilter/nf_conntrack_proto_udp.c                | 9 ---------
 net/netfilter/nf_conntrack_proto_udplite.c            | 9 ---------
 net/netfilter/nf_conntrack_standalone.c               | 2 +-
 7 files changed, 2 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index acde9952cad..543c02b74c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -121,7 +121,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		      ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
 		return -ENOSPC;
 
-	if (l4proto->print_conntrack(s, ct))
+	if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
 		return -ENOSPC;
 
 	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index cd0d6690627..4004a04c551 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -74,13 +74,6 @@ static int icmp_print_tuple(struct seq_file *s,
 			  ntohs(tuple->src.u.icmp.id));
 }
 
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int icmp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
@@ -309,7 +302,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
 	.print_tuple		= icmp_print_tuple,
-	.print_conntrack	= icmp_print_conntrack,
 	.packet			= icmp_packet,
 	.new			= icmp_new,
 	.error			= icmp_error,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 02d60dfbab8..da924c6b5f0 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -75,13 +75,6 @@ static int icmpv6_print_tuple(struct seq_file *s,
 			  ntohs(tuple->src.u.icmp.id));
 }
 
-/* Print out the private part of the conntrack. */
-static int icmpv6_print_conntrack(struct seq_file *s,
-				  const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int icmpv6_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
@@ -275,7 +268,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
 	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
 	.print_tuple		= icmpv6_print_tuple,
-	.print_conntrack	= icmpv6_print_conntrack,
 	.packet			= icmpv6_packet,
 	.new			= icmpv6_new,
 	.error			= icmpv6_error,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 13f81917964..22c5dcb6306 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -40,13 +40,6 @@ static int generic_print_tuple(struct seq_file *s,
 	return 0;
 }
 
-/* Print out the private part of the conntrack. */
-static int generic_print_conntrack(struct seq_file *s,
-				   const struct nf_conn *state)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int packet(struct nf_conn *conntrack,
 		  const struct sk_buff *skb,
@@ -104,7 +97,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
 	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.invert_tuple		= generic_invert_tuple,
 	.print_tuple		= generic_print_tuple,
-	.print_conntrack	= generic_print_conntrack,
 	.packet			= packet,
 	.new			= new,
 #ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7ac60731956..38487541108 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -60,13 +60,6 @@ static int udp_print_tuple(struct seq_file *s,
 			  ntohs(tuple->dst.u.udp.port));
 }
 
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
-			       const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, and may modify conntracktype */
 static int udp_packet(struct nf_conn *conntrack,
 		      const struct sk_buff *skb,
@@ -193,7 +186,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
 	.print_tuple		= udp_print_tuple,
-	.print_conntrack	= udp_print_conntrack,
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error,
@@ -221,7 +213,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
 	.print_tuple		= udp_print_tuple,
-	.print_conntrack	= udp_print_conntrack,
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 6518bcd17d6..070056d9bcd 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -59,13 +59,6 @@ static int udplite_print_tuple(struct seq_file *s,
 			  ntohs(tuple->dst.u.udp.port));
 }
 
-/* Print out the private part of the conntrack. */
-static int udplite_print_conntrack(struct seq_file *s,
-				   const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, and may modify conntracktype */
 static int udplite_packet(struct nf_conn *conntrack,
 			  const struct sk_buff *skb,
@@ -198,7 +191,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 	.pkt_to_tuple		= udplite_pkt_to_tuple,
 	.invert_tuple		= udplite_invert_tuple,
 	.print_tuple		= udplite_print_tuple,
-	.print_conntrack	= udplite_print_conntrack,
 	.packet			= udplite_packet,
 	.new			= udplite_new,
 	.error			= udplite_error,
@@ -222,7 +214,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.pkt_to_tuple		= udplite_pkt_to_tuple,
 	.invert_tuple		= udplite_invert_tuple,
 	.print_tuple		= udplite_print_tuple,
-	.print_conntrack	= udplite_print_conntrack,
 	.packet			= udplite_packet,
 	.new			= udplite_new,
 	.error			= udplite_error,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index d54ca78502c..696074a037c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -142,7 +142,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		       ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
 		return -ENOSPC;
 
-	if (l4proto->print_conntrack(s, conntrack))
+	if (l4proto->print_conntrack && l4proto->print_conntrack(s, conntrack))
 		return -ENOSPC;
 
 	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-- 
cgit v1.2.3


From ca629f2472762088b105cd6081bf9aaa56d4547d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 15 Jan 2008 03:28:43 -0800
Subject: [APPLETALK]: Annotations to clear sparse warnings

  CHECK   net/appletalk/aarp.c
net/appletalk/aarp.c:951:14: warning: context imbalance in 'aarp_seq_start' - wrong count at exit
net/appletalk/aarp.c:977:13: warning: context imbalance in 'aarp_seq_stop' - unexpected unlock
  CHECK   net/appletalk/atalk_proc.c
net/appletalk/atalk_proc.c:34:11: warning: context imbalance in 'atalk_seq_interface_start' - wrong count at exit
net/appletalk/atalk_proc.c:54:13: warning: context imbalance in 'atalk_seq_interface_stop' - unexpected unlock
net/appletalk/atalk_proc.c:93:11: warning: context imbalance in 'atalk_seq_route_start' - wrong count at exit
net/appletalk/atalk_proc.c:113:13: warning: context imbalance in 'atalk_seq_route_stop' - unexpected unlock
net/appletalk/atalk_proc.c:161:11: warning: context imbalance in 'atalk_seq_socket_start' - wrong count at exit
net/appletalk/atalk_proc.c:178:13: warning: context imbalance in 'atalk_seq_socket_stop' - unexpected unlock

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/aarp.c       | 2 ++
 net/appletalk/atalk_proc.c | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index b950fb6bd2b..18058bbc796 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -941,6 +941,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
 }
 
 static void *aarp_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(aarp_lock)
 {
 	struct aarp_iter_state *iter = seq->private;
 
@@ -975,6 +976,7 @@ static void *aarp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void aarp_seq_stop(struct seq_file *seq, void *v)
+	__releases(aarp_lock)
 {
 	read_unlock_bh(&aarp_lock);
 }
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 05d9652afcb..8e8dcfd532d 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -27,6 +27,7 @@ static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
 }
 
 static void *atalk_seq_interface_start(struct seq_file *seq, loff_t *pos)
+	__acquires(atalk_interfaces_lock)
 {
 	loff_t l = *pos;
 
@@ -52,6 +53,7 @@ out:
 }
 
 static void atalk_seq_interface_stop(struct seq_file *seq, void *v)
+	__releases(atalk_interfaces_lock)
 {
 	read_unlock_bh(&atalk_interfaces_lock);
 }
@@ -86,6 +88,7 @@ static __inline__ struct atalk_route *atalk_get_route_idx(loff_t pos)
 }
 
 static void *atalk_seq_route_start(struct seq_file *seq, loff_t *pos)
+	__acquires(atalk_routes_lock)
 {
 	loff_t l = *pos;
 
@@ -111,6 +114,7 @@ out:
 }
 
 static void atalk_seq_route_stop(struct seq_file *seq, void *v)
+	__releases(atalk_routes_lock)
 {
 	read_unlock_bh(&atalk_routes_lock);
 }
@@ -154,6 +158,7 @@ found:
 }
 
 static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos)
+	__acquires(atalk_sockets_lock)
 {
 	loff_t l = *pos;
 
@@ -176,6 +181,7 @@ out:
 }
 
 static void atalk_seq_socket_stop(struct seq_file *seq, void *v)
+	__releases(atalk_sockets_lock)
 {
 	read_unlock_bh(&atalk_sockets_lock);
 }
-- 
cgit v1.2.3


From 5c17d5f11212af5f12b91991b1132cf301dd1f28 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 15 Jan 2008 03:29:50 -0800
Subject: [ATM]: Suppress some sparse warnings

  CHECK   net/atm/br2684.c
net/atm/br2684.c:665:13: warning: context imbalance in 'br2684_seq_start' - wrong count at exit
net/atm/br2684.c:676:13: warning: context imbalance in 'br2684_seq_stop' - unexpected unlock
  CHECK   net/atm/lec.c
net/atm/lec.c:196:23: warning: expensive signed divide
  CHECK   net/atm/proc.c
net/atm/proc.c:151:14: warning: context imbalance in 'vcc_seq_start' - wrong count at exit
net/atm/proc.c:154:13: warning: context imbalance in 'vcc_seq_stop' - unexpected unlock

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 2 ++
 net/atm/lec.c    | 2 +-
 net/atm/proc.c   | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 5c8a0dcb104..574d9a96417 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -663,6 +663,7 @@ static struct atm_ioctl br2684_ioctl_ops = {
 
 #ifdef CONFIG_PROC_FS
 static void *br2684_seq_start(struct seq_file *seq, loff_t * pos)
+	__acquires(devs_lock)
 {
 	read_lock(&devs_lock);
 	return seq_list_start(&br2684_devs, *pos);
@@ -674,6 +675,7 @@ static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t * pos)
 }
 
 static void br2684_seq_stop(struct seq_file *seq, void *v)
+	__releases(devs_lock)
 {
 	read_unlock(&devs_lock);
 }
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 0a9c4261968..1a8c4c6c0cd 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -176,7 +176,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
 static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
 {
 	struct trh_hdr *trh;
-	int riflen, num_rdsc;
+	unsigned int riflen, num_rdsc;
 
 	trh = (struct trh_hdr *)packet;
 	if (trh->daddr[0] & (uint8_t) 0x80)
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 565e75e62ca..49125110bb8 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -142,6 +142,7 @@ static int vcc_seq_release(struct inode *inode, struct file *file)
 }
 
 static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(vcc_sklist_lock)
 {
 	struct vcc_state *state = seq->private;
 	loff_t left = *pos;
@@ -152,6 +153,7 @@ static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
 }
 
 static void vcc_seq_stop(struct seq_file *seq, void *v)
+	__releases(vcc_sklist_lock)
 {
 	read_unlock(&vcc_sklist_lock);
 }
-- 
cgit v1.2.3


From 95b7d924a589dbefc7ae2ea6c7144b86b75d6a47 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 15 Jan 2008 03:30:35 -0800
Subject: [ROSE]: Supress sparse warnings

  CHECK   net/rose/af_rose.c
net/rose/af_rose.c:125:11: warning: expensive signed divide
net/rose/af_rose.c:976:46: warning: expensive signed divide
net/rose/af_rose.c:1379:13: warning: context imbalance in 'rose_info_start' - wrong count at exit
net/rose/af_rose.c:1406:13: warning: context imbalance in 'rose_info_stop' - unexpected unlock
  CHECK   net/rose/rose_in.c
net/rose/rose_in.c:185:25: warning: expensive signed divide
  CHECK   net/rose/rose_route.c
net/rose/rose_route.c:997:46: warning: expensive signed divide
net/rose/rose_route.c:1070:13: warning: context imbalance in 'rose_node_start' - wrong count at exit
net/rose/rose_route.c:1093:13: warning: context imbalance in 'rose_node_stop' - unexpected unlock
net/rose/rose_route.c:1146:13: warning: context imbalance in 'rose_neigh_start' - wrong count at exit
net/rose/rose_route.c:1169:13: warning: context imbalance in 'rose_neigh_stop' - unexpected unlock
net/rose/rose_route.c:1229:13: warning: context imbalance in 'rose_route_start' - wrong count at exit
net/rose/rose_route.c:1252:13: warning: context imbalance in 'rose_route_stop' - unexpected unlock

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rose/af_rose.c    |  8 +++++---
 net/rose/rose_in.c    |  2 +-
 net/rose/rose_route.c | 10 ++++++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 323d42a7b36..4a31a81059a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -116,7 +116,7 @@ int rosecmp(rose_address *addr1, rose_address *addr2)
  */
 int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask)
 {
-	int i, j;
+	unsigned int i, j;
 
 	if (mask > 10)
 		return 1;
@@ -973,8 +973,8 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
 	 */
 	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
 
-	len  = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
-	len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;
+	len  = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
+	len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
 	if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
 		rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
 		return 0;
@@ -1377,6 +1377,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
 #ifdef CONFIG_PROC_FS
 static void *rose_info_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rose_list_lock)
 {
 	int i;
 	struct sock *s;
@@ -1404,6 +1405,7 @@ static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rose_info_stop(struct seq_file *seq, void *v)
+	__releases(rose_list_lock)
 {
 	spin_unlock_bh(&rose_list_lock);
 }
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 4ee0879d354..7f7fcb46b4f 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -182,7 +182,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
 				break;
 			}
 			if (atomic_read(&sk->sk_rmem_alloc) >
-			    (sk->sk_rcvbuf / 2))
+			    (sk->sk_rcvbuf >> 1))
 				rose->condition |= ROSE_COND_OWN_RX_BUSY;
 		}
 		/*
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 540c0f26ffe..fb9359fb235 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -994,8 +994,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 		goto out;
 	}
 
-	len  = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
-	len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;
+	len  = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
+	len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
 
 	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
 
@@ -1068,6 +1068,7 @@ out:
 #ifdef CONFIG_PROC_FS
 
 static void *rose_node_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rose_neigh_list_lock)
 {
 	struct rose_node *rose_node;
 	int i = 1;
@@ -1091,6 +1092,7 @@ static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rose_node_stop(struct seq_file *seq, void *v)
+	__releases(rose_neigh_list_lock)
 {
 	spin_unlock_bh(&rose_neigh_list_lock);
 }
@@ -1144,6 +1146,7 @@ const struct file_operations rose_nodes_fops = {
 };
 
 static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rose_neigh_list_lock)
 {
 	struct rose_neigh *rose_neigh;
 	int i = 1;
@@ -1167,6 +1170,7 @@ static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rose_neigh_stop(struct seq_file *seq, void *v)
+	__releases(rose_neigh_list_lock)
 {
 	spin_unlock_bh(&rose_neigh_list_lock);
 }
@@ -1227,6 +1231,7 @@ const struct file_operations rose_neigh_fops = {
 
 
 static void *rose_route_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rose_route_list_lock)
 {
 	struct rose_route *rose_route;
 	int i = 1;
@@ -1250,6 +1255,7 @@ static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rose_route_stop(struct seq_file *seq, void *v)
+	__releases(rose_route_list_lock)
 {
 	spin_unlock_bh(&rose_route_list_lock);
 }
-- 
cgit v1.2.3


From b59cfbf77dc8368c2c90b012c79553613f4d70c3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 18 Jan 2008 03:31:36 -0800
Subject: [FIB]: Fix rcu_dereference() abuses in fib_trie.c

node_parent() and tnode_get_child() currently use rcu_dereference().

These functions are called from both
- readers only paths (where rcu_dereference() is needed), and
- writer path (where rcu_dereference() is not needed)

To make explicit where rcu_dereference() is really needed, I
introduced new node_parent_rcu() and tnode_get_child_rcu() functions
which use rcu_dereference(), while node_parent() and tnode_get_child()
dont use it.

Then I changed calling sites where rcu_dereference() was really needed
to call the _rcu() variants.

This should have no impact but for alpha architecture, and may help
future sparse checks.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fbc80d15827..a52334d30cf 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -165,9 +165,13 @@ static struct kmem_cache *fn_alias_kmem __read_mostly;
 
 static inline struct tnode *node_parent(struct node *node)
 {
-	struct tnode *ret;
+	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+}
+
+static inline struct tnode *node_parent_rcu(struct node *node)
+{
+	struct tnode *ret = node_parent(node);
 
-	ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
 	return rcu_dereference(ret);
 }
 
@@ -177,13 +181,18 @@ static inline void node_set_parent(struct node *node, struct tnode *ptr)
 			   (unsigned long)ptr | NODE_TYPE(node));
 }
 
-/* rcu_read_lock needs to be hold by caller from readside */
+static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
+{
+	BUG_ON(i >= 1U << tn->bits);
 
-static inline struct node *tnode_get_child(struct tnode *tn, int i)
+	return tn->child[i];
+}
+
+static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
 {
-	BUG_ON(i >= 1 << tn->bits);
+	struct node *ret = tnode_get_child(tn, i);
 
-	return rcu_dereference(tn->child[i]);
+	return rcu_dereference(ret);
 }
 
 static inline int tnode_child_length(const struct tnode *tn)
@@ -938,7 +947,7 @@ fib_find_node(struct trie *t, u32 key)
 
 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
 			pos = tn->pos + tn->bits;
-			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
+			n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
 		} else
 			break;
 	}
@@ -1688,7 +1697,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 
 		p = (struct tnode*) trie;  /* Start */
 	} else
-		p = node_parent(c);
+		p = node_parent_rcu(c);
 
 	while (p) {
 		int pos, last;
@@ -1725,7 +1734,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 up:
 		/* No more children go up one step  */
 		c = (struct node *) p;
-		p = node_parent(c);
+		p = node_parent_rcu(c);
 	}
 	return NULL; /* Ready. Root of trie */
 }
@@ -1987,7 +1996,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
 		 iter->tnode, iter->index, iter->depth);
 rescan:
 	while (cindex < (1<<tn->bits)) {
-		struct node *n = tnode_get_child(tn, cindex);
+		struct node *n = tnode_get_child_rcu(tn, cindex);
 
 		if (n) {
 			if (IS_LEAF(n)) {
@@ -2006,7 +2015,7 @@ rescan:
 	}
 
 	/* Current node exhausted, pop back up */
-	p = node_parent((struct node *)tn);
+	p = node_parent_rcu((struct node *)tn);
 	if (p) {
 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
 		tn = p;
@@ -2315,7 +2324,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
-	if (!node_parent(n)) {
+	if (!node_parent_rcu(n)) {
 		if (iter->trie == iter->trie_local)
 			seq_puts(seq, "<local>:\n");
 		else
-- 
cgit v1.2.3


From a6501e080c318f8d4467679d17807f42b3a33cd5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 18 Jan 2008 03:33:26 -0800
Subject: [IPV4] FIB_HASH: Reduce memory needs and speedup lookups

Currently, sizeof(struct fib_alias) is 24 or 48 bytes on 32/64 bits
arches.

Because of SLAB_HWCACHE_ALIGN requirement, these are rounded to 32 and
64 bytes respectively.

This patch moves rcu to the end of fib_alias, and conditionally
defines it only for CONFIG_IP_FIB_TRIE.

We also remove SLAB_HWCACHE_ALIGN requirement for fib_alias and
fib_node objects because it is not necessary.

(BTW SLUB currently denies it for objects smaller than
cache_line_size() / 2, but not SLAB)

Finally, sizeof(fib_alias) go back to 16 and 32 bytes.

Then, we can embed one fib_alias on each fib_node, to favor locality.
Most of the time access to the fib_alias will be free because one
cache line contains both the list head (fn_alias) and (one of) the
list element.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_hash.c   | 33 ++++++++++++++++++++-------------
 net/ipv4/fib_lookup.h |  4 +++-
 2 files changed, 23 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 499522f3b30..a15b2f1b272 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -52,6 +52,7 @@ struct fib_node {
 	struct hlist_node	fn_hash;
 	struct list_head	fn_alias;
 	__be32			fn_key;
+	struct fib_alias        fn_embedded_alias;
 };
 
 struct fn_zone {
@@ -193,10 +194,13 @@ static inline void fn_free_node(struct fib_node * f)
 	kmem_cache_free(fn_hash_kmem, f);
 }
 
-static inline void fn_free_alias(struct fib_alias *fa)
+static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
 {
 	fib_release_info(fa->fa_info);
-	kmem_cache_free(fn_alias_kmem, fa);
+	if (fa == &f->fn_embedded_alias)
+		fa->fa_info = NULL;
+	else
+		kmem_cache_free(fn_alias_kmem, fa);
 }
 
 static struct fn_zone *
@@ -476,15 +480,12 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 		goto out;
 
 	err = -ENOBUFS;
-	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
-	if (new_fa == NULL)
-		goto out;
 
 	new_f = NULL;
 	if (!f) {
-		new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL);
+		new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
 		if (new_f == NULL)
-			goto out_free_new_fa;
+			goto out;
 
 		INIT_HLIST_NODE(&new_f->fn_hash);
 		INIT_LIST_HEAD(&new_f->fn_alias);
@@ -492,6 +493,12 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 		f = new_f;
 	}
 
+	new_fa = &f->fn_embedded_alias;
+	if (new_fa->fa_info != NULL) {
+		new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
+		if (new_fa == NULL)
+			goto out_free_new_f;
+	}
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
@@ -518,8 +525,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 		  &cfg->fc_nlinfo, 0);
 	return 0;
 
-out_free_new_fa:
-	kmem_cache_free(fn_alias_kmem, new_fa);
+out_free_new_f:
+	kmem_cache_free(fn_hash_kmem, new_f);
 out:
 	fib_release_info(fi);
 	return err;
@@ -595,7 +602,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 
 		if (fa->fa_state & FA_S_ACCESSED)
 			rt_cache_flush(-1);
-		fn_free_alias(fa);
+		fn_free_alias(fa, f);
 		if (kill_fn) {
 			fn_free_node(f);
 			fz->fz_nent--;
@@ -631,7 +638,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
 				fib_hash_genid++;
 				write_unlock_bh(&fib_hash_lock);
 
-				fn_free_alias(fa);
+				fn_free_alias(fa, f);
 				found++;
 			}
 		}
@@ -750,10 +757,10 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 void __init fib_hash_init(void)
 {
 	fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
-					 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+					 0, SLAB_PANIC, NULL);
 
 	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
-					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+					  0, SLAB_PANIC, NULL);
 
 }
 
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 26ee66d78c1..2c1623d2768 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -7,12 +7,14 @@
 
 struct fib_alias {
 	struct list_head	fa_list;
-	struct rcu_head rcu;
 	struct fib_info		*fa_info;
 	u8			fa_tos;
 	u8			fa_type;
 	u8			fa_scope;
 	u8			fa_state;
+#ifdef CONFIG_IP_FIB_TRIE
+	struct rcu_head		rcu;
+#endif
 };
 
 #define FA_S_ACCESSED	0x01
-- 
cgit v1.2.3


From 569d36452ee26c08523cc9f658901c5188640853 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 18 Jan 2008 03:56:57 -0800
Subject: [NETNS][DST] dst: pass the dst_ops as parameter to the gc functions

The garbage collection function receive the dst_ops structure as
parameter. This is useful for the next incoming patchset because it
will need the dst_ops (there will be several instances) and the
network namespace pointer (contained in the dst_ops).

The protocols which do not take care of the namespaces will not be
impacted by this change (expect for the function signature), they do
just ignore the parameter.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c          | 2 +-
 net/decnet/dn_route.c   | 4 ++--
 net/ipv4/route.c        | 6 +++---
 net/ipv4/xfrm4_policy.c | 2 +-
 net/ipv6/route.c        | 4 ++--
 net/ipv6/xfrm6_policy.c | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 7eceebaabaa..7deef483c79 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -165,7 +165,7 @@ void * dst_alloc(struct dst_ops * ops)
 	struct dst_entry * dst;
 
 	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
-		if (ops->gc())
+		if (ops->gc(ops))
 			return NULL;
 	}
 	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 938ba7da217..31be29b8b5a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -107,7 +107,7 @@ static const int dn_rt_mtu_expires = 10 * 60 * HZ;
 
 static unsigned long dn_rt_deadline;
 
-static int dn_dst_gc(void);
+static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
@@ -185,7 +185,7 @@ static void dn_dst_check_expire(unsigned long dummy)
 	mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
 }
 
-static int dn_dst_gc(void)
+static int dn_dst_gc(struct dst_ops *ops)
 {
 	struct dn_route *rt, **rtp;
 	int i;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1e59c0d4b11..fc0145385e8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -154,7 +154,7 @@ static void		 ipv4_dst_ifdown(struct dst_entry *dst,
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static int rt_garbage_collect(void);
+static int rt_garbage_collect(struct dst_ops *ops);
 
 
 static struct dst_ops ipv4_dst_ops = {
@@ -820,7 +820,7 @@ static void rt_secret_rebuild(unsigned long dummy)
    and when load increases it reduces to limit cache size.
  */
 
-static int rt_garbage_collect(void)
+static int rt_garbage_collect(struct dst_ops *ops)
 {
 	static unsigned long expire = RT_GC_TIMEOUT;
 	static unsigned long last_gc;
@@ -1035,7 +1035,7 @@ restart:
 				int saved_int = ip_rt_gc_min_interval;
 				ip_rt_gc_elasticity	= 1;
 				ip_rt_gc_min_interval	= 0;
-				rt_garbage_collect();
+				rt_garbage_collect(&ipv4_dst_ops);
 				ip_rt_gc_min_interval	= saved_int;
 				ip_rt_gc_elasticity	= saved_elasticity;
 				goto restart;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 656345f75e0..f04516c880f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -185,7 +185,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	fl->fl4_tos = iph->tos;
 }
 
-static inline int xfrm4_garbage_collect(void)
+static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 {
 	xfrm4_policy_afinfo.garbage_collect();
 	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a429900d16a..4004c5f0b8d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -79,7 +79,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
 				       struct net_device *dev, int how);
-static int		 ip6_dst_gc(void);
+static int		 ip6_dst_gc(struct dst_ops *ops);
 
 static int		ip6_pkt_discard(struct sk_buff *skb);
 static int		ip6_pkt_discard_out(struct sk_buff *skb);
@@ -983,7 +983,7 @@ int ndisc_dst_gc(int *more)
 	return freed;
 }
 
-static int ip6_dst_gc(void)
+static int ip6_dst_gc(struct dst_ops *ops)
 {
 	static unsigned expire = 30*HZ;
 	static unsigned long last_gc;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cf373b46a1b..c25a6b527fc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -212,7 +212,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	}
 }
 
-static inline int xfrm6_garbage_collect(void)
+static inline int xfrm6_garbage_collect(struct dst_ops *ops)
 {
 	xfrm6_policy_afinfo.garbage_collect();
 	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
-- 
cgit v1.2.3


From 66688ea7c8e5cb3ea987d8945fffd099ce80a299 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jan 2008 04:47:58 -0800
Subject: [SCTP]: Fix build warning in sctp_sf_do_5_1C_ack().

Reported by Andrew Morton.

net/sctp/sm_statefuns.c: In function 'sctp_sf_do_5_1C_ack':
net/sctp/sm_statefuns.c:484: warning: 'error' may be used uninitialized in this function

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b1267519183..6e127573594 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -481,7 +481,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 	sctp_init_chunk_t *initchunk;
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
-	sctp_error_t error;
+	sctp_error_t error = SCTP_ERROR_NO_ERROR;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-- 
cgit v1.2.3


From 191df5737e3047de8b7d8ea4e17df241cf8eefca Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Fri, 18 Jan 2008 05:38:31 -0800
Subject: [BRIDGE]: Remove unused include of a header file in ebtables.c

In net/bridge/netfilter/ebtables.c,
- remove unused include of a header file (linux/tty.h) and remove the
  corresponding comment above it.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebtables.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 817169e718c..32afff859e4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -15,8 +15,6 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-/* used for print_string */
-#include <linux/tty.h>
 
 #include <linux/kmod.h>
 #include <linux/module.h>
-- 
cgit v1.2.3


From f59d9782751bf1a2c51e7e1e9f614ffec35fb52e Mon Sep 17 00:00:00 2001
From: Roel Kluin <12o3l@tiscali.nl>
Date: Fri, 26 Oct 2007 21:51:26 +0200
Subject: wireless: fix '!x & y' typo's

Fix priority mistakes similar to '!x & y'

Signed-off-by: Roel Kluin <12o3l@tiscali.nl>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/ieee80211_wx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index d309e8f1999..623489afa62 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -709,7 +709,7 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
 	} else
 		idx = ieee->tx_keyidx;
 
-	if (!ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY &&
+	if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) &&
 	    ext->alg != IW_ENCODE_ALG_WEP)
 		if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA)
 			return -EINVAL;
-- 
cgit v1.2.3


From 7d460db953d6d205e4c8ecc2017aea1ec22b6c9a Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Fri, 18 Jan 2008 23:52:35 -0800
Subject: [IPV6]: Fix ip6_frag ctl

Alexey Dobriyan reported an oops when unsharing the network
indefinitely inside a loop. This is because the ip6_frag is not per
namespace while the ctls are.

That happens at the fragment timer expiration:
inet_frag_secret_rebuild function is called and this one restarts the
timer using the value stored inside the sysctl field.

        "mod_timer(&f->secret_timer, now + f->ctl->secret_interval);"

When the network is unshared, ip6_frag.ctl is initialized with the new
sysctl instances, but ip6_frag has only one instance. A race in this
case will appear because f->ctl can be modified during the read access
in the timer callback.

Until the ip6_frag is not per namespace, I discard the assignation to
the ctl field of ip6_frags in ip6_frag_sysctl_init when the network
namespace is not the init net.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5cd0bc693a5..4dfcddc871c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -627,6 +627,9 @@ static struct inet6_protocol frag_protocol =
 
 void ipv6_frag_sysctl_init(struct net *net)
 {
+	if (net != &init_net)
+		return;
+
 	ip6_frags.ctl = &net->ipv6.sysctl.frags;
 }
 
-- 
cgit v1.2.3


From 869e58f87094b1e8a0df49232e4a5172678d46c9 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Fri, 18 Jan 2008 23:53:31 -0800
Subject: [NETNS]: Double free in netlink_release.

Netlink protocol table is global for all namespaces. Some netlink
protocols have been virtualized, i.e. they have per/namespace netlink
socket. This difference can easily lead to double free if more than 1
namespace is started. Count the number of kernel netlink sockets to
track that this table is not used any more.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 21f9e30184e..29fef558aab 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -498,9 +498,12 @@ static int netlink_release(struct socket *sock)
 
 	netlink_table_grab();
 	if (netlink_is_kernel(sk)) {
-		kfree(nl_table[sk->sk_protocol].listeners);
-		nl_table[sk->sk_protocol].module = NULL;
-		nl_table[sk->sk_protocol].registered = 0;
+		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
+		if (--nl_table[sk->sk_protocol].registered == 0) {
+			kfree(nl_table[sk->sk_protocol].listeners);
+			nl_table[sk->sk_protocol].module = NULL;
+			nl_table[sk->sk_protocol].registered = 0;
+		}
 	} else if (nlk->subscriptions)
 		netlink_update_listeners(sk);
 	netlink_table_ungrab();
@@ -1389,6 +1392,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 		nl_table[unit].registered = 1;
 	} else {
 		kfree(listeners);
+		nl_table[unit].registered++;
 	}
 	netlink_table_ungrab();
 
-- 
cgit v1.2.3


From 4f84d82f7a623f8641af2574425c329431ff158f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Fri, 18 Jan 2008 23:54:15 -0800
Subject: [NETNS]: Memory leak on network namespace stop.

Network namespace allocates 2 kernel netlink sockets, fibnl &
rtnl. These sockets should be disposed properly, i.e. by
sock_release. Plain sock_put is not enough.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c    | 2 +-
 net/ipv4/fib_frontend.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8c45d7e35ee..a5f4f661fa6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1384,7 +1384,7 @@ static void rtnetlink_net_exit(struct net *net)
 		 * free.
 		 */
 		sk->sk_net = get_net(&init_net);
-		sock_put(sk);
+		sock_release(net->rtnl->sk_socket);
 		net->rtnl = NULL;
 	}
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8ddcd3f91a1..4e5216e9aac 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -881,7 +881,7 @@ static void nl_fib_lookup_exit(struct net *net)
 	 * initial network namespace. So the socket will  be safe to free.
 	 */
 	net->ipv4.fibnl->sk_net = get_net(&init_net);
-	sock_put(net->ipv4.fibnl);
+	sock_release(net->ipv4.fibnl->sk_socket);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
-- 
cgit v1.2.3


From b7c6ba6eb1234e35a74fb8ba8123232a7b1ba9e4 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 28 Jan 2008 14:41:19 -0800
Subject: [NETNS]: Consolidate kernel netlink socket destruction.

Create a specific helper for netlink kernel socket disposal. This just
let the code look better and provides a ground for proper disposal
inside a namespace.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_ulog.c |  4 ++--
 net/core/rtnetlink.c            |  2 +-
 net/decnet/netfilter/dn_rtmsg.c |  4 ++--
 net/ipv4/fib_frontend.c         |  2 +-
 net/ipv4/inet_diag.c            |  2 +-
 net/ipv4/netfilter/ip_queue.c   |  4 ++--
 net/ipv4/netfilter/ipt_ULOG.c   |  4 ++--
 net/ipv6/netfilter/ip6_queue.c  |  4 ++--
 net/netfilter/nfnetlink.c       |  2 +-
 net/netlink/af_netlink.c        | 11 +++++++++++
 net/xfrm/xfrm_user.c            |  2 +-
 11 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index b73ba28bcbe..8e7b00b68d3 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -307,7 +307,7 @@ static int __init ebt_ulog_init(void)
 	if (!ebtulognl)
 		ret = -ENOMEM;
 	else if ((ret = ebt_register_watcher(&ulog)))
-		sock_release(ebtulognl->sk_socket);
+		netlink_kernel_release(ebtulognl);
 
 	if (ret == 0)
 		nf_log_register(PF_BRIDGE, &ebt_ulog_logger);
@@ -333,7 +333,7 @@ static void __exit ebt_ulog_fini(void)
 		}
 		spin_unlock_bh(&ub->lock);
 	}
-	sock_release(ebtulognl->sk_socket);
+	netlink_kernel_release(ebtulognl);
 }
 
 module_init(ebt_ulog_init);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a5f4f661fa6..02cf848f71d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1384,7 +1384,7 @@ static void rtnetlink_net_exit(struct net *net)
 		 * free.
 		 */
 		sk->sk_net = get_net(&init_net);
-		sock_release(net->rtnl->sk_socket);
+		netlink_kernel_release(net->rtnl);
 		net->rtnl = NULL;
 	}
 }
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 96375f2e64f..6d2bd320204 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -137,7 +137,7 @@ static int __init dn_rtmsg_init(void)
 
 	rv = nf_register_hook(&dnrmg_ops);
 	if (rv) {
-		sock_release(dnrmg->sk_socket);
+		netlink_kernel_release(dnrmg);
 	}
 
 	return rv;
@@ -146,7 +146,7 @@ static int __init dn_rtmsg_init(void)
 static void __exit dn_rtmsg_fini(void)
 {
 	nf_unregister_hook(&dnrmg_ops);
-	sock_release(dnrmg->sk_socket);
+	netlink_kernel_release(dnrmg);
 }
 
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e5216e9aac..e787d215115 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -881,7 +881,7 @@ static void nl_fib_lookup_exit(struct net *net)
 	 * initial network namespace. So the socket will  be safe to free.
 	 */
 	net->ipv4.fibnl->sk_net = get_net(&init_net);
-	sock_release(net->ipv4.fibnl->sk_socket);
+	netlink_kernel_release(net->ipv4.fibnl);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e468e7a7aac..605ed2cd797 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -935,7 +935,7 @@ out_free_table:
 
 static void __exit inet_diag_exit(void)
 {
-	sock_release(idiagnl->sk_socket);
+	netlink_kernel_release(idiagnl);
 	kfree(inet_diag_table);
 }
 
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 7361315f20c..5109839da22 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -605,7 +605,7 @@ cleanup_sysctl:
 	unregister_netdevice_notifier(&ipq_dev_notifier);
 	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 cleanup_ipqnl:
-	sock_release(ipqnl->sk_socket);
+	netlink_kernel_release(ipqnl);
 	mutex_lock(&ipqnl_mutex);
 	mutex_unlock(&ipqnl_mutex);
 
@@ -624,7 +624,7 @@ static void __exit ip_queue_fini(void)
 	unregister_netdevice_notifier(&ipq_dev_notifier);
 	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 
-	sock_release(ipqnl->sk_socket);
+	netlink_kernel_release(ipqnl);
 	mutex_lock(&ipqnl_mutex);
 	mutex_unlock(&ipqnl_mutex);
 
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index fa24efaf3ea..b192756c6d0 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -415,7 +415,7 @@ static int __init ulog_tg_init(void)
 
 	ret = xt_register_target(&ulog_tg_reg);
 	if (ret < 0) {
-		sock_release(nflognl->sk_socket);
+		netlink_kernel_release(nflognl);
 		return ret;
 	}
 	if (nflog)
@@ -434,7 +434,7 @@ static void __exit ulog_tg_exit(void)
 	if (nflog)
 		nf_log_unregister(&ipt_ulog_logger);
 	xt_unregister_target(&ulog_tg_reg);
-	sock_release(nflognl->sk_socket);
+	netlink_kernel_release(nflognl);
 
 	/* remove pending timers and free allocated skb's */
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index a20db0bb5a1..56b4ea6d29e 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -609,7 +609,7 @@ cleanup_sysctl:
 	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 
 cleanup_ipqnl:
-	sock_release(ipqnl->sk_socket);
+	netlink_kernel_release(ipqnl);
 	mutex_lock(&ipqnl_mutex);
 	mutex_unlock(&ipqnl_mutex);
 
@@ -628,7 +628,7 @@ static void __exit ip6_queue_fini(void)
 	unregister_netdevice_notifier(&ipq_dev_notifier);
 	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 
-	sock_release(ipqnl->sk_socket);
+	netlink_kernel_release(ipqnl);
 	mutex_lock(&ipqnl_mutex);
 	mutex_unlock(&ipqnl_mutex);
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2128542995f..b75c9c4a995 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -179,7 +179,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 static void __exit nfnetlink_exit(void)
 {
 	printk("Removing netfilter NETLINK layer.\n");
-	sock_release(nfnl->sk_socket);
+	netlink_kernel_release(nfnl);
 	return;
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 29fef558aab..626a5820629 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1405,6 +1405,17 @@ out_sock_release:
 }
 EXPORT_SYMBOL(netlink_kernel_create);
 
+
+void
+netlink_kernel_release(struct sock *sk)
+{
+	if (sk == NULL || sk->sk_socket == NULL)
+		return;
+	sock_release(sk->sk_socket);
+}
+EXPORT_SYMBOL(netlink_kernel_release);
+
+
 /**
  * netlink_change_ngroups - change number of multicast groups
  *
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 35fc16ae50a..e0ccdf26781 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2420,7 +2420,7 @@ static void __exit xfrm_user_exit(void)
 	xfrm_unregister_km(&netlink_mgr);
 	rcu_assign_pointer(xfrm_nl, NULL);
 	synchronize_rcu();
-	sock_release(nlsk->sk_socket);
+	netlink_kernel_release(nlsk);
 }
 
 module_init(xfrm_user_init);
-- 
cgit v1.2.3


From 775516bfa2bd7993620c9039191a0c30b8d8a496 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Fri, 18 Jan 2008 23:55:19 -0800
Subject: [NETNS]: Namespace stop vs 'ip r l' race.

During network namespace stop process kernel side netlink sockets
belonging to a namespace should be closed. They should not prevent
namespace to stop, so they do not increment namespace usage
counter. Though this counter will be put during last sock_put.

The raplacement of the correct netns for init_ns solves the problem
only partial as socket to be stoped until proper stop is a valid
netlink kernel socket and can be looked up by the user processes. This
is not a problem until it resides in initial namespace (no processes
inside this net), but this is not true for init_net.

So, hold the referrence for a socket, remove it from lookup tables and
only after that change namespace and perform a last put.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c     | 15 ++-------------
 net/ipv4/fib_frontend.c  |  7 +------
 net/netlink/af_netlink.c | 15 +++++++++++++++
 3 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02cf848f71d..ddbdde82a70 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1368,25 +1368,14 @@ static int rtnetlink_net_init(struct net *net)
 				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
 	if (!sk)
 		return -ENOMEM;
-
-	/* Don't hold an extra reference on the namespace */
-	put_net(sk->sk_net);
 	net->rtnl = sk;
 	return 0;
 }
 
 static void rtnetlink_net_exit(struct net *net)
 {
-	struct sock *sk = net->rtnl;
-	if (sk) {
-		/* At the last minute lie and say this is a socket for the
-		 * initial network namespace.  So the socket will be safe to
-		 * free.
-		 */
-		sk->sk_net = get_net(&init_net);
-		netlink_kernel_release(net->rtnl);
-		net->rtnl = NULL;
-	}
+	netlink_kernel_release(net->rtnl);
+	net->rtnl = NULL;
 }
 
 static struct pernet_operations rtnetlink_net_ops = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e787d215115..62bd791c204 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net)
 				   nl_fib_input, NULL, THIS_MODULE);
 	if (sk == NULL)
 		return -EAFNOSUPPORT;
-	/* Don't hold an extra reference on the namespace */
-	put_net(sk->sk_net);
 	net->ipv4.fibnl = sk;
 	return 0;
 }
 
 static void nl_fib_lookup_exit(struct net *net)
 {
-	/* At the last minute lie and say this is a socket for the
-	 * initial network namespace. So the socket will  be safe to free.
-	 */
-	net->ipv4.fibnl->sk_net = get_net(&init_net);
 	netlink_kernel_release(net->ipv4.fibnl);
+	net->ipv4.fibnl = NULL;
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 626a5820629..6b178e1247b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 	}
 	netlink_table_ungrab();
 
+	/* Do not hold an extra referrence to a namespace as this socket is
+	 * internal to a namespace and does not prevent it to stop. */
+	put_net(net);
 	return sk;
 
 out_sock_release:
@@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk)
 {
 	if (sk == NULL || sk->sk_socket == NULL)
 		return;
+
+	/*
+	 * Last sock_put should drop referrence to sk->sk_net. It has already
+	 * been dropped in netlink_kernel_create. Taking referrence to stopping
+	 * namespace is not an option.
+	 * Take referrence to a socket to remove it from netlink lookup table
+	 * _alive_ and after that destroy it in the context of init_net.
+	 */
+	sock_hold(sk);
 	sock_release(sk->sk_socket);
+
+	sk->sk_net = get_net(&init_net);
+	sock_put(sk);
 }
 EXPORT_SYMBOL(netlink_kernel_release);
 
-- 
cgit v1.2.3


From 6d97b53e92af822890b87818c99820df47fc589b Mon Sep 17 00:00:00 2001
From: Robie Basak <rb-oss-1@justgohome.co.uk>
Date: Fri, 18 Jan 2008 23:56:54 -0800
Subject: [IrDA]: Resend frames on timeout.

When final timer expires, it might also mean that the i:cmd wasn't
received properly. If we have rejected frames, we can try to resend them.

Signed-off-by: Robie Basak <rb-oss-1@justgohome.co.uk>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irlap_event.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 4c33bf5c835..6d3aff862dc 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -1514,9 +1514,15 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
 
 		/* N2 is the disconnect timer. Until we reach it, we retry */
 		if (self->retry_count < self->N2) {
-			/* Retry sending the pf bit to the secondary */
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rr_frame(self, CMD_FRAME);
+			if (skb_peek(&self->wx_list) == NULL) {
+				/* Retry sending the pf bit to the secondary */
+				IRDA_DEBUG(4, "nrm_p: resending rr");
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+				irlap_send_rr_frame(self, CMD_FRAME);
+			} else {
+				IRDA_DEBUG(4, "nrm_p: resend frames");
+				irlap_resend_rejected_frames(self, CMD_FRAME);
+			}
 
 			irlap_start_final_timer(self, self->final_timeout);
 			self->retry_count++;
-- 
cgit v1.2.3


From 5d780cd6585d242d9592a479fe75a007fd75155d Mon Sep 17 00:00:00 2001
From: Robie Basak <rb-oss-1@justgohome.co.uk>
Date: Fri, 18 Jan 2008 23:58:44 -0800
Subject: [IrDA]: Frame length validation.

When using a stir4200-based USB adaptor to talk to a device that uses
an mcp2150, the stir4200 sometimes drops an incoming frame causing the
mcp2150 to try and retransmit the lost frame. In this combination, the
next frame received from the mcp2150 is often invalid - either an
empty i:rsp or an IrCOMM i:rsp with an invalid clen. These corner
cases are now checked.

Signed-off-by: Robie Basak <rb-oss-1@justgohome.co.uk>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/ircomm/ircomm_core.c | 12 ++++++++++++
 net/irda/irlap_event.c        | 13 +++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'net')

diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 2d63fa8e155..b825399fc16 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -362,6 +362,18 @@ void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb)
 
 	clen = skb->data[0];
 
+	/*
+	 * Input validation check: a stir4200/mcp2150 combinations sometimes
+	 * results in frames with clen > remaining packet size. These are
+	 * illegal; if we throw away just this frame then it seems to carry on
+	 * fine
+	 */
+	if (unlikely(skb->len < (clen + 1))) {
+		IRDA_DEBUG(2, "%s() throwing away illegal frame\n",
+			   __FUNCTION__ );
+		return;
+	}
+
 	/*
 	 * If there are any data hiding in the control channel, we must
 	 * deliver it first. The side effect is that the control channel
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 6d3aff862dc..6af86eba746 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -1199,6 +1199,19 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
 
 	switch (event) {
 	case RECV_I_RSP: /* Optimize for the common case */
+		if (unlikely(skb->len <= LAP_ADDR_HEADER + LAP_CTRL_HEADER)) {
+			/*
+			 * Input validation check: a stir4200/mcp2150
+			 * combination sometimes results in an empty i:rsp.
+			 * This makes no sense; we can just ignore the frame
+			 * and send an rr:cmd immediately. This happens before
+			 * changing nr or ns so triggers a retransmit
+			 */
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rr_frame(self, CMD_FRAME);
+			/* Keep state */
+			break;
+		}
 		/* FIXME: must check for remote_busy below */
 #ifdef CONFIG_IRDA_FAST_RR
 		/*
-- 
cgit v1.2.3


From e9888f5498083f5e4d873cbbe16aa97d89aa1342 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sat, 19 Jan 2008 00:00:42 -0800
Subject: [IrDA]: Irport removal - part 1

This patch removes IrPORT and the old dongle drivers (all off them
have replacement drivers).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irda_device.c | 169 +------------------------------------------------
 1 file changed, 1 insertion(+), 168 deletions(-)

(limited to 'net')

diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 435b563d29a..87185910d0e 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -57,20 +57,6 @@ static void __irda_task_delete(struct irda_task *task);
 static hashbin_t *dongles = NULL;
 static hashbin_t *tasks = NULL;
 
-#ifdef CONFIG_IRDA_DEBUG
-static const char *task_state[] = {
-	"IRDA_TASK_INIT",
-	"IRDA_TASK_DONE",
-	"IRDA_TASK_WAIT",
-	"IRDA_TASK_WAIT1",
-	"IRDA_TASK_WAIT2",
-	"IRDA_TASK_WAIT3",
-	"IRDA_TASK_CHILD_INIT",
-	"IRDA_TASK_CHILD_WAIT",
-	"IRDA_TASK_CHILD_DONE",
-};
-#endif	/* CONFIG_IRDA_DEBUG */
-
 static void irda_task_timer_expired(void *data);
 
 int __init irda_device_init( void)
@@ -176,14 +162,6 @@ int irda_device_is_receiving(struct net_device *dev)
 	return req.ifr_receiving;
 }
 
-void irda_task_next_state(struct irda_task *task, IRDA_TASK_STATE state)
-{
-	IRDA_DEBUG(2, "%s(), state = %s\n", __FUNCTION__, task_state[state]);
-
-	task->state = state;
-}
-EXPORT_SYMBOL(irda_task_next_state);
-
 static void __irda_task_delete(struct irda_task *task)
 {
 	del_timer(&task->timer);
@@ -191,14 +169,13 @@ static void __irda_task_delete(struct irda_task *task)
 	kfree(task);
 }
 
-void irda_task_delete(struct irda_task *task)
+static void irda_task_delete(struct irda_task *task)
 {
 	/* Unregister task */
 	hashbin_remove(tasks, (long) task, NULL);
 
 	__irda_task_delete(task);
 }
-EXPORT_SYMBOL(irda_task_delete);
 
 /*
  * Function irda_task_kick (task)
@@ -271,51 +248,6 @@ static int irda_task_kick(struct irda_task *task)
 	return finished;
 }
 
-/*
- * Function irda_task_execute (instance, function, finished)
- *
- *    This function registers and tries to execute tasks that may take some
- *    time to complete. We do it this hairy way since we may have been
- *    called from interrupt context, so it's not possible to use
- *    schedule_timeout()
- * Two important notes :
- *	o Make sure you irda_task_delete(task); in case you delete the
- *	  calling instance.
- *	o No real need to lock when calling this function, but you may
- *	  want to lock within the task handler.
- * Jean II
- */
-struct irda_task *irda_task_execute(void *instance,
-				    IRDA_TASK_CALLBACK function,
-				    IRDA_TASK_CALLBACK finished,
-				    struct irda_task *parent, void *param)
-{
-	struct irda_task *task;
-
-	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
-
-	task = kmalloc(sizeof(struct irda_task), GFP_ATOMIC);
-	if (!task)
-		return NULL;
-
-	task->state    = IRDA_TASK_INIT;
-	task->instance = instance;
-	task->function = function;
-	task->finished = finished;
-	task->parent   = parent;
-	task->param    = param;
-	task->magic    = IRDA_TASK_MAGIC;
-
-	init_timer(&task->timer);
-
-	/* Register task */
-	hashbin_insert(tasks, (irda_queue_t *) task, (long) task, NULL);
-
-	/* No time to waste, so lets get going! */
-	return irda_task_kick(task) ? NULL : task;
-}
-EXPORT_SYMBOL(irda_task_execute);
-
 /*
  * Function irda_task_timer_expired (data)
  *
@@ -364,105 +296,6 @@ struct net_device *alloc_irdadev(int sizeof_priv)
 }
 EXPORT_SYMBOL(alloc_irdadev);
 
-/*
- * Function irda_device_init_dongle (self, type, qos)
- *
- *    Initialize attached dongle.
- *
- * Important : request_module require us to call this function with
- * a process context and irq enabled. - Jean II
- */
-dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
-{
-	struct dongle_reg *reg;
-	dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
-
-	might_sleep();
-
-	spin_lock(&dongles->hb_spinlock);
-	reg = hashbin_find(dongles, type, NULL);
-
-#ifdef CONFIG_KMOD
-	/* Try to load the module needed */
-	if (!reg && capable(CAP_SYS_MODULE)) {
-		spin_unlock(&dongles->hb_spinlock);
-
-		request_module("irda-dongle-%d", type);
-
-		spin_lock(&dongles->hb_spinlock);
-		reg = hashbin_find(dongles, type, NULL);
-	}
-#endif
-
-	if (!reg || !try_module_get(reg->owner) ) {
-		IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
-			   type);
-		kfree(dongle);
-		dongle = NULL;
-	}
-	if (dongle) {
-		/* Bind the registration info to this particular instance */
-		dongle->issue = reg;
-		dongle->dev = dev;
-	}
-	spin_unlock(&dongles->hb_spinlock);
-	return dongle;
-}
-EXPORT_SYMBOL(irda_device_dongle_init);
-
-/*
- * Function irda_device_dongle_cleanup (dongle)
- */
-int irda_device_dongle_cleanup(dongle_t *dongle)
-{
-	IRDA_ASSERT(dongle != NULL, return -1;);
-
-	dongle->issue->close(dongle);
-	module_put(dongle->issue->owner);
-	kfree(dongle);
-
-	return 0;
-}
-EXPORT_SYMBOL(irda_device_dongle_cleanup);
-
-/*
- * Function irda_device_register_dongle (dongle)
- */
-int irda_device_register_dongle(struct dongle_reg *new)
-{
-	spin_lock(&dongles->hb_spinlock);
-	/* Check if this dongle has been registered before */
-	if (hashbin_find(dongles, new->type, NULL)) {
-		IRDA_MESSAGE("%s: Dongle type %x already registered\n",
-			     __FUNCTION__, new->type);
-	} else {
-		/* Insert IrDA dongle into hashbin */
-		hashbin_insert(dongles, (irda_queue_t *) new, new->type, NULL);
-	}
-	spin_unlock(&dongles->hb_spinlock);
-
-	return 0;
-}
-EXPORT_SYMBOL(irda_device_register_dongle);
-
-/*
- * Function irda_device_unregister_dongle (dongle)
- *
- *    Unregister dongle, and remove dongle from list of registered dongles
- *
- */
-void irda_device_unregister_dongle(struct dongle_reg *dongle)
-{
-	struct dongle *node;
-
-	spin_lock(&dongles->hb_spinlock);
-	node = hashbin_remove(dongles, dongle->type, NULL);
-	if (!node)
-		IRDA_ERROR("%s: dongle not found!\n", __FUNCTION__);
-	spin_unlock(&dongles->hb_spinlock);
-}
-EXPORT_SYMBOL(irda_device_unregister_dongle);
-
 #ifdef CONFIG_ISA_DMA_API
 /*
  * Function setup_dma (idev, buffer, count, mode)
-- 
cgit v1.2.3


From 853f4b505578ea3a1d9c2f5fb4ca58658ea15780 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Sun, 20 Jan 2008 06:10:46 -0800
Subject: [SCTP]: Correctly initialize error when parameter validation failed.

When parameter validation fails, there should be error causes that
specify what type of failure we've encountered.  If the causes are not
there, we lacked memory to allocated them.  Thus make that the default
value for the error.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 6e127573594..61cbd5a8dd0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -481,7 +481,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 	sctp_init_chunk_t *initchunk;
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
-	sctp_error_t error = SCTP_ERROR_NO_ERROR;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -506,6 +505,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 
+		sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
+
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes.  If there are no causes,
 		 * then there wasn't enough memory.  Just terminate
@@ -525,8 +526,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 						SCTP_PACKET(packet));
 				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
 				error = SCTP_ERROR_INV_PARAM;
-			} else {
-				error = SCTP_ERROR_NO_RESOURCE;
 			}
 		}
 
-- 
cgit v1.2.3


From 035923833369e4da5d3c4ad0700bc7c367a0fa37 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sun, 20 Jan 2008 16:46:01 -0800
Subject: [FIB]: Add netns to fib_rules_ops.

The backward link from FIB rules operations to the network namespace
will allow to simplify the API a bit.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_rules.c | 1 +
 net/ipv4/fib_rules.c  | 2 ++
 net/ipv6/fib6_rules.c | 1 +
 3 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index c1fae23d226..964e6588760 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -249,6 +249,7 @@ static struct fib_rules_ops dn_fib_rules_ops = {
 	.policy		= dn_fib_rule_policy,
 	.rules_list	= LIST_HEAD_INIT(dn_fib_rules_ops.rules_list),
 	.owner		= THIS_MODULE,
+	.fro_net	= &init_net,
 };
 
 void __init dn_fib_rules_init(void)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 72232ab4ecb..8d0ebe7f360 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -315,6 +315,8 @@ int __net_init fib4_rules_init(struct net *net)
 	if (ops == NULL)
 		return -ENOMEM;
 	INIT_LIST_HEAD(&ops->rules_list);
+	ops->fro_net = net;
+
 	fib_rules_register(net, ops);
 
 	err = fib_default_rules_init(ops);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 76437a1fcab..ead5ab2da9a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -249,6 +249,7 @@ static struct fib_rules_ops fib6_rules_ops = {
 	.policy			= fib6_rule_policy,
 	.rules_list		= LIST_HEAD_INIT(fib6_rules_ops.rules_list),
 	.owner			= THIS_MODULE,
+	.fro_net		= &init_net,
 };
 
 static int __init fib6_default_rules_init(void)
-- 
cgit v1.2.3


From 9e3a548781fc1c0da617fc65769a515f074be740 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sun, 20 Jan 2008 16:46:41 -0800
Subject: [NETNS]: FIB rules API cleanup.

Remove struct net from fib_rules_register(unregister)/notify_change
paths and diet code size a bit.

add/remove: 0/0 grow/shrink: 10/12 up/down: 35/-100 (-65)
function                                     old     new   delta
notify_rule_change                           273     280      +7
trie_show_stats                              471     475      +4
fn_trie_delete                               473     477      +4
fib_rules_unregister                         144     148      +4
fib4_rule_compare                            119     123      +4
resize                                      2842    2845      +3
fn_trie_select_default                       515     518      +3
inet_sk_rebuild_header                       836     838      +2
fib_trie_seq_show                            764     766      +2
__devinet_sysctl_register                    276     278      +2
fn_trie_lookup                              1124    1123      -1
ip_fib_check_default                         133     131      -2
devinet_conf_sysctl                          223     221      -2
snmp_fold_field                              126     123      -3
fn_trie_insert                              2091    2086      -5
inet_create                                  876     870      -6
fib4_rules_init                              197     191      -6
fib_sync_down                                452     444      -8
inet_gso_send_check                          334     325      -9
fib_create_info                             3003    2991     -12
fib_nl_delrule                               568     553     -15
fib_nl_newrule                               883     852     -31

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c  | 20 +++++++++++++-------
 net/decnet/dn_rules.c |  4 ++--
 net/ipv4/fib_rules.c  |  6 +++---
 net/ipv6/fib6_rules.c |  4 ++--
 4 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 541728aa2ba..3cd4f13413f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -37,8 +37,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
-static void notify_rule_change(struct net *net, int event,
-			       struct fib_rule *rule,
+static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid);
 
@@ -72,10 +71,13 @@ static void flush_route_cache(struct fib_rules_ops *ops)
 		ops->flush_cache();
 }
 
-int fib_rules_register(struct net *net, struct fib_rules_ops *ops)
+int fib_rules_register(struct fib_rules_ops *ops)
 {
 	int err = -EEXIST;
 	struct fib_rules_ops *o;
+	struct net *net;
+
+	net = ops->fro_net;
 
 	if (ops->rule_size < sizeof(struct fib_rule))
 		return -EINVAL;
@@ -112,8 +114,9 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 }
 EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
-void fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
+void fib_rules_unregister(struct fib_rules_ops *ops)
 {
+	struct net *net = ops->fro_net;
 
 	spin_lock(&net->rules_mod_lock);
 	list_del_rcu(&ops->list);
@@ -333,7 +336,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	else
 		list_add_rcu(&rule->list, &ops->rules_list);
 
-	notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
 	return 0;
@@ -423,7 +426,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 
 		synchronize_rcu();
-		notify_rule_change(net, RTM_DELRULE, rule, ops, nlh,
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
 		flush_route_cache(ops);
@@ -561,13 +564,15 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static void notify_rule_change(struct net *net, int event, struct fib_rule *rule,
+static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid)
 {
+	struct net *net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	net = ops->fro_net;
 	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
@@ -579,6 +584,7 @@ static void notify_rule_change(struct net *net, int event, struct fib_rule *rule
 		kfree_skb(skb);
 		goto errout;
 	}
+
 	err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 964e6588760..5b7539b7fe0 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -256,12 +256,12 @@ void __init dn_fib_rules_init(void)
 {
 	BUG_ON(fib_default_rule_add(&dn_fib_rules_ops, 0x7fff,
 			            RT_TABLE_MAIN, 0));
-	fib_rules_register(&init_net, &dn_fib_rules_ops);
+	fib_rules_register(&dn_fib_rules_ops);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	fib_rules_unregister(&init_net, &dn_fib_rules_ops);
+	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 8d0ebe7f360..3b7affd5cb3 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -317,7 +317,7 @@ int __net_init fib4_rules_init(struct net *net)
 	INIT_LIST_HEAD(&ops->rules_list);
 	ops->fro_net = net;
 
-	fib_rules_register(net, ops);
+	fib_rules_register(ops);
 
 	err = fib_default_rules_init(ops);
 	if (err < 0)
@@ -327,13 +327,13 @@ int __net_init fib4_rules_init(struct net *net)
 
 fail:
 	/* also cleans all rules already added */
-	fib_rules_unregister(net, ops);
+	fib_rules_unregister(ops);
 	kfree(ops);
 	return err;
 }
 
 void __net_exit fib4_rules_exit(struct net *net)
 {
-	fib_rules_unregister(net, net->ipv4.rules_ops);
+	fib_rules_unregister(net->ipv4.rules_ops);
 	kfree(net->ipv4.rules_ops);
 }
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ead5ab2da9a..695c0ca8a41 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -274,7 +274,7 @@ int __init fib6_rules_init(void)
 	if (ret)
 		goto out;
 
-	ret = fib_rules_register(&init_net, &fib6_rules_ops);
+	ret = fib_rules_register(&fib6_rules_ops);
 	if (ret)
 		goto out_default_rules_init;
 out:
@@ -287,5 +287,5 @@ out_default_rules_init:
 
 void fib6_rules_cleanup(void)
 {
-	fib_rules_unregister(&init_net, &fib6_rules_ops);
+	fib_rules_unregister(&fib6_rules_ops);
 }
-- 
cgit v1.2.3


From 51314a17baabc710e5fb12975fe8983dedd5ac0d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sun, 20 Jan 2008 16:47:09 -0800
Subject: [NETNS]: Process FIB rule action in the context of the namespace.

Save namespace context on the fib rule at the rule creation time and
call routing lookup in the correct namespace.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 2 ++
 net/ipv4/fib_rules.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3cd4f13413f..42ccaf5b850 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -29,6 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->pref = pref;
 	r->table = table;
 	r->flags = flags;
+	r->fr_net = ops->fro_net;
 
 	/* The lock is not required here, the list in unreacheable
 	 * at the moment this function is called */
@@ -242,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		err = -ENOMEM;
 		goto errout;
 	}
+	rule->fr_net = net;
 
 	if (tb[FRA_PRIORITY])
 		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 3b7affd5cb3..d2001f1c28a 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -91,7 +91,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto errout;
 	}
 
-	if ((tbl = fib_get_table(&init_net, rule->table)) == NULL)
+	if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)
 		goto errout;
 
 	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
-- 
cgit v1.2.3


From d3e994830d35159320d0a98c36aa580410c29609 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sun, 20 Jan 2008 17:18:45 -0800
Subject: [PKT_SCHED] SFQ: timer is deferrable

The perturbation timer used for re-keying can be deferred, it doesn't
need to be deterministic.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c58fa6efc7a..ba3a85e29b2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -426,7 +426,9 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	int i;
 
-	setup_timer(&q->perturb_timer, sfq_perturbation, (unsigned long)sch);
+	q->perturb_timer.function = sfq_perturbation;
+	q->perturb_timer.data = (unsigned long)sch;;
+	init_timer_deferrable(&q->perturb_timer);
 
 	for (i=0; i<SFQ_HASH_DIVISOR; i++)
 		q->ht[i] = SFQ_DEPTH;
-- 
cgit v1.2.3


From d46f8dd87d9e7d5356891cbe97b8472e74db1413 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sun, 20 Jan 2008 17:19:43 -0800
Subject: [PKT_SCHED] SFQ: use net_random

SFQ doesn't need true random numbers, it is only using them to salt a
hash. Therefore it is better to use net_random() and avoid any
possible problems with depleting the entropy pool.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index ba3a85e29b2..4179758450f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -386,7 +386,7 @@ static void sfq_perturbation(unsigned long arg)
 	struct Qdisc *sch = (struct Qdisc*)arg;
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
-	get_random_bytes(&q->perturbation, 4);
+	q->perturbation = net_random();
 
 	if (q->perturb_period)
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
@@ -415,7 +415,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
 	del_timer(&q->perturb_timer);
 	if (q->perturb_period) {
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
-		get_random_bytes(&q->perturbation, 4);
+		q->perturbation = net_random();
 	}
 	sch_tree_unlock(sch);
 	return 0;
@@ -443,7 +443,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 	if (opt == NULL) {
 		q->quantum = psched_mtu(sch->dev);
 		q->perturb_period = 0;
-		get_random_bytes(&q->perturbation, 4);
+		q->perturbation = net_random();
 	} else {
 		int err = sfq_change(sch, opt);
 		if (err)
-- 
cgit v1.2.3


From 6f9e98f7a96fdf4d621b8241d5a8a55c692de373 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Sun, 20 Jan 2008 17:20:56 -0800
Subject: [PKT_SCHED] SFQ: whitespace cleanup

Add whitespace around operators, and add a few blank lines to improve
readability.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4179758450f..25afe0f1d83 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -122,7 +122,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	{
 		const struct iphdr *iph = ip_hdr(skb);
 		h = iph->daddr;
-		h2 = iph->saddr^iph->protocol;
+		h2 = iph->saddr ^ iph->protocol;
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
 		    (iph->protocol == IPPROTO_TCP ||
 		     iph->protocol == IPPROTO_UDP ||
@@ -137,7 +137,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	{
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		h = iph->daddr.s6_addr32[3];
-		h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
+		h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr;
 		if (iph->nexthdr == IPPROTO_TCP ||
 		    iph->nexthdr == IPPROTO_UDP ||
 		    iph->nexthdr == IPPROTO_UDPLITE ||
@@ -148,9 +148,10 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		break;
 	}
 	default:
-		h = (u32)(unsigned long)skb->dst^skb->protocol;
-		h2 = (u32)(unsigned long)skb->sk;
+		h = (unsigned long)skb->dst ^ skb->protocol;
+		h2 = (unsigned long)skb->sk;
 	}
+
 	return sfq_fold_hash(q, h, h2);
 }
 
@@ -208,7 +209,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	   drop a packet from it */
 
 	if (d > 1) {
-		sfq_index x = q->dep[d+SFQ_DEPTH].next;
+		sfq_index x = q->dep[d + SFQ_DEPTH].next;
 		skb = q->qs[x].prev;
 		len = skb->len;
 		__skb_unlink(skb, &q->qs[x]);
@@ -241,7 +242,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 }
 
 static int
-sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned hash = sfq_hash(q, skb);
@@ -252,6 +253,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
 		q->hash[x] = hash;
 	}
+
 	/* If selected queue has length q->limit, this means that
 	 * all another queues are empty and that we do simple tail drop,
 	 * i.e. drop _this_ packet.
@@ -284,7 +286,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 }
 
 static int
-sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned hash = sfq_hash(q, skb);
@@ -295,6 +297,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
 		q->hash[x] = hash;
 	}
+
 	sch->qstats.backlog += skb->len;
 	__skb_queue_head(&q->qs[x], skb);
 	/* If selected queue has length q->limit+1, this means that
@@ -310,6 +313,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 		kfree_skb(skb);
 		return NET_XMIT_CN;
 	}
+
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
 		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
@@ -322,6 +326,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 			q->tail = x;
 		}
 	}
+
 	if (++sch->q.qlen <= q->limit) {
 		sch->qstats.requeues++;
 		return 0;
@@ -336,7 +341,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 
 
 static struct sk_buff *
-sfq_dequeue(struct Qdisc* sch)
+sfq_dequeue(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
@@ -373,7 +378,7 @@ sfq_dequeue(struct Qdisc* sch)
 }
 
 static void
-sfq_reset(struct Qdisc* sch)
+sfq_reset(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 
@@ -383,7 +388,7 @@ sfq_reset(struct Qdisc* sch)
 
 static void sfq_perturbation(unsigned long arg)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
 	q->perturbation = net_random();
@@ -403,7 +408,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
 
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
-	q->perturb_period = ctl->perturb_period*HZ;
+	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
 
@@ -430,13 +435,15 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 	q->perturb_timer.data = (unsigned long)sch;;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i=0; i<SFQ_HASH_DIVISOR; i++)
+	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
 		q->ht[i] = SFQ_DEPTH;
-	for (i=0; i<SFQ_DEPTH; i++) {
+
+	for (i = 0; i < SFQ_DEPTH; i++) {
 		skb_queue_head_init(&q->qs[i]);
-		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
-		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
+		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
+		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
 	}
+
 	q->limit = SFQ_DEPTH - 1;
 	q->max_depth = 0;
 	q->tail = SFQ_DEPTH;
@@ -449,7 +456,8 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 		if (err)
 			return err;
 	}
-	for (i=0; i<SFQ_DEPTH; i++)
+
+	for (i = 0; i < SFQ_DEPTH; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -467,7 +475,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_sfq_qopt opt;
 
 	opt.quantum = q->quantum;
-	opt.perturb_period = q->perturb_period/HZ;
+	opt.perturb_period = q->perturb_period / HZ;
 
 	opt.limit = q->limit;
 	opt.divisor = SFQ_HASH_DIVISOR;
-- 
cgit v1.2.3


From 58f4df423ee3e7ee33022d84bbd69561b03344a9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:11:01 -0800
Subject: [NET_SCHED]: sch_ingress: formatting fixes

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 103 ++++++++++++++++++------------------------------
 1 file changed, 39 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 89c32a9bcc5..4880d44ee97 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -22,23 +22,20 @@
 #undef DEBUG_INGRESS
 
 #ifdef DEBUG_INGRESS  /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#define DPRINTK(format, args...) printk(KERN_DEBUG format,##args)
 #else
-#define DPRINTK(format,args...)
+#define DPRINTK(format, args...)
 #endif
 
 #if 0  /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#define D2PRINTK(format, args...) printk(KERN_DEBUG format,##args)
 #else
-#define D2PRINTK(format,args...)
+#define D2PRINTK(format, args...)
 #endif
 
-
 #define PRIV(sch) qdisc_priv(sch)
 
-
-/* Thanks to Doron Oz for this hack
-*/
+/* Thanks to Doron Oz for this hack */
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
 static int nf_registered;
@@ -50,12 +47,10 @@ struct ingress_qdisc_data {
 	struct tcf_proto	*filter_list;
 };
 
-
 /* ------------------------- Class/flow operations ------------------------- */
 
-
-static int ingress_graft(struct Qdisc *sch,unsigned long arg,
-    struct Qdisc *new,struct Qdisc **old)
+static int ingress_graft(struct Qdisc *sch, unsigned long arg,
+			 struct Qdisc *new, struct Qdisc **old)
 {
 #ifdef DEBUG_INGRESS
 	struct ingress_qdisc_data *p = PRIV(sch);
@@ -67,37 +62,33 @@ static int ingress_graft(struct Qdisc *sch,unsigned long arg,
 	return 1;
 }
 
-
 static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 {
 	return NULL;
 }
 
-
-static unsigned long ingress_get(struct Qdisc *sch,u32 classid)
+static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
 {
 #ifdef DEBUG_INGRESS
 	struct ingress_qdisc_data *p = PRIV(sch);
 #endif
-	DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+	DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n",
+		sch, p, classid);
 	return TC_H_MIN(classid) + 1;
 }
 
-
 static unsigned long ingress_bind_filter(struct Qdisc *sch,
-    unsigned long parent, u32 classid)
+					 unsigned long parent, u32 classid)
 {
 	return ingress_get(sch, classid);
 }
 
-
 static void ingress_put(struct Qdisc *sch, unsigned long cl)
 {
 }
 
-
 static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
-    struct rtattr **tca, unsigned long *arg)
+			  struct rtattr **tca, unsigned long *arg)
 {
 #ifdef DEBUG_INGRESS
 	struct ingress_qdisc_data *p = PRIV(sch);
@@ -108,9 +99,7 @@ static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
 	return 0;
 }
 
-
-
-static void ingress_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 #ifdef DEBUG_INGRESS
 	struct ingress_qdisc_data *p = PRIV(sch);
@@ -119,19 +108,16 @@ static void ingress_walk(struct Qdisc *sch,struct qdisc_walker *walker)
 	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
 }
 
-
-static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch,unsigned long cl)
+static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
 
 	return &p->filter_list;
 }
 
-
 /* --------------------------- Qdisc operations ---------------------------- */
 
-
-static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
 	struct tcf_result res;
@@ -148,20 +134,20 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 	sch->bstats.packets++;
 	sch->bstats.bytes += skb->len;
 	switch (result) {
-		case TC_ACT_SHOT:
-			result = TC_ACT_SHOT;
-			sch->qstats.drops++;
-			break;
-		case TC_ACT_STOLEN:
-		case TC_ACT_QUEUED:
-			result = TC_ACT_STOLEN;
-			break;
-		case TC_ACT_RECLASSIFY:
-		case TC_ACT_OK:
-			skb->tc_index = TC_H_MIN(res.classid);
-		default:
-			result = TC_ACT_OK;
-			break;
+	case TC_ACT_SHOT:
+		result = TC_ACT_SHOT;
+		sch->qstats.drops++;
+		break;
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+		result = TC_ACT_STOLEN;
+		break;
+	case TC_ACT_RECLASSIFY:
+	case TC_ACT_OK:
+		skb->tc_index = TC_H_MIN(res.classid);
+	default:
+		result = TC_ACT_OK;
+		break;
 	}
 #else
 	D2PRINTK("Overriding result to ACCEPT\n");
@@ -173,7 +159,6 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 	return result;
 }
 
-
 static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
 {
 /*
@@ -183,8 +168,7 @@ static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
 	return NULL;
 }
 
-
-static int ingress_requeue(struct sk_buff *skb,struct Qdisc *sch)
+static int ingress_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 /*
 	struct ingress_qdisc_data *p = PRIV(sch);
@@ -204,8 +188,7 @@ static unsigned int ingress_drop(struct Qdisc *sch)
 
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
-static unsigned int
-ing_hook(unsigned int hook, struct sk_buff *skb,
+static unsigned int ing_hook(unsigned int hook, struct sk_buff *skb,
 			     const struct net_device *indev,
 			     const struct net_device *outdev,
 			     int (*okfn)(struct sk_buff *))
@@ -213,7 +196,7 @@ ing_hook(unsigned int hook, struct sk_buff *skb,
 
 	struct Qdisc *q;
 	struct net_device *dev = skb->dev;
-	int fwres=NF_ACCEPT;
+	int fwres = NF_ACCEPT;
 
 	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
 		skb->sk ? "(owned)" : "(unowned)",
@@ -247,16 +230,15 @@ static struct nf_hook_ops ing_ops[] __read_mostly = {
 		.priority       = NF_IP6_PRI_FILTER + 1,
 	},
 };
-
 #endif
 #endif
 
-static int ingress_init(struct Qdisc *sch,struct rtattr *opt)
+static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
 
-/* Make sure either netfilter or preferably CLS_ACT is
-* compiled in */
+	/* Make sure either netfilter or preferably CLS_ACT is
+	 * compiled in */
 #ifndef CONFIG_NET_CLS_ACT
 #ifndef CONFIG_NETFILTER
 	printk("You MUST compile classifier actions into the kernel\n");
@@ -278,12 +260,11 @@ static int ingress_init(struct Qdisc *sch,struct rtattr *opt)
 #endif
 #endif
 
-	DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	p->q = &noop_qdisc;
 	return 0;
 }
 
-
 static void ingress_reset(struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
@@ -300,9 +281,6 @@ static void ingress_reset(struct Qdisc *sch)
 */
 }
 
-/* ------------------------------------------------------------- */
-
-
 /* ------------------------------------------------------------- */
 
 static void ingress_destroy(struct Qdisc *sch)
@@ -317,13 +295,12 @@ static void ingress_destroy(struct Qdisc *sch)
 #endif
 }
 
-
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
-	rta = (struct rtattr *) b;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
@@ -339,16 +316,13 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 	.get		=	ingress_get,
 	.put		=	ingress_put,
 	.change		=	ingress_change,
-	.delete		=	NULL,
 	.walk		=	ingress_walk,
 	.tcf_chain	=	ingress_find_tcf,
 	.bind_tcf	=	ingress_bind_filter,
 	.unbind_tcf	=	ingress_put,
-	.dump		=	NULL,
 };
 
 static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
-	.next		=	NULL,
 	.cl_ops		=	&ingress_class_ops,
 	.id		=	"ingress",
 	.priv_size	=	sizeof(struct ingress_qdisc_data),
@@ -359,7 +333,6 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.init		=	ingress_init,
 	.reset		=	ingress_reset,
 	.destroy	=	ingress_destroy,
-	.change		=	NULL,
 	.dump		=	ingress_dump,
 	.owner		=	THIS_MODULE,
 };
@@ -375,6 +348,7 @@ static int __init ingress_module_init(void)
 
 	return ret;
 }
+
 static void __exit ingress_module_exit(void)
 {
 	unregister_qdisc(&ingress_qdisc_ops);
@@ -385,6 +359,7 @@ static void __exit ingress_module_exit(void)
 #endif
 #endif
 }
+
 module_init(ingress_module_init)
 module_exit(ingress_module_exit)
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From a47812211bb38c6754a5a6a952ab406e711cc6e0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:11:21 -0800
Subject: [NET_SCHED]: sch_ingress: remove excessive debugging

Remove excessive debugging statements and some "future use" stuff.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 79 ++-----------------------------------------------
 1 file changed, 3 insertions(+), 76 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 4880d44ee97..bdda28d9ebf 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -19,20 +19,6 @@
 #include <net/pkt_sched.h>
 
 
-#undef DEBUG_INGRESS
-
-#ifdef DEBUG_INGRESS  /* control */
-#define DPRINTK(format, args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format, args...)
-#endif
-
-#if 0  /* data */
-#define D2PRINTK(format, args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format, args...)
-#endif
-
 #define PRIV(sch) qdisc_priv(sch)
 
 /* Thanks to Doron Oz for this hack */
@@ -52,13 +38,6 @@ struct ingress_qdisc_data {
 static int ingress_graft(struct Qdisc *sch, unsigned long arg,
 			 struct Qdisc *new, struct Qdisc **old)
 {
-#ifdef DEBUG_INGRESS
-	struct ingress_qdisc_data *p = PRIV(sch);
-#endif
-
-	DPRINTK("ingress_graft(sch %p,[qdisc %p],new %p,old %p)\n",
-		sch, p, new, old);
-	DPRINTK("\n ingress_graft: You cannot add qdiscs to classes");
 	return 1;
 }
 
@@ -69,11 +48,6 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 
 static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
 {
-#ifdef DEBUG_INGRESS
-	struct ingress_qdisc_data *p = PRIV(sch);
-#endif
-	DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n",
-		sch, p, classid);
 	return TC_H_MIN(classid) + 1;
 }
 
@@ -90,22 +64,12 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl)
 static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
 			  struct rtattr **tca, unsigned long *arg)
 {
-#ifdef DEBUG_INGRESS
-	struct ingress_qdisc_data *p = PRIV(sch);
-#endif
-	DPRINTK("ingress_change(sch %p,[qdisc %p],classid %x,parent %x),"
-		"arg 0x%lx\n", sch, p, classid, parent, *arg);
-	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
 	return 0;
 }
 
 static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
-#ifdef DEBUG_INGRESS
-	struct ingress_qdisc_data *p = PRIV(sch);
-#endif
-	DPRINTK("ingress_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
-	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
+	return;
 }
 
 static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
@@ -123,9 +87,8 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct tcf_result res;
 	int result;
 
-	D2PRINTK("ingress_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 	result = tc_classify(skb, p->filter_list, &res);
-	D2PRINTK("result %d class 0x%04x\n", result, res.classid);
+
 	/*
 	 * Unlike normal "enqueue" functions, ingress_enqueue returns a
 	 * firewall FW_* code.
@@ -150,7 +113,6 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		break;
 	}
 #else
-	D2PRINTK("Overriding result to ACCEPT\n");
 	result = NF_ACCEPT;
 	sch->bstats.packets++;
 	sch->bstats.bytes += skb->len;
@@ -161,28 +123,16 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
 {
-/*
-	struct ingress_qdisc_data *p = PRIV(sch);
-	D2PRINTK("ingress_dequeue(sch %p,[qdisc %p])\n",sch,PRIV(p));
-*/
 	return NULL;
 }
 
 static int ingress_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
-/*
-	struct ingress_qdisc_data *p = PRIV(sch);
-	D2PRINTK("ingress_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,PRIV(p));
-*/
 	return 0;
 }
 
 static unsigned int ingress_drop(struct Qdisc *sch)
 {
-#ifdef DEBUG_INGRESS
-	struct ingress_qdisc_data *p = PRIV(sch);
-#endif
-	DPRINTK("ingress_drop(sch %p,[qdisc %p])\n", sch, p);
 	return 0;
 }
 
@@ -198,11 +148,6 @@ static unsigned int ing_hook(unsigned int hook, struct sk_buff *skb,
 	struct net_device *dev = skb->dev;
 	int fwres = NF_ACCEPT;
 
-	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
-		skb->sk ? "(owned)" : "(unowned)",
-		skb->dev ? skb->dev->name : "(no dev)",
-		skb->len);
-
 	if (dev->qdisc_ingress) {
 		spin_lock(&dev->ingress_lock);
 		if ((q = dev->qdisc_ingress) != NULL)
@@ -259,26 +204,13 @@ static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 	}
 #endif
 #endif
-
-	DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	p->q = &noop_qdisc;
 	return 0;
 }
 
 static void ingress_reset(struct Qdisc *sch)
 {
-	struct ingress_qdisc_data *p = PRIV(sch);
-
-	DPRINTK("ingress_reset(sch %p,[qdisc %p])\n", sch, p);
-
-/*
-#if 0
-*/
-/* for future use */
-	qdisc_reset(p->q);
-/*
-#endif
-*/
+	return;
 }
 
 /* ------------------------------------------------------------- */
@@ -287,12 +219,7 @@ static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
 
-	DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
 	tcf_destroy_chain(p->filter_list);
-#if 0
-/* for future use */
-	qdisc_destroy(p->q);
-#endif
 }
 
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
-- 
cgit v1.2.3


From cb53c048912976d1d81b8f3475463788f462eebd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:11:48 -0800
Subject: [NET_SCHED]: sch_ingress: remove qdisc_priv() wrapper

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index bdda28d9ebf..cb8ba8b1ad3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -19,8 +19,6 @@
 #include <net/pkt_sched.h>
 
 
-#define PRIV(sch) qdisc_priv(sch)
-
 /* Thanks to Doron Oz for this hack */
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
@@ -74,7 +72,7 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 
 static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
 {
-	struct ingress_qdisc_data *p = PRIV(sch);
+	struct ingress_qdisc_data *p = qdisc_priv(sch);
 
 	return &p->filter_list;
 }
@@ -83,7 +81,7 @@ static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
 
 static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct ingress_qdisc_data *p = PRIV(sch);
+	struct ingress_qdisc_data *p = qdisc_priv(sch);
 	struct tcf_result res;
 	int result;
 
@@ -180,7 +178,7 @@ static struct nf_hook_ops ing_ops[] __read_mostly = {
 
 static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 {
-	struct ingress_qdisc_data *p = PRIV(sch);
+	struct ingress_qdisc_data *p = qdisc_priv(sch);
 
 	/* Make sure either netfilter or preferably CLS_ACT is
 	 * compiled in */
@@ -217,7 +215,7 @@ static void ingress_reset(struct Qdisc *sch)
 
 static void ingress_destroy(struct Qdisc *sch)
 {
-	struct ingress_qdisc_data *p = PRIV(sch);
+	struct ingress_qdisc_data *p = qdisc_priv(sch);
 
 	tcf_destroy_chain(p->filter_list);
 }
-- 
cgit v1.2.3


From c21d4d5dd27329c30c5129d5dbc4942456af361c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:12:10 -0800
Subject: [NET_SCHED]: sch_ingress: remove unused inner qdisc

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index cb8ba8b1ad3..d803cd13762 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -27,7 +27,6 @@ static int nf_registered;
 #endif
 
 struct ingress_qdisc_data {
-	struct Qdisc		*q;
 	struct tcf_proto	*filter_list;
 };
 
@@ -178,8 +177,6 @@ static struct nf_hook_ops ing_ops[] __read_mostly = {
 
 static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 {
-	struct ingress_qdisc_data *p = qdisc_priv(sch);
-
 	/* Make sure either netfilter or preferably CLS_ACT is
 	 * compiled in */
 #ifndef CONFIG_NET_CLS_ACT
@@ -202,7 +199,6 @@ static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 	}
 #endif
 #endif
-	p->q = &noop_qdisc;
 	return 0;
 }
 
-- 
cgit v1.2.3


From e037834758af82a24c4d390b93e1ce5e280a551a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:12:32 -0800
Subject: [NET_SCHED]: sch_ingress: return proper error code in ingress_graft()

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d803cd13762..1bbc648f06e 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -35,7 +35,7 @@ struct ingress_qdisc_data {
 static int ingress_graft(struct Qdisc *sch, unsigned long arg,
 			 struct Qdisc *new, struct Qdisc **old)
 {
-	return 1;
+	return -EOPNOTSUPP;
 }
 
 static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
-- 
cgit v1.2.3


From c6ee877f2e4c4d44c934528b4cec6c8d7f54d3ad Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:12:53 -0800
Subject: [NET_SCHED]: sch_ingress: remove unnecessary ops

- ->reset is optional
- sch_api provides identical defaults for ->dequeue/->requeue
- ->drop can't happen since ingress never has a parent qdisc

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 1bbc648f06e..c69e7bc7fb6 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -118,21 +118,6 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return result;
 }
 
-static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
-{
-	return NULL;
-}
-
-static int ingress_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	return 0;
-}
-
-static unsigned int ingress_drop(struct Qdisc *sch)
-{
-	return 0;
-}
-
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
 static unsigned int ing_hook(unsigned int hook, struct sk_buff *skb,
@@ -202,11 +187,6 @@ static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
-static void ingress_reset(struct Qdisc *sch)
-{
-	return;
-}
-
 /* ------------------------------------------------------------- */
 
 static void ingress_destroy(struct Qdisc *sch)
@@ -248,11 +228,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.id		=	"ingress",
 	.priv_size	=	sizeof(struct ingress_qdisc_data),
 	.enqueue	=	ingress_enqueue,
-	.dequeue	=	ingress_dequeue,
-	.requeue	=	ingress_requeue,
-	.drop		=	ingress_drop,
 	.init		=	ingress_init,
-	.reset		=	ingress_reset,
 	.destroy	=	ingress_destroy,
 	.dump		=	ingress_dump,
 	.owner		=	THIS_MODULE,
-- 
cgit v1.2.3


From 645a1e39e4e3e84a275c5e4a7c8049041146f9b5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:13:19 -0800
Subject: [NET_SCHED]: sch_ingress: move dependencies to Kconfig

Instead of complaining at scheduler initialization time, check the
dependencies in Kconfig.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig       |  1 +
 net/sched/sch_ingress.c | 11 +----------
 2 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f5ab54b1edd..554248e0fc4 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -198,6 +198,7 @@ config NET_SCH_NETEM
 
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
+	depends on NET_CLS_ACT || NETFILTER
 	---help---
 	  Say Y here if you want to use classifiers for incoming packets.
 	  If unsure, say Y.
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index c69e7bc7fb6..b30ca01bdc0 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -162,19 +162,10 @@ static struct nf_hook_ops ing_ops[] __read_mostly = {
 
 static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 {
-	/* Make sure either netfilter or preferably CLS_ACT is
-	 * compiled in */
 #ifndef CONFIG_NET_CLS_ACT
-#ifndef CONFIG_NETFILTER
-	printk("You MUST compile classifier actions into the kernel\n");
-	return -EINVAL;
-#else
+#ifdef CONFIG_NETFILTER
 	printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
-#endif
-#endif
 
-#ifndef CONFIG_NET_CLS_ACT
-#ifdef CONFIG_NETFILTER
 	if (!nf_registered) {
 		if (nf_register_hooks(ing_ops, ARRAY_SIZE(ing_ops)) < 0) {
 			printk("ingress qdisc registration error \n");
-- 
cgit v1.2.3


From 13893567358a8426d03ac3c613befc55431f23ce Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:13:44 -0800
Subject: [NET_SCHED]: sch_ingress: avoid a few #ifdefs

Move the repeating "ifndef CONFIG_NET_CLS_ACT/ifdef CONFIG_NETFILTER"
ifdefs into a single condition.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index b30ca01bdc0..4c7f7e71291 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,11 +20,9 @@
 
 
 /* Thanks to Doron Oz for this hack */
-#ifndef CONFIG_NET_CLS_ACT
-#ifdef CONFIG_NETFILTER
+#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
 static int nf_registered;
 #endif
-#endif
 
 struct ingress_qdisc_data {
 	struct tcf_proto	*filter_list;
@@ -118,8 +116,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return result;
 }
 
-#ifndef CONFIG_NET_CLS_ACT
-#ifdef CONFIG_NETFILTER
+#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
 static unsigned int ing_hook(unsigned int hook, struct sk_buff *skb,
 			     const struct net_device *indev,
 			     const struct net_device *outdev,
@@ -158,12 +155,10 @@ static struct nf_hook_ops ing_ops[] __read_mostly = {
 	},
 };
 #endif
-#endif
 
 static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 {
-#ifndef CONFIG_NET_CLS_ACT
-#ifdef CONFIG_NETFILTER
+#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
 	printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
 
 	if (!nf_registered) {
@@ -173,7 +168,6 @@ static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
 		}
 		nf_registered++;
 	}
-#endif
 #endif
 	return 0;
 }
@@ -240,12 +234,10 @@ static int __init ingress_module_init(void)
 static void __exit ingress_module_exit(void)
 {
 	unregister_qdisc(&ingress_qdisc_ops);
-#ifndef CONFIG_NET_CLS_ACT
-#ifdef CONFIG_NETFILTER
+#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
 	if (nf_registered)
 		nf_unregister_hooks(ing_ops, ARRAY_SIZE(ing_ops));
 #endif
-#endif
 }
 
 module_init(ingress_module_init)
-- 
cgit v1.2.3


From 891687649a5c44a1d53668b4f7728bf97c8db8d5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:14:05 -0800
Subject: [NET_SCHED]: sch_ingress: remove useless printk

The printk about ingress qdisc registration error can't be triggered
under normal circumstances. Since register_qdisc only fails for two
identical registrations, the only way to trigger it is by loading the
sch_ingress modules multiple times under different names, in which
case we already return -EEXIST to userspace.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_ingress.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 4c7f7e71291..72525710b66 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -221,14 +221,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 
 static int __init ingress_module_init(void)
 {
-	int ret = 0;
-
-	if ((ret = register_qdisc(&ingress_qdisc_ops)) < 0) {
-		printk("Unable to register Ingress qdisc\n");
-		return ret;
-	}
-
-	return ret;
+	return register_qdisc(&ingress_qdisc_ops);
 }
 
 static void __exit ingress_module_exit(void)
-- 
cgit v1.2.3


From b7a4a83629c1ddde8c2e6a872618c66577cb20f0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:19:16 -0800
Subject: [VLAN]: Kill useless VLAN_NAME define

The only user already includes __FUNCTION__ (vlan_proto_init) in the
output, which is enough to identify what the message is about.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 032bf44eca5..af252556942 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -89,8 +89,8 @@ static int __init vlan_proto_init(void)
 	err = vlan_proc_init();
 	if (err < 0) {
 		printk(KERN_ERR
-		       "%s %s: can't create entry in proc filesystem!\n",
-		       __FUNCTION__, VLAN_NAME);
+		       "%s: can't create entry in proc filesystem!\n",
+		       __FUNCTION__);
 		return err;
 	}
 
-- 
cgit v1.2.3


From 7bd38d778e3f2250e96fc277040879d66c30ecb4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:19:31 -0800
Subject: [VLAN]: Use dev->stats

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 2 --
 net/8021q/vlan_dev.c | 8 ++++----
 net/8021q/vlanproc.c | 5 +----
 3 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index af252556942..54f23469641 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -366,8 +366,6 @@ void vlan_setup(struct net_device *new_dev)
 	 * the global list.
 	 * iflink is set as well.
 	 */
-	new_dev->get_stats = vlan_dev_get_stats;
-
 	/* Make this thing known as a VLAN device */
 	new_dev->priv_flags |= IFF_802_1Q_VLAN;
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4f99bb86af5..9543e918279 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -174,7 +174,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	skb->dev->last_rx = jiffies;
 
 	/* Bump the rx counters for the VLAN device. */
-	stats = vlan_dev_get_stats(skb->dev);
+	stats = &skb->dev->stats;
 	stats->rx_packets++;
 	stats->rx_bytes += skb->len;
 
@@ -422,7 +422,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len);
 		kfree_skb(sk_tmp);
 		if (skb == NULL) {
-			struct net_device_stats *stats = vlan_dev_get_stats(vdev);
+			struct net_device_stats *stats = &vdev->stats;
 			stats->tx_dropped++;
 			return -ENOMEM;
 		}
@@ -453,7 +453,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 
 int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device_stats *stats = vlan_dev_get_stats(dev);
+	struct net_device_stats *stats = &dev->stats;
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 
 	/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
@@ -514,7 +514,7 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device_stats *stats = vlan_dev_get_stats(dev);
+	struct net_device_stats *stats = &dev->stats;
 	unsigned short veth_TCI;
 
 	/* Construct the second two bytes. This field looks something
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 6cefdf8e381..1972d5cc34e 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -316,7 +316,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
 	const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
-	struct net_device_stats *stats;
+	struct net_device_stats *stats = &vlandev->stats;
 	static const char fmt[] = "%30s %12lu\n";
 	int i;
 
@@ -327,9 +327,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		       vlandev->name, dev_info->vlan_id,
 		       (int)(dev_info->flags & 1), vlandev->priv_flags);
 
-
-	stats = vlan_dev_get_stats(vlandev);
-
 	seq_printf(seq, fmt, "total frames received", stats->rx_packets);
 	seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
 	seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast);
-- 
cgit v1.2.3


From ef3eb3e59bd5723895b2349edec98998198ef5bb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:22:11 -0800
Subject: [VLAN]: Move device setup to vlan_dev.c

Move device setup to vlan_dev.c and make all the VLAN device methods
static.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     |  81 -----------------------------------------
 net/8021q/vlan.h     |  13 -------
 net/8021q/vlan_dev.c | 100 +++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 86 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 54f23469641..8bc6385a06e 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -307,87 +307,6 @@ int unregister_vlan_device(struct net_device *dev)
 	return ret;
 }
 
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-
-static const struct header_ops vlan_header_ops = {
-	.create	 = vlan_dev_hard_header,
-	.rebuild = vlan_dev_rebuild_header,
-	.parse	 = eth_header_parse,
-};
-
-static int vlan_dev_init(struct net_device *dev)
-{
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
-	int subclass = 0;
-
-	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
-	dev->flags  = real_dev->flags & ~IFF_UP;
-	dev->iflink = real_dev->ifindex;
-	dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
-					  (1<<__LINK_STATE_DORMANT))) |
-		      (1<<__LINK_STATE_PRESENT);
-
-	/* ipv6 shared card related stuff */
-	dev->dev_id = real_dev->dev_id;
-
-	if (is_zero_ether_addr(dev->dev_addr))
-		memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
-	if (is_zero_ether_addr(dev->broadcast))
-		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
-
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
-		dev->header_ops      = real_dev->header_ops;
-		dev->hard_header_len = real_dev->hard_header_len;
-		dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
-	} else {
-		dev->header_ops      = &vlan_header_ops;
-		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
-		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
-	}
-
-	if (real_dev->priv_flags & IFF_802_1Q_VLAN)
-		subclass = 1;
-
-	lockdep_set_class_and_subclass(&dev->_xmit_lock,
-				&vlan_netdev_xmit_lock_key, subclass);
-	return 0;
-}
-
-void vlan_setup(struct net_device *new_dev)
-{
-	ether_setup(new_dev);
-
-	/* new_dev->ifindex = 0;  it will be set when added to
-	 * the global list.
-	 * iflink is set as well.
-	 */
-	/* Make this thing known as a VLAN device */
-	new_dev->priv_flags |= IFF_802_1Q_VLAN;
-
-	/* Set us up to have no queue, as the underlying Hardware device
-	 * can do all the queueing we could want.
-	 */
-	new_dev->tx_queue_len = 0;
-
-	/* set up method calls */
-	new_dev->change_mtu = vlan_dev_change_mtu;
-	new_dev->init = vlan_dev_init;
-	new_dev->open = vlan_dev_open;
-	new_dev->stop = vlan_dev_stop;
-	new_dev->set_mac_address = vlan_set_mac_address;
-	new_dev->set_multicast_list = vlan_dev_set_multicast_list;
-	new_dev->change_rx_flags = vlan_change_rx_flags;
-	new_dev->destructor = free_netdev;
-	new_dev->do_ioctl = vlan_dev_ioctl;
-
-	memset(new_dev->broadcast, 0, ETH_ALEN);
-}
-
 static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
 {
 	/* Have to respect userspace enforced dormant state
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 2cd1393073e..7b615d6b6f4 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -49,19 +49,8 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev,
 				   unsigned short VID); /* vlan.c */
 
 /* found in vlan_dev.c */
-int vlan_dev_rebuild_header(struct sk_buff *skb);
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev);
-int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
-			 unsigned short type, const void *daddr,
-			 const void *saddr, unsigned len);
-int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
-int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
-int vlan_dev_change_mtu(struct net_device *dev, int new_mtu);
-int vlan_dev_open(struct net_device* dev);
-int vlan_dev_stop(struct net_device* dev);
-int vlan_set_mac_address(struct net_device *dev, void *p);
-int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, short vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
@@ -70,8 +59,6 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
 			   u32 flag, short flag_val);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
-void vlan_change_rx_flags(struct net_device *dev, int change);
-void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
 
 int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
 void vlan_setup(struct net_device *dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9543e918279..51ce4217c03 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -47,7 +47,7 @@
  *
  * TODO:  This needs a checkup, I'm ignorant here. --BLG
  */
-int vlan_dev_rebuild_header(struct sk_buff *skb)
+static int vlan_dev_rebuild_header(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -342,9 +342,10 @@ static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev
  *  This is called when the SKB is moving down the stack towards the
  *  physical devices.
  */
-int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
-			 unsigned short type,
-			 const void *daddr, const void *saddr, unsigned len)
+static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
+				unsigned short type,
+				const void *daddr, const void *saddr,
+				unsigned int len)
 {
 	struct vlan_hdr *vhdr;
 	unsigned short veth_TCI = 0;
@@ -451,7 +452,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return rc;
 }
 
-int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct net_device_stats *stats = &dev->stats;
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -512,7 +513,8 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
+					    struct net_device *dev)
 {
 	struct net_device_stats *stats = &dev->stats;
 	unsigned short veth_TCI;
@@ -536,7 +538,7 @@ int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev
 	return 0;
 }
 
-int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
+static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/* TODO: gotta make sure the underlying layer can handle it,
 	 * maybe an IFF_VLAN_CAPABLE flag for devices?
@@ -626,7 +628,7 @@ void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
 	*result = VLAN_DEV_INFO(dev)->vlan_id;
 }
 
-int vlan_dev_open(struct net_device *dev)
+static int vlan_dev_open(struct net_device *dev)
 {
 	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
 	struct net_device *real_dev = vlan->real_dev;
@@ -650,7 +652,7 @@ int vlan_dev_open(struct net_device *dev)
 	return 0;
 }
 
-int vlan_dev_stop(struct net_device *dev)
+static int vlan_dev_stop(struct net_device *dev)
 {
 	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
 
@@ -666,7 +668,7 @@ int vlan_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-int vlan_set_mac_address(struct net_device *dev, void *p)
+static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 {
 	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
 	struct sockaddr *addr = p;
@@ -692,7 +694,7 @@ out:
 	return 0;
 }
 
-int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
 	struct ifreq ifrr;
@@ -716,7 +718,7 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return err;
 }
 
-void vlan_change_rx_flags(struct net_device *dev, int change)
+static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
 	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
 
@@ -726,8 +728,78 @@ void vlan_change_rx_flags(struct net_device *dev, int change)
 		dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
 }
 
-/** Taken from Gleb + Lennert's VLAN code, and modified... */
-void vlan_dev_set_multicast_list(struct net_device *vlan_dev)
+static void vlan_dev_set_multicast_list(struct net_device *vlan_dev)
 {
 	dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev);
 }
+
+/*
+ * vlan network devices have devices nesting below it, and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key vlan_netdev_xmit_lock_key;
+
+static const struct header_ops vlan_header_ops = {
+	.create	 = vlan_dev_hard_header,
+	.rebuild = vlan_dev_rebuild_header,
+	.parse	 = eth_header_parse,
+};
+
+static int vlan_dev_init(struct net_device *dev)
+{
+	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	int subclass = 0;
+
+	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
+	dev->flags  = real_dev->flags & ~IFF_UP;
+	dev->iflink = real_dev->ifindex;
+	dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+					  (1<<__LINK_STATE_DORMANT))) |
+		      (1<<__LINK_STATE_PRESENT);
+
+	/* ipv6 shared card related stuff */
+	dev->dev_id = real_dev->dev_id;
+
+	if (is_zero_ether_addr(dev->dev_addr))
+		memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
+	if (is_zero_ether_addr(dev->broadcast))
+		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
+
+	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+		dev->header_ops      = real_dev->header_ops;
+		dev->hard_header_len = real_dev->hard_header_len;
+		dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
+	} else {
+		dev->header_ops      = &vlan_header_ops;
+		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
+		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
+	}
+
+	if (real_dev->priv_flags & IFF_802_1Q_VLAN)
+		subclass = 1;
+
+	lockdep_set_class_and_subclass(&dev->_xmit_lock,
+				&vlan_netdev_xmit_lock_key, subclass);
+	return 0;
+}
+
+void vlan_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->priv_flags		|= IFF_802_1Q_VLAN;
+	dev->tx_queue_len	= 0;
+
+	dev->change_mtu		= vlan_dev_change_mtu;
+	dev->init		= vlan_dev_init;
+	dev->open		= vlan_dev_open;
+	dev->stop		= vlan_dev_stop;
+	dev->set_mac_address	= vlan_dev_set_mac_address;
+	dev->set_multicast_list	= vlan_dev_set_multicast_list;
+	dev->change_rx_flags	= vlan_dev_change_rx_flags;
+	dev->do_ioctl		= vlan_dev_ioctl;
+	dev->destructor		= free_netdev;
+
+	memset(dev->broadcast, 0, ETH_ALEN);
+}
-- 
cgit v1.2.3


From 62f99efce61024f1c645c2c574882b510c77c579 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:22:30 -0800
Subject: [VLAN]: Kill useless check

vlan->real_dev is always equal to the device since thats what we used
for the lookup. It doesn't even seem worth a WARN_ON or BUG_ON.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 51ce4217c03..50d8edcbb89 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -181,24 +181,6 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	/* Take off the VLAN header (4 bytes currently) */
 	skb_pull_rcsum(skb, VLAN_HLEN);
 
-	/* Ok, lets check to make sure the device (dev) we
-	 * came in on is what this VLAN is attached to.
-	 */
-
-	if (dev != VLAN_DEV_INFO(skb->dev)->real_dev) {
-		rcu_read_unlock();
-
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: dropping skb: %p because came in on wrong device, dev: %s  real_dev: %s, skb_dev: %s\n",
-			__FUNCTION__, skb, dev->name,
-			VLAN_DEV_INFO(skb->dev)->real_dev->name,
-			skb->dev->name);
-#endif
-		kfree_skb(skb);
-		stats->rx_errors++;
-		return -1;
-	}
-
 	/*
 	 * Deal with ingress priority mapping.
 	 */
-- 
cgit v1.2.3


From 40f98e1af4c6082f7f98391540a2a1ade030480a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:24:30 -0800
Subject: [VLAN]: Clean up debugging and printks

- use pr_* functions and common prefix for non-device related messages

- remove VLAN_ printk levels

- kill lots of useless debugging statements

- remove a few unnecessary printks like for double VID registration (already
  returns -EEXIST) and kill of a number of unnecessary checks in
  vlan_proc_{add,rem}_dev() that are already performed by the caller

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 58 ++++++++++------------------------------------------
 net/8021q/vlan.h     | 25 ----------------------
 net/8021q/vlan_dev.c | 56 +++++++++++++++++---------------------------------
 net/8021q/vlanproc.c | 29 --------------------------
 4 files changed, 30 insertions(+), 138 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8bc6385a06e..6edd1919d18 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -80,16 +80,13 @@ static int __init vlan_proto_init(void)
 {
 	int err;
 
-	printk(VLAN_INF "%s v%s %s\n",
-	       vlan_fullname, vlan_version, vlan_copyright);
-	printk(VLAN_INF "All bugs added by %s\n",
-	       vlan_buggyright);
+	pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
+	pr_info("All bugs added by %s\n", vlan_buggyright);
 
 	/* proc file system initialization */
 	err = vlan_proc_init();
 	if (err < 0) {
-		printk(KERN_ERR
-		       "%s: can't create entry in proc filesystem!\n",
+		pr_err("%s: can't create entry in proc filesystem!\n",
 		       __FUNCTION__);
 		return err;
 	}
@@ -233,10 +230,6 @@ static int unregister_vlan_dev(struct net_device *real_dev,
 	struct vlan_group *grp;
 	int i, ret;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: VID: %i\n", __FUNCTION__, vlan_id);
-#endif
-
 	/* sanity check */
 	if (vlan_id >= VLAN_VID_MASK)
 		return -EINVAL;
@@ -329,23 +322,22 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 
 int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 {
+	char *name = real_dev->name;
+
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
-		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
-			__FUNCTION__, real_dev->name);
+		pr_info("8021q: VLANs not supported on %s\n", name);
 		return -EOPNOTSUPP;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
 	    !real_dev->vlan_rx_register) {
-		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
-			__FUNCTION__, real_dev->name);
+		pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
-		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
-			__FUNCTION__, real_dev->name);
+		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
@@ -355,11 +347,8 @@ int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 	if (!(real_dev->flags & IFF_UP))
 		return -ENETDOWN;
 
-	if (__find_vlan_dev(real_dev, vlan_id) != NULL) {
-		/* was already registered. */
-		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
+	if (__find_vlan_dev(real_dev, vlan_id) != NULL)
 		return -EEXIST;
-	}
 
 	return 0;
 }
@@ -399,8 +388,8 @@ int register_vlan_dev(struct net_device *dev)
 		real_dev->vlan_rx_add_vid(real_dev, vlan_id);
 
 	if (vlan_proc_add_dev(dev) < 0)
-		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
-		       dev->name);
+		pr_warning("8021q: failed to add proc entry for %s\n",
+			   dev->name);
 	return 0;
 
 out_free_group:
@@ -419,11 +408,6 @@ static int register_vlan_device(struct net_device *real_dev,
 	char name[IFNAMSIZ];
 	int err;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
-		__FUNCTION__, eth_IF_name, VLAN_ID);
-#endif
-
 	if (VLAN_ID >= VLAN_VID_MASK)
 		return -ERANGE;
 
@@ -432,10 +416,6 @@ static int register_vlan_device(struct net_device *real_dev,
 		return err;
 
 	/* Gotta set up the fields for the device. */
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
-	       vlan_name_type);
-#endif
 	switch (vlan_name_type) {
 	case VLAN_NAME_TYPE_RAW_PLUS_VID:
 		/* name will look like:	 eth1.0005 */
@@ -472,13 +452,6 @@ static int register_vlan_device(struct net_device *real_dev,
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
-	VLAN_MEM_DBG("new_dev->priv malloc, addr: %p  size: %i\n",
-		     new_dev->priv,
-		     sizeof(struct vlan_dev_info));
-#endif
-
 	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
 	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
 	VLAN_DEV_INFO(new_dev)->dent = NULL;
@@ -489,9 +462,6 @@ static int register_vlan_device(struct net_device *real_dev,
 	if (err < 0)
 		goto out_free_newdev;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "Allocated new device successfully, returning.\n");
-#endif
 	return 0;
 
 out_free_newdev:
@@ -641,10 +611,6 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 	args.device1[23] = 0;
 	args.u.device2[23] = 0;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
-#endif
-
 	rtnl_lock();
 
 	switch (args.cmd) {
@@ -763,8 +729,6 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	default:
 		/* pass on to underlying device instead?? */
-		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
-			__FUNCTION__, args.cmd);
 		err = -EINVAL;
 		break;
 	}
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 7b615d6b6f4..56378651cc4 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,31 +3,6 @@
 
 #include <linux/if_vlan.h>
 
-/*  Uncomment this if you want debug traces to be shown. */
-/* #define VLAN_DEBUG */
-
-#define VLAN_ERR KERN_ERR
-#define VLAN_INF KERN_INFO
-#define VLAN_DBG KERN_ALERT /* change these... to debug, having a hard time
-			     * changing the log level at run-time..for some reason.
-			     */
-
-/*
-
-These I use for memory debugging.  I feared a leak at one time, but
-I never found it..and the problem seems to have dissappeared.  Still,
-I'll bet they might prove useful again... --Ben
-
-
-#define VLAN_MEM_DBG(x, y, z) printk(VLAN_DBG "%s:  "  x, __FUNCTION__, y, z);
-#define VLAN_FMEM_DBG(x, y) printk(VLAN_DBG "%s:  " x, __FUNCTION__, y);
-*/
-
-/* This way they don't do anything! */
-#define VLAN_MEM_DBG(x, y, z)
-#define VLAN_FMEM_DBG(x, y)
-
-
 extern unsigned short vlan_name_type;
 
 #define VLAN_GRP_HASH_SHIFT	5
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 50d8edcbb89..756a71c1f4a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -60,9 +60,8 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 		return arp_find(veth->h_dest, skb);
 #endif
 	default:
-		printk(VLAN_DBG
-		       "%s: unable to resolve type %X addresses.\n",
-		       dev->name, ntohs(veth->h_vlan_encapsulated_proto));
+		pr_debug("%s: unable to resolve type %X addresses.\n",
+			 dev->name, ntohs(veth->h_vlan_encapsulated_proto));
 
 		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
@@ -142,11 +141,6 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	vid = (vlan_TCI & VLAN_VID_MASK);
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: skb: %p vlan_id: %hx\n",
-		__FUNCTION__, skb, vid);
-#endif
-
 	/* Ok, we will find the correct VLAN device, strip the header,
 	 * and then go on as usual.
 	 */
@@ -162,11 +156,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	skb->dev = __find_vlan_dev(dev, vid);
 	if (!skb->dev) {
 		rcu_read_unlock();
-
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: ERROR: No net_device for VID: %i on dev: %s [%i]\n",
-			__FUNCTION__, (unsigned int)(vid), dev->name, dev->ifindex);
-#endif
+		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s [%i]\n",
+			 __FUNCTION__, (unsigned int)vid, dev->name, dev->ifindex);
 		kfree_skb(skb);
 		return -1;
 	}
@@ -186,11 +177,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	skb->priority = vlan_get_ingress_priority(skb->dev, ntohs(vhdr->h_vlan_TCI));
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: priority: %lu  for TCI: %hu (hbo)\n",
-		__FUNCTION__, (unsigned long)(skb->priority),
-		ntohs(vhdr->h_vlan_TCI));
-#endif
+	pr_debug("%s: priority: %u for TCI: %hu\n",
+		 __FUNCTION__, skb->priority, ntohs(vhdr->h_vlan_TCI));
 
 	/* The ethernet driver already did the pkt_type calculations
 	 * for us...
@@ -335,10 +323,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	int build_vlan_header = 0;
 	struct net_device *vdev = dev; /* save this for the bottom of the method */
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: skb: %p type: %hx len: %x vlan_id: %hx, daddr: %p\n",
-		__FUNCTION__, skb, type, len, VLAN_DEV_INFO(dev)->vlan_id, daddr);
-#endif
+	pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
+		 __FUNCTION__, skb, type, len, VLAN_DEV_INFO(dev)->vlan_id, daddr);
 
 	/* build vlan header only if re_order_header flag is NOT set.  This
 	 * fixes some programs that get confused when they see a VLAN device
@@ -410,9 +396,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 			return -ENOMEM;
 		}
 		VLAN_DEV_INFO(vdev)->cnt_inc_headroom_on_tx++;
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
-#endif
+		pr_debug("%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
 	}
 
 	if (build_vlan_header) {
@@ -453,10 +437,8 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* This is not a VLAN frame...but we can fix that! */
 		VLAN_DEV_INFO(dev)->cnt_encap_on_xmit++;
 
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: proto to encap: 0x%hx (hbo)\n",
-			__FUNCTION__, htons(veth->h_vlan_proto));
-#endif
+		pr_debug("%s: proto to encap: 0x%hx\n",
+			 __FUNCTION__, htons(veth->h_vlan_proto));
 		/* Construct the second two bytes. This field looks something
 		 * like:
 		 * usr_priority: 3 bits	 (high bits)
@@ -477,14 +459,15 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: about to send skb: %p to dev: %s\n",
+	pr_debug("%s: about to send skb: %p to dev: %s\n",
 		__FUNCTION__, skb, skb->dev->name);
-	printk(VLAN_DBG "  %2hx.%2hx.%2hx.%2xh.%2hx.%2hx %2hx.%2hx.%2hx.%2hx.%2hx.%2hx %4hx %4hx %4hx\n",
-	       veth->h_dest[0], veth->h_dest[1], veth->h_dest[2], veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
-	       veth->h_source[0], veth->h_source[1], veth->h_source[2], veth->h_source[3], veth->h_source[4], veth->h_source[5],
-	       veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto);
-#endif
+	pr_debug("  " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n",
+		 veth->h_dest[0], veth->h_dest[1], veth->h_dest[2],
+		 veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
+		 veth->h_source[0], veth->h_source[1], veth->h_source[2],
+		 veth->h_source[3], veth->h_source[4], veth->h_source[5],
+		 veth->h_vlan_proto, veth->h_vlan_TCI,
+		 veth->h_vlan_encapsulated_proto);
 
 	stats->tx_packets++; /* for statics only */
 	stats->tx_bytes += skb->len;
@@ -596,7 +579,6 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
 		}
 		return 0;
 	}
-	printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag);
 	return -EINVAL;
 }
 
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 1972d5cc34e..5da02e29a2c 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -179,13 +179,6 @@ int vlan_proc_add_dev (struct net_device *vlandev)
 {
 	struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
 
-	if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
-		printk(KERN_ERR
-		       "ERROR:	vlan_proc_add, device -:%s:- is NOT a VLAN\n",
-		       vlandev->name);
-		return -EINVAL;
-	}
-
 	dev_info->dent = create_proc_entry(vlandev->name,
 					   S_IFREG|S_IRUSR|S_IWUSR,
 					   proc_vlan_dir);
@@ -194,11 +187,6 @@ int vlan_proc_add_dev (struct net_device *vlandev)
 
 	dev_info->dent->proc_fops = &vlandev_fops;
 	dev_info->dent->data = vlandev;
-
-#ifdef VLAN_DEBUG
-	printk(KERN_ERR "vlan_proc_add, device -:%s:- being added.\n",
-	       vlandev->name);
-#endif
 	return 0;
 }
 
@@ -207,28 +195,11 @@ int vlan_proc_add_dev (struct net_device *vlandev)
  */
 int vlan_proc_rem_dev(struct net_device *vlandev)
 {
-	if (!vlandev) {
-		printk(VLAN_ERR "%s: invalid argument: %p\n",
-			__FUNCTION__, vlandev);
-		return -EINVAL;
-	}
-
-	if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
-		printk(VLAN_DBG "%s: invalid argument, device: %s is not a VLAN device, priv_flags: 0x%4hX.\n",
-			__FUNCTION__, vlandev->name, vlandev->priv_flags);
-		return -EINVAL;
-	}
-
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: dev: %p\n", __FUNCTION__, vlandev);
-#endif
-
 	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
 	if (VLAN_DEV_INFO(vlandev)->dent) {
 		remove_proc_entry(VLAN_DEV_INFO(vlandev)->dent->name, proc_vlan_dir);
 		VLAN_DEV_INFO(vlandev)->dent = NULL;
 	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 198a291ce3a9103f4738600e3cf5416b66e009d9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:24:59 -0800
Subject: [VLAN]: Remove non-implemented ioctls

The GET_VLAN_INGRESS_PRIORITY_CMD/GET_VLAN_EGRESS_PRIORITY_CMD ioctls are
not implemented and won't be, new functionality will be added to the netlink
interface. Remove the code and make the ioctl handler return -EOPNOTSUPP
for unknown commands instead of -EINVAL.

Also remove a comment about passing unknown commands to the underlying
device, that doesn't make any sense since its a VLAN specific ioctl and
if its not implemented here, its implemented nowhere.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6edd1919d18..69a9e0207b1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -688,26 +688,6 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		err = unregister_vlan_device(dev);
 		break;
 
-	case GET_VLAN_INGRESS_PRIORITY_CMD:
-		/* TODO:  Implement
-		   err = vlan_dev_get_ingress_priority(args);
-		   if (copy_to_user((void*)arg, &args,
-			sizeof(struct vlan_ioctl_args))) {
-			err = -EFAULT;
-		   }
-		*/
-		err = -EINVAL;
-		break;
-	case GET_VLAN_EGRESS_PRIORITY_CMD:
-		/* TODO:  Implement
-		   err = vlan_dev_get_egress_priority(args.device1, &(args.args);
-		   if (copy_to_user((void*)arg, &args,
-			sizeof(struct vlan_ioctl_args))) {
-			err = -EFAULT;
-		   }
-		*/
-		err = -EINVAL;
-		break;
 	case GET_VLAN_REALDEV_NAME_CMD:
 		err = 0;
 		vlan_dev_get_realdev_name(dev, args.u.device2);
@@ -728,8 +708,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		break;
 
 	default:
-		/* pass on to underlying device instead?? */
-		err = -EINVAL;
+		err = -EOPNOTSUPP;
 		break;
 	}
 out:
-- 
cgit v1.2.3


From 69ab4b7d6db68396dbfa827daa8d6f30f9b546a8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:25:15 -0800
Subject: [VLAN]: Clean up initialization code

- move module init/exit functions to end of file, remove some now unnecessary
  forward declarations
- remove some obvious comments
- clean up proc init function and move a proc-related printk there

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 141 +++++++++++++++++++++------------------------------
 net/8021q/vlanproc.c |  21 ++++----
 2 files changed, 70 insertions(+), 92 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 69a9e0207b1..006d9a9bac9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -50,16 +50,6 @@ static char vlan_version[] = DRV_VERSION;
 static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
-static int vlan_device_event(struct notifier_block *, unsigned long, void *);
-static int vlan_ioctl_handler(struct net *net, void __user *);
-static int unregister_vlan_dev(struct net_device *, unsigned short );
-
-static struct notifier_block vlan_notifier_block = {
-	.notifier_call = vlan_device_event,
-};
-
-/* These may be changed at run-time through IOCTLs */
-
 /* Determines interface naming scheme. */
 unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD;
 
@@ -70,79 +60,6 @@ static struct packet_type vlan_packet_type = {
 
 /* End of global variables definitions. */
 
-/*
- * Function vlan_proto_init (pro)
- *
- *    Initialize VLAN protocol layer,
- *
- */
-static int __init vlan_proto_init(void)
-{
-	int err;
-
-	pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
-	pr_info("All bugs added by %s\n", vlan_buggyright);
-
-	/* proc file system initialization */
-	err = vlan_proc_init();
-	if (err < 0) {
-		pr_err("%s: can't create entry in proc filesystem!\n",
-		       __FUNCTION__);
-		return err;
-	}
-
-	dev_add_pack(&vlan_packet_type);
-
-	/* Register us to receive netdevice events */
-	err = register_netdevice_notifier(&vlan_notifier_block);
-	if (err < 0)
-		goto err1;
-
-	err = vlan_netlink_init();
-	if (err < 0)
-		goto err2;
-
-	vlan_ioctl_set(vlan_ioctl_handler);
-	return 0;
-
-err2:
-	unregister_netdevice_notifier(&vlan_notifier_block);
-err1:
-	vlan_proc_cleanup();
-	dev_remove_pack(&vlan_packet_type);
-	return err;
-}
-
-/*
- *     Module 'remove' entry point.
- *     o delete /proc/net/router directory and static entries.
- */
-static void __exit vlan_cleanup_module(void)
-{
-	int i;
-
-	vlan_ioctl_set(NULL);
-	vlan_netlink_fini();
-
-	/* Un-register us from receiving netdevice events */
-	unregister_netdevice_notifier(&vlan_notifier_block);
-
-	dev_remove_pack(&vlan_packet_type);
-
-	/* This table must be empty if there are no module
-	 * references left.
-	 */
-	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++) {
-		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
-	}
-	vlan_proc_cleanup();
-
-	synchronize_net();
-}
-
-module_init(vlan_proto_init);
-module_exit(vlan_cleanup_module);
-
 /* Must be invoked with RCU read lock (no preempt) */
 static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
 {
@@ -592,6 +509,10 @@ out:
 	return NOTIFY_DONE;
 }
 
+static struct notifier_block vlan_notifier_block __read_mostly = {
+	.notifier_call = vlan_device_event,
+};
+
 /*
  *	VLAN IOCTL handler.
  *	o execute requested action or pass command to the device driver
@@ -716,5 +637,59 @@ out:
 	return err;
 }
 
+static int __init vlan_proto_init(void)
+{
+	int err;
+
+	pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
+	pr_info("All bugs added by %s\n", vlan_buggyright);
+
+	err = vlan_proc_init();
+	if (err < 0)
+		goto err1;
+
+	err = register_netdevice_notifier(&vlan_notifier_block);
+	if (err < 0)
+		goto err2;
+
+	err = vlan_netlink_init();
+	if (err < 0)
+		goto err3;
+
+	dev_add_pack(&vlan_packet_type);
+	vlan_ioctl_set(vlan_ioctl_handler);
+	return 0;
+
+err3:
+	unregister_netdevice_notifier(&vlan_notifier_block);
+err2:
+	vlan_proc_cleanup();
+err1:
+	return err;
+}
+
+static void __exit vlan_cleanup_module(void)
+{
+	unsigned int i;
+
+	vlan_ioctl_set(NULL);
+	vlan_netlink_fini();
+
+	unregister_netdevice_notifier(&vlan_notifier_block);
+
+	dev_remove_pack(&vlan_packet_type);
+
+	/* This table must be empty if there are no module references left. */
+	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
+		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
+
+	vlan_proc_cleanup();
+
+	synchronize_net();
+}
+
+module_init(vlan_proto_init);
+module_exit(vlan_cleanup_module);
+
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 5da02e29a2c..971e6233801 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -158,15 +158,18 @@ void vlan_proc_cleanup(void)
 int __init vlan_proc_init(void)
 {
 	proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net);
-	if (proc_vlan_dir) {
-		proc_vlan_conf = create_proc_entry(name_conf,
-						   S_IFREG|S_IRUSR|S_IWUSR,
-						   proc_vlan_dir);
-		if (proc_vlan_conf) {
-			proc_vlan_conf->proc_fops = &vlan_fops;
-			return 0;
-		}
-	}
+	if (!proc_vlan_dir)
+		goto err;
+
+	proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
+					   proc_vlan_dir);
+	if (!proc_vlan_conf)
+		goto err;
+	proc_vlan_conf->proc_fops = &vlan_fops;
+	return 0;
+
+err:
+	pr_err("%s: can't create entry in proc filesystem!\n", __FUNCTION__);
 	vlan_proc_cleanup();
 	return -ENOBUFS;
 }
-- 
cgit v1.2.3


From acc5efbcd2a023c8801f2bba39971cf93812ce7c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:25:31 -0800
Subject: [VLAN]: Clean up unregister_vlan_dev

Save two levels of indentation by aborting on error conditions,
remove unnecessary initialization to NULL and remove two obvious
comments.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 72 +++++++++++++++++++++++++-------------------------------
 1 file changed, 32 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 006d9a9bac9..ad34e4a0326 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -142,63 +142,55 @@ static void vlan_rcu_free(struct rcu_head *rcu)
 static int unregister_vlan_dev(struct net_device *real_dev,
 			       unsigned short vlan_id)
 {
-	struct net_device *dev = NULL;
+	struct net_device *dev;
 	int real_dev_ifindex = real_dev->ifindex;
 	struct vlan_group *grp;
-	int i, ret;
+	unsigned int i;
+	int ret;
 
-	/* sanity check */
 	if (vlan_id >= VLAN_VID_MASK)
 		return -EINVAL;
 
 	ASSERT_RTNL();
 	grp = __vlan_find_group(real_dev_ifindex);
+	if (!grp)
+		return -ENOENT;
 
-	ret = 0;
-
-	if (grp) {
-		dev = vlan_group_get_device(grp, vlan_id);
-		if (dev) {
-			/* Remove proc entry */
-			vlan_proc_rem_dev(dev);
-
-			/* Take it out of our own structures, but be sure to
-			 * interlock with HW accelerating devices or SW vlan
-			 * input packet processing.
-			 */
-			if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
-				real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
+	dev = vlan_group_get_device(grp, vlan_id);
+	if (!dev)
+		return -ENOENT;
 
-			vlan_group_set_device(grp, vlan_id, NULL);
-			synchronize_net();
+	vlan_proc_rem_dev(dev);
 
+	/* Take it out of our own structures, but be sure to interlock with
+	 * HW accelerating devices or SW vlan input packet processing.
+	 */
+	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+		real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
 
-			/* Caller unregisters (and if necessary, puts)
-			 * VLAN device, but we get rid of the reference to
-			 * real_dev here.
-			 */
-			dev_put(real_dev);
+	vlan_group_set_device(grp, vlan_id, NULL);
+	synchronize_net();
 
-			/* If the group is now empty, kill off the
-			 * group.
-			 */
-			for (i = 0; i < VLAN_VID_MASK; i++)
-				if (vlan_group_get_device(grp, i))
-					break;
+	/* Caller unregisters (and if necessary, puts) VLAN device, but we
+	 * get rid of the reference to real_dev here.
+	 */
+	dev_put(real_dev);
 
-			if (i == VLAN_VID_MASK) {
-				if (real_dev->features & NETIF_F_HW_VLAN_RX)
-					real_dev->vlan_rx_register(real_dev, NULL);
+	/* If the group is now empty, kill off the group. */
+	ret = 0;
+	for (i = 0; i < VLAN_VID_MASK; i++)
+		if (vlan_group_get_device(grp, i))
+			break;
 
-				hlist_del_rcu(&grp->hlist);
+	if (i == VLAN_VID_MASK) {
+		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+			real_dev->vlan_rx_register(real_dev, NULL);
 
-				/* Free the group, after all cpu's are done. */
-				call_rcu(&grp->rcu, vlan_rcu_free);
+		hlist_del_rcu(&grp->hlist);
 
-				grp = NULL;
-				ret = 1;
-			}
-		}
+		/* Free the group, after all cpu's are done. */
+		call_rcu(&grp->rcu, vlan_rcu_free);
+		ret = 1;
 	}
 
 	return ret;
-- 
cgit v1.2.3


From af30151709bcace1ca844d4bb8b7e2e392ff81eb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:25:50 -0800
Subject: [VLAN]: Simplify vlan unregistration

Keep track of the number of VLAN devices in a vlan group. This allows
to have the caller sense when the group is going to be destroyed and
stop using it, which in turn allows to remove the wrapper around
unregister_vlan_dev for the NETDEV_UNREGISTER notifier and avoid
iterating over all possible VLAN ids whenever a device in unregistered.

Also fix what looks like a use-after-free (but is actually safe since
we're holding the RTNL), the real_dev reference should not be dropped
while we still use it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c         | 76 +++++++++++++-----------------------------------
 net/8021q/vlan.h         |  2 +-
 net/8021q/vlan_netlink.c |  7 +----
 3 files changed, 22 insertions(+), 63 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ad34e4a0326..ac796385410 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -132,33 +132,17 @@ static void vlan_rcu_free(struct rcu_head *rcu)
 	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
 }
 
-
-/* This returns 0 if everything went fine.
- * It will return 1 if the group was killed as a result.
- * A negative return indicates failure.
- *
- * The RTNL lock must be held.
- */
-static int unregister_vlan_dev(struct net_device *real_dev,
-			       unsigned short vlan_id)
+void unregister_vlan_dev(struct net_device *dev)
 {
-	struct net_device *dev;
-	int real_dev_ifindex = real_dev->ifindex;
+	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct net_device *real_dev = vlan->real_dev;
 	struct vlan_group *grp;
-	unsigned int i;
-	int ret;
-
-	if (vlan_id >= VLAN_VID_MASK)
-		return -EINVAL;
+	unsigned short vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
-	grp = __vlan_find_group(real_dev_ifindex);
-	if (!grp)
-		return -ENOENT;
 
-	dev = vlan_group_get_device(grp, vlan_id);
-	if (!dev)
-		return -ENOENT;
+	grp = __vlan_find_group(real_dev->ifindex);
+	BUG_ON(!grp);
 
 	vlan_proc_rem_dev(dev);
 
@@ -169,20 +153,12 @@ static int unregister_vlan_dev(struct net_device *real_dev,
 		real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
 
 	vlan_group_set_device(grp, vlan_id, NULL);
-	synchronize_net();
+	grp->nr_vlans--;
 
-	/* Caller unregisters (and if necessary, puts) VLAN device, but we
-	 * get rid of the reference to real_dev here.
-	 */
-	dev_put(real_dev);
+	synchronize_net();
 
 	/* If the group is now empty, kill off the group. */
-	ret = 0;
-	for (i = 0; i < VLAN_VID_MASK; i++)
-		if (vlan_group_get_device(grp, i))
-			break;
-
-	if (i == VLAN_VID_MASK) {
+	if (grp->nr_vlans == 0) {
 		if (real_dev->features & NETIF_F_HW_VLAN_RX)
 			real_dev->vlan_rx_register(real_dev, NULL);
 
@@ -190,23 +166,12 @@ static int unregister_vlan_dev(struct net_device *real_dev,
 
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&grp->rcu, vlan_rcu_free);
-		ret = 1;
 	}
 
-	return ret;
-}
-
-int unregister_vlan_device(struct net_device *dev)
-{
-	int ret;
+	/* Get rid of the vlan's reference to real_dev */
+	dev_put(real_dev);
 
-	ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
-				  VLAN_DEV_INFO(dev)->vlan_id);
 	unregister_netdevice(dev);
-
-	if (ret == 1)
-		ret = 0;
-	return ret;
 }
 
 static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
@@ -291,6 +256,8 @@ int register_vlan_dev(struct net_device *dev)
 	 * it into our local structure.
 	 */
 	vlan_group_set_device(grp, vlan_id, dev);
+	grp->nr_vlans++;
+
 	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
 		real_dev->vlan_rx_register(real_dev, ngrp);
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
@@ -479,20 +446,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_UNREGISTER:
 		/* Delete all VLANs for this dev. */
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
-			int ret;
-
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
 
-			ret = unregister_vlan_dev(dev,
-						  VLAN_DEV_INFO(vlandev)->vlan_id);
-
-			unregister_netdevice(vlandev);
+			/* unregistration of last vlan destroys group, abort
+			 * afterwards */
+			if (grp->nr_vlans == 1)
+				i = VLAN_GROUP_ARRAY_LEN;
 
-			/* Group was destroyed? */
-			if (ret == 1)
-				break;
+			unregister_vlan_dev(vlandev);
 		}
 		break;
 	}
@@ -598,7 +561,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		err = unregister_vlan_device(dev);
+		unregister_vlan_dev(dev);
+		err = 0;
 		break;
 
 	case GET_VLAN_REALDEV_NAME_CMD:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 56378651cc4..0cfdf77b497 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -38,7 +38,7 @@ void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
 int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
-int unregister_vlan_device(struct net_device *dev);
+void unregister_vlan_dev(struct net_device *dev);
 
 int vlan_netlink_init(void);
 void vlan_netlink_fini(void);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0996185e2ed..9ee63583ed2 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -137,11 +137,6 @@ static int vlan_newlink(struct net_device *dev,
 	return register_vlan_dev(dev);
 }
 
-static void vlan_dellink(struct net_device *dev)
-{
-	unregister_vlan_device(dev);
-}
-
 static inline size_t vlan_qos_map_size(unsigned int n)
 {
 	if (n == 0)
@@ -226,7 +221,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,
 	.changelink	= vlan_changelink,
-	.dellink	= vlan_dellink,
+	.dellink	= unregister_vlan_dev,
 	.get_size	= vlan_get_size,
 	.fill_info	= vlan_fill_info,
 };
-- 
cgit v1.2.3


From 9dfebcc6479c55c001e4bb5fe7cc16b6799c43a7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:26:07 -0800
Subject: [VLAN]: Turn VLAN_DEV_INFO into inline function

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c         | 14 ++++++------
 net/8021q/vlan_dev.c     | 56 ++++++++++++++++++++++++------------------------
 net/8021q/vlan_netlink.c | 10 ++++-----
 net/8021q/vlanproc.c     | 12 +++++------
 4 files changed, 46 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ac796385410..d058c0ef3b6 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -134,7 +134,7 @@ static void vlan_rcu_free(struct rcu_head *rcu)
 
 void unregister_vlan_dev(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	struct vlan_group *grp;
 	unsigned short vlan_id = vlan->vlan_id;
@@ -229,7 +229,7 @@ int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 
 int register_vlan_dev(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	unsigned short vlan_id = vlan->vlan_id;
 	struct vlan_group *grp, *ngrp = NULL;
@@ -328,10 +328,10 @@ static int register_vlan_device(struct net_device *real_dev,
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
-	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
-	VLAN_DEV_INFO(new_dev)->dent = NULL;
-	VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+	vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+	vlan_dev_info(new_dev)->real_dev = real_dev;
+	vlan_dev_info(new_dev)->dent = NULL;
+	vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
@@ -348,7 +348,7 @@ out_free_newdev:
 static void vlan_sync_address(struct net_device *dev,
 			      struct net_device *vlandev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(vlandev);
+	struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
 
 	/* May be called without an actual change */
 	if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 756a71c1f4a..a846559da85 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -72,7 +72,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 {
-	if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		if (skb_shared(skb) || skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			kfree_skb(skb);
@@ -290,7 +290,7 @@ static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev
 							  struct sk_buff* skb)
 {
 	struct vlan_priority_tci_mapping *mp =
-		VLAN_DEV_INFO(dev)->egress_priority_map[(skb->priority & 0xF)];
+		vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
 
 	while (mp) {
 		if (mp->priority == skb->priority) {
@@ -324,7 +324,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	struct net_device *vdev = dev; /* save this for the bottom of the method */
 
 	pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
-		 __FUNCTION__, skb, type, len, VLAN_DEV_INFO(dev)->vlan_id, daddr);
+		 __FUNCTION__, skb, type, len, vlan_dev_info(dev)->vlan_id, daddr);
 
 	/* build vlan header only if re_order_header flag is NOT set.  This
 	 * fixes some programs that get confused when they see a VLAN device
@@ -334,7 +334,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	 * header shuffling in the hard_start_xmit.  Users can turn off this
 	 * REORDER behaviour with the vconfig tool.
 	 */
-	if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR))
+	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR))
 		build_vlan_header = 1;
 
 	if (build_vlan_header) {
@@ -349,7 +349,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		 * VLAN ID	 12 bits (low bits)
 		 *
 		 */
-		veth_TCI = VLAN_DEV_INFO(dev)->vlan_id;
+		veth_TCI = vlan_dev_info(dev)->vlan_id;
 		veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
 
 		vhdr->h_vlan_TCI = htons(veth_TCI);
@@ -374,7 +374,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	if (saddr == NULL)
 		saddr = dev->dev_addr;
 
-	dev = VLAN_DEV_INFO(dev)->real_dev;
+	dev = vlan_dev_info(dev)->real_dev;
 
 	/* MPLS can send us skbuffs w/out enough space.	 This check will grow the
 	 * skb if it doesn't have enough headroom.  Not a beautiful solution, so
@@ -395,7 +395,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 			stats->tx_dropped++;
 			return -ENOMEM;
 		}
-		VLAN_DEV_INFO(vdev)->cnt_inc_headroom_on_tx++;
+		vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++;
 		pr_debug("%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
 	}
 
@@ -430,12 +430,12 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-		VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+		vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		int orig_headroom = skb_headroom(skb);
 		unsigned short veth_TCI;
 
 		/* This is not a VLAN frame...but we can fix that! */
-		VLAN_DEV_INFO(dev)->cnt_encap_on_xmit++;
+		vlan_dev_info(dev)->cnt_encap_on_xmit++;
 
 		pr_debug("%s: proto to encap: 0x%hx\n",
 			 __FUNCTION__, htons(veth->h_vlan_proto));
@@ -445,7 +445,7 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * CFI		 1 bit
 		 * VLAN ID	 12 bits (low bits)
 		 */
-		veth_TCI = VLAN_DEV_INFO(dev)->vlan_id;
+		veth_TCI = vlan_dev_info(dev)->vlan_id;
 		veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
 
 		skb = __vlan_put_tag(skb, veth_TCI);
@@ -455,7 +455,7 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 
 		if (orig_headroom < VLAN_HLEN) {
-			VLAN_DEV_INFO(dev)->cnt_inc_headroom_on_tx++;
+			vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
 		}
 	}
 
@@ -472,7 +472,7 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	stats->tx_packets++; /* for statics only */
 	stats->tx_bytes += skb->len;
 
-	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	skb->dev = vlan_dev_info(dev)->real_dev;
 	dev_queue_xmit(skb);
 
 	return 0;
@@ -490,14 +490,14 @@ static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 	 * CFI		 1 bit
 	 * VLAN ID	 12 bits (low bits)
 	 */
-	veth_TCI = VLAN_DEV_INFO(dev)->vlan_id;
+	veth_TCI = vlan_dev_info(dev)->vlan_id;
 	veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
 	skb = __vlan_hwaccel_put_tag(skb, veth_TCI);
 
 	stats->tx_packets++;
 	stats->tx_bytes += skb->len;
 
-	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	skb->dev = vlan_dev_info(dev)->real_dev;
 	dev_queue_xmit(skb);
 
 	return 0;
@@ -508,7 +508,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 	/* TODO: gotta make sure the underlying layer can handle it,
 	 * maybe an IFF_VLAN_CAPABLE flag for devices?
 	 */
-	if (VLAN_DEV_INFO(dev)->real_dev->mtu < new_mtu)
+	if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
 		return -ERANGE;
 
 	dev->mtu = new_mtu;
@@ -519,7 +519,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, short vlan_prio)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 
 	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
 		vlan->nr_ingress_mappings--;
@@ -532,7 +532,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, short vlan_prio)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
 	u32 vlan_qos = (vlan_prio << 13) & 0xE000;
@@ -573,9 +573,9 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
 	/* verify flag is supported */
 	if (flag == VLAN_FLAG_REORDER_HDR) {
 		if (flag_val) {
-			VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR;
+			vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR;
 		} else {
-			VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
+			vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
 		}
 		return 0;
 	}
@@ -584,17 +584,17 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
 
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
 {
-	strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23);
+	strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
 }
 
 void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
 {
-	*result = VLAN_DEV_INFO(dev)->vlan_id;
+	*result = vlan_dev_info(dev)->vlan_id;
 }
 
 static int vlan_dev_open(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	int err;
 
@@ -618,7 +618,7 @@ static int vlan_dev_open(struct net_device *dev)
 
 static int vlan_dev_stop(struct net_device *dev)
 {
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 
 	dev_mc_unsync(real_dev, dev);
 	if (dev->flags & IFF_ALLMULTI)
@@ -634,7 +634,7 @@ static int vlan_dev_stop(struct net_device *dev)
 
 static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 {
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	struct sockaddr *addr = p;
 	int err;
 
@@ -660,7 +660,7 @@ out:
 
 static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	struct ifreq ifrr;
 	int err = -EOPNOTSUPP;
 
@@ -684,7 +684,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 
 	if (change & IFF_ALLMULTI)
 		dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -694,7 +694,7 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 
 static void vlan_dev_set_multicast_list(struct net_device *vlan_dev)
 {
-	dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev);
+	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
@@ -712,7 +712,7 @@ static const struct header_ops vlan_header_ops = {
 
 static int vlan_dev_init(struct net_device *dev)
 {
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	int subclass = 0;
 
 	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 9ee63583ed2..e32eeb37987 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -75,7 +75,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 static int vlan_changelink(struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct ifla_vlan_flags *flags;
 	struct ifla_vlan_qos_mapping *m;
 	struct nlattr *attr;
@@ -104,7 +104,7 @@ static int vlan_changelink(struct net_device *dev,
 static int vlan_newlink(struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev;
 	int err;
 
@@ -148,7 +148,7 @@ static inline size_t vlan_qos_map_size(unsigned int n)
 
 static size_t vlan_get_size(const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 
 	return nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
@@ -157,14 +157,14 @@ static size_t vlan_get_size(const struct net_device *dev)
 
 static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct vlan_priority_tci_mapping *pm;
 	struct ifla_vlan_flags f;
 	struct ifla_vlan_qos_mapping m;
 	struct nlattr *nest;
 	unsigned int i;
 
-	NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id);
+	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
 	if (vlan->flags) {
 		f.flags = vlan->flags;
 		f.mask  = ~0;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 971e6233801..b5202443b1a 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -180,7 +180,7 @@ err:
 
 int vlan_proc_add_dev (struct net_device *vlandev)
 {
-	struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
+	struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 
 	dev_info->dent = create_proc_entry(vlandev->name,
 					   S_IFREG|S_IRUSR|S_IWUSR,
@@ -199,9 +199,9 @@ int vlan_proc_add_dev (struct net_device *vlandev)
 int vlan_proc_rem_dev(struct net_device *vlandev)
 {
 	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
-	if (VLAN_DEV_INFO(vlandev)->dent) {
-		remove_proc_entry(VLAN_DEV_INFO(vlandev)->dent->name, proc_vlan_dir);
-		VLAN_DEV_INFO(vlandev)->dent = NULL;
+	if (vlan_dev_info(vlandev)->dent) {
+		remove_proc_entry(vlan_dev_info(vlandev)->dent->name, proc_vlan_dir);
+		vlan_dev_info(vlandev)->dent = NULL;
 	}
 	return 0;
 }
@@ -278,7 +278,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 			   nmtype ? nmtype :  "UNKNOWN" );
 	} else {
 		const struct net_device *vlandev = v;
-		const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
+		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 
 		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,
 			   dev_info->vlan_id,    dev_info->real_dev->name);
@@ -289,7 +289,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
-	const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
+	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 	struct net_device_stats *stats = &vlandev->stats;
 	static const char fmt[] = "%30s %12lu\n";
 	int i;
-- 
cgit v1.2.3


From 2029cc2c84fb1169c80c6cf6fc375f11194ed8b5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:26:41 -0800
Subject: [VLAN]: checkpatch cleanups

Checkpatch cleanups, consisting mainly of overly long lines and
missing spaces.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 20 ++++++-----
 net/8021q/vlan.h     |  2 +-
 net/8021q/vlan_dev.c | 98 ++++++++++++++++++++++++++--------------------------
 net/8021q/vlanproc.c | 42 +++++++++++-----------
 net/8021q/vlanproc.h | 11 +++---
 5 files changed, 89 insertions(+), 84 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d058c0ef3b6..8b93799c6a0 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -43,7 +43,6 @@
 
 /* Our listing of VLAN group(s) */
 static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
-#define vlan_grp_hashfn(IDX)	((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK)
 
 static char vlan_fullname[] = "802.1Q VLAN Support";
 static char vlan_version[] = DRV_VERSION;
@@ -60,6 +59,11 @@ static struct packet_type vlan_packet_type = {
 
 /* End of global variables definitions. */
 
+static inline unsigned int vlan_grp_hashfn(unsigned int idx)
+{
+	return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
+}
+
 /* Must be invoked with RCU read lock (no preempt) */
 static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
 {
@@ -94,7 +98,7 @@ static void vlan_group_free(struct vlan_group *grp)
 {
 	int i;
 
-	for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
 		kfree(grp->vlan_devices_arrays[i]);
 	kfree(grp);
 }
@@ -174,7 +178,8 @@ void unregister_vlan_dev(struct net_device *dev)
 	unregister_netdevice(dev);
 }
 
-static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
+static void vlan_transfer_operstate(const struct net_device *dev,
+				    struct net_device *vlandev)
 {
 	/* Have to respect userspace enforced dormant state
 	 * of real device, also must allow supplicant running
@@ -369,7 +374,8 @@ static void vlan_sync_address(struct net_device *dev,
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
 
-static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+static int vlan_device_event(struct notifier_block *unused, unsigned long event,
+			     void *ptr)
 {
 	struct net_device *dev = ptr;
 	struct vlan_group *grp = __vlan_find_group(dev->ifindex);
@@ -569,9 +575,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		err = 0;
 		vlan_dev_get_realdev_name(dev, args.u.device2);
 		if (copy_to_user(arg, &args,
-				 sizeof(struct vlan_ioctl_args))) {
+				 sizeof(struct vlan_ioctl_args)))
 			err = -EFAULT;
-		}
 		break;
 
 	case GET_VLAN_VID_CMD:
@@ -579,9 +584,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		vlan_dev_get_vid(dev, &vid);
 		args.u.VID = vid;
 		if (copy_to_user(arg, &args,
-				 sizeof(struct vlan_ioctl_args))) {
+				 sizeof(struct vlan_ioctl_args)))
 		      err = -EFAULT;
-		}
 		break;
 
 	default:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 0cfdf77b497..73efcc715cc 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -20,7 +20,7 @@ extern unsigned short vlan_name_type;
  *  Must be invoked with rcu_read_lock (ie preempt disabled)
  *  or with RTNL.
  */
-struct net_device *__find_vlan_dev(struct net_device* real_dev,
+struct net_device *__find_vlan_dev(struct net_device *real_dev,
 				   unsigned short VID); /* vlan.c */
 
 /* found in vlan_dev.c */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a846559da85..2ff7659b5e5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -106,13 +106,13 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
  *  SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
  *                 stored UNALIGNED in the memory.  RISC systems don't like
  *                 such cases very much...
- *  SANITY NOTE 2a:  According to Dave Miller & Alexey, it will always be aligned,
- *                 so there doesn't need to be any of the unaligned stuff.  It has
- *                 been commented out now...  --Ben
+ *  SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
+ *  		    aligned, so there doesn't need to be any of the unaligned
+ *  		    stuff.  It has been commented out now...  --Ben
  *
  */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type* ptype, struct net_device *orig_dev)
+		  struct packet_type *ptype, struct net_device *orig_dev)
 {
 	unsigned char *rawp = NULL;
 	struct vlan_hdr *vhdr;
@@ -126,7 +126,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		return -1;
 	}
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (skb == NULL)
 		return -1;
 
 	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) {
@@ -156,8 +157,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	skb->dev = __find_vlan_dev(dev, vid);
 	if (!skb->dev) {
 		rcu_read_unlock();
-		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s [%i]\n",
-			 __FUNCTION__, (unsigned int)vid, dev->name, dev->ifindex);
+		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
+			 __FUNCTION__, (unsigned int)vid, dev->name);
 		kfree_skb(skb);
 		return -1;
 	}
@@ -175,7 +176,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	/*
 	 * Deal with ingress priority mapping.
 	 */
-	skb->priority = vlan_get_ingress_priority(skb->dev, ntohs(vhdr->h_vlan_TCI));
+	skb->priority = vlan_get_ingress_priority(skb->dev,
+						  ntohs(vhdr->h_vlan_TCI));
 
 	pr_debug("%s: priority: %u for TCI: %hu\n",
 		 __FUNCTION__, skb->priority, ntohs(vhdr->h_vlan_TCI));
@@ -185,7 +187,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	switch (skb->pkt_type) {
 	case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
-		// stats->broadcast ++; // no such counter :-(
+		/* stats->broadcast ++; // no such counter :-( */
 		break;
 
 	case PACKET_MULTICAST:
@@ -194,13 +196,13 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	case PACKET_OTHERHOST:
 		/* Our lower layer thinks this is not local, let's make sure.
-		 * This allows the VLAN to have a different MAC than the underlying
-		 * device, and still route correctly.
+		 * This allows the VLAN to have a different MAC than the
+		 * underlying device, and still route correctly.
 		 */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest, skb->dev->dev_addr)) {
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					skb->dev->dev_addr))
 			/* It is for our (changed) MAC-address! */
 			skb->pkt_type = PACKET_HOST;
-		}
 		break;
 	default:
 		break;
@@ -244,8 +246,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	if (*(unsigned short *)rawp == 0xFFFF) {
 		skb->protocol = htons(ETH_P_802_3);
-		/* place it back on the queue to be handled by true layer 3 protocols.
-		 */
+		/* place it back on the queue to be handled by true layer 3
+		 * protocols. */
 
 		/* See if we are configured to re-write the VLAN header
 		 * to make it look like ethernet...
@@ -286,17 +288,17 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 }
 
-static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev,
-							  struct sk_buff* skb)
+static inline unsigned short
+vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 {
-	struct vlan_priority_tci_mapping *mp =
-		vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
+	struct vlan_priority_tci_mapping *mp;
 
+	mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
 	while (mp) {
 		if (mp->priority == skb->priority) {
-			return mp->vlan_qos; /* This should already be shifted to mask
-					      * correctly with the VLAN's TCI
-					      */
+			return mp->vlan_qos; /* This should already be shifted
+					      * to mask correctly with the
+					      * VLAN's TCI */
 		}
 		mp = mp->next;
 	}
@@ -321,10 +323,11 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	unsigned short veth_TCI = 0;
 	int rc = 0;
 	int build_vlan_header = 0;
-	struct net_device *vdev = dev; /* save this for the bottom of the method */
+	struct net_device *vdev = dev;
 
 	pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
-		 __FUNCTION__, skb, type, len, vlan_dev_info(dev)->vlan_id, daddr);
+		 __FUNCTION__, skb, type, len, vlan_dev_info(dev)->vlan_id,
+		 daddr);
 
 	/* build vlan header only if re_order_header flag is NOT set.  This
 	 * fixes some programs that get confused when they see a VLAN device
@@ -342,8 +345,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 
 		/* build the four bytes that make this a VLAN header. */
 
-		/* Now, construct the second two bytes. This field looks something
-		 * like:
+		/* Now, construct the second two bytes. This field looks
+		 * something like:
 		 * usr_priority: 3 bits	 (high bits)
 		 * CFI		 1 bit
 		 * VLAN ID	 12 bits (low bits)
@@ -355,16 +358,15 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		vhdr->h_vlan_TCI = htons(veth_TCI);
 
 		/*
-		 *  Set the protocol type.
-		 *  For a packet of type ETH_P_802_3 we put the length in here instead.
-		 *  It is up to the 802.2 layer to carry protocol information.
+		 *  Set the protocol type. For a packet of type ETH_P_802_3 we
+		 *  put the length in here instead. It is up to the 802.2
+		 *  layer to carry protocol information.
 		 */
 
-		if (type != ETH_P_802_3) {
+		if (type != ETH_P_802_3)
 			vhdr->h_vlan_encapsulated_proto = htons(type);
-		} else {
+		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
-		}
 
 		skb->protocol = htons(ETH_P_8021Q);
 		skb_reset_network_header(skb);
@@ -376,14 +378,14 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 
 	dev = vlan_dev_info(dev)->real_dev;
 
-	/* MPLS can send us skbuffs w/out enough space.	 This check will grow the
-	 * skb if it doesn't have enough headroom.  Not a beautiful solution, so
-	 * I'll tick a counter so that users can know it's happening...	 If they
-	 * care...
+	/* MPLS can send us skbuffs w/out enough space.	This check will grow
+	 * the skb if it doesn't have enough headroom. Not a beautiful solution,
+	 * so I'll tick a counter so that users can know it's happening...
+	 * If they care...
 	 */
 
-	/* NOTE:  This may still break if the underlying device is not the final
-	 * device (and thus there are more headers to add...)  It should work for
+	/* NOTE: This may still break if the underlying device is not the final
+	 * device (and thus there are more headers to add...) It should work for
 	 * good-ole-ethernet though.
 	 */
 	if (skb_headroom(skb) < dev->hard_header_len) {
@@ -396,7 +398,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 			return -ENOMEM;
 		}
 		vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++;
-		pr_debug("%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
+		pr_debug("%s: %s: had to grow skb\n", __FUNCTION__, vdev->name);
 	}
 
 	if (build_vlan_header) {
@@ -408,10 +410,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else if (rc < 0)
 			rc -= VLAN_HLEN;
 	} else
-		/* If here, then we'll just make a normal looking ethernet frame,
-		 * but, the hard_start_xmit method will insert the tag (it has to
-		 * be able to do this for bridged and other skbs that don't come
-		 * down the protocol stack in an orderly manner.
+		/* If here, then we'll just make a normal looking ethernet
+		 * frame, but, the hard_start_xmit method will insert the tag
+		 * (it has to be able to do this for bridged and other skbs
+		 * that don't come down the protocol stack in an orderly manner.
 		 */
 		rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
 
@@ -454,9 +456,8 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			return 0;
 		}
 
-		if (orig_headroom < VLAN_HLEN) {
+		if (orig_headroom < VLAN_HLEN)
 			vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
-		}
 	}
 
 	pr_debug("%s: about to send skb: %p to dev: %s\n",
@@ -572,11 +573,10 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
 {
 	/* verify flag is supported */
 	if (flag == VLAN_FLAG_REORDER_HDR) {
-		if (flag_val) {
+		if (flag_val)
 			vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR;
-		} else {
+		else
 			vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
-		}
 		return 0;
 	}
 	return -EINVAL;
@@ -667,7 +667,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
 	ifrr.ifr_ifru = ifr->ifr_ifru;
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index b5202443b1a..2a4e1aabb23 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -125,10 +125,10 @@ static struct proc_dir_entry *proc_vlan_conf;
 
 /* Strings */
 static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = {
-    [VLAN_NAME_TYPE_RAW_PLUS_VID]       = "VLAN_NAME_TYPE_RAW_PLUS_VID",
-    [VLAN_NAME_TYPE_PLUS_VID_NO_PAD]	= "VLAN_NAME_TYPE_PLUS_VID_NO_PAD",
-    [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD]= "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD",
-    [VLAN_NAME_TYPE_PLUS_VID]		= "VLAN_NAME_TYPE_PLUS_VID",
+    [VLAN_NAME_TYPE_RAW_PLUS_VID]        = "VLAN_NAME_TYPE_RAW_PLUS_VID",
+    [VLAN_NAME_TYPE_PLUS_VID_NO_PAD]	 = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD",
+    [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD",
+    [VLAN_NAME_TYPE_PLUS_VID]		 = "VLAN_NAME_TYPE_PLUS_VID",
 };
 /*
  *	Interface functions
@@ -178,7 +178,7 @@ err:
  *	Add directory entry for VLAN device.
  */
 
-int vlan_proc_add_dev (struct net_device *vlandev)
+int vlan_proc_add_dev(struct net_device *vlandev)
 {
 	struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 
@@ -200,7 +200,8 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
 {
 	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
 	if (vlan_dev_info(vlandev)->dent) {
-		remove_proc_entry(vlan_dev_info(vlandev)->dent->name, proc_vlan_dir);
+		remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
+				  proc_vlan_dir);
 		vlan_dev_info(vlandev)->dent = NULL;
 	}
 	return 0;
@@ -275,7 +276,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 		    nmtype =  vlan_name_type_str[vlan_name_type];
 
 		seq_printf(seq, "Name-Type: %s\n",
-			   nmtype ? nmtype :  "UNKNOWN" );
+			   nmtype ? nmtype :  "UNKNOWN");
 	} else {
 		const struct net_device *vlandev = v;
 		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
@@ -297,9 +298,10 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	if (!(vlandev->priv_flags & IFF_802_1Q_VLAN))
 		return 0;
 
-	seq_printf(seq, "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
-		       vlandev->name, dev_info->vlan_id,
-		       (int)(dev_info->flags & 1), vlandev->priv_flags);
+	seq_printf(seq,
+		   "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
+		   vlandev->name, dev_info->vlan_id,
+		   (int)(dev_info->flags & 1), vlandev->priv_flags);
 
 	seq_printf(seq, fmt, "total frames received", stats->rx_packets);
 	seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
@@ -313,16 +315,16 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		   dev_info->cnt_encap_on_xmit);
 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
-	seq_printf(seq,
-		       "\nINGRESS priority mappings: 0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
-		       dev_info->ingress_priority_map[0],
-		       dev_info->ingress_priority_map[1],
-		       dev_info->ingress_priority_map[2],
-		       dev_info->ingress_priority_map[3],
-		       dev_info->ingress_priority_map[4],
-		       dev_info->ingress_priority_map[5],
-		       dev_info->ingress_priority_map[6],
-		       dev_info->ingress_priority_map[7]);
+	seq_printf(seq, "\nINGRESS priority mappings: "
+			"0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
+		   dev_info->ingress_priority_map[0],
+		   dev_info->ingress_priority_map[1],
+		   dev_info->ingress_priority_map[2],
+		   dev_info->ingress_priority_map[3],
+		   dev_info->ingress_priority_map[4],
+		   dev_info->ingress_priority_map[5],
+		   dev_info->ingress_priority_map[6],
+		   dev_info->ingress_priority_map[7]);
 
 	seq_printf(seq, "EGRESSS priority Mappings: ");
 	for (i = 0; i < 16; i++) {
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index f908ee332fd..da542cacc5a 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -4,16 +4,15 @@
 #ifdef CONFIG_PROC_FS
 int vlan_proc_init(void);
 int vlan_proc_rem_dev(struct net_device *vlandev);
-int vlan_proc_add_dev (struct net_device *vlandev);
-void vlan_proc_cleanup (void);
+int vlan_proc_add_dev(struct net_device *vlandev);
+void vlan_proc_cleanup(void);
 
 #else /* No CONFIG_PROC_FS */
 
 #define vlan_proc_init()	(0)
-#define vlan_proc_cleanup()	do {} while(0)
-#define vlan_proc_add_dev(dev)	({(void)(dev), 0;})
-#define vlan_proc_rem_dev(dev)	({(void)(dev), 0;})
-
+#define vlan_proc_cleanup()	do {} while (0)
+#define vlan_proc_add_dev(dev)	({(void)(dev), 0; })
+#define vlan_proc_rem_dev(dev)	({(void)(dev), 0; })
 #endif
 
 #endif /* !(__BEN_VLAN_PROC_INC__) */
-- 
cgit v1.2.3


From ad712087f78469a783281d0d15657edfbff69594 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:27:00 -0800
Subject: [VLAN]: Update list address

VLAN related mail should go to netdev.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 2 +-
 net/8021q/vlan_dev.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8b93799c6a0..dbc81b96509 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -3,7 +3,7 @@
  *		Ethernet-type device handling.
  *
  * Authors:	Ben Greear <greearb@candelatech.com>
- *              Please send support related email to: vlan@scry.wanfear.com
+ *              Please send support related email to: netdev@vger.kernel.org
  *              VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
  *
  * Fixes:
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2ff7659b5e5..e19e49184ae 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -3,7 +3,7 @@
  *		Ethernet-type device handling.
  *
  * Authors:	Ben Greear <greearb@candelatech.com>
- *              Please send support related email to: vlan@scry.wanfear.com
+ *              Please send support related email to: netdev@vger.kernel.org
  *              VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
  *
  * Fixes:       Mar 22 2001: Martin Bokaemper <mbokaemper@unispherenetworks.com>
-- 
cgit v1.2.3


From 31ffdbcb5989c121f2f81a6b5b20c1c4bb21e5fd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:27:18 -0800
Subject: [VLAN]: Clean up vlan_skb_recv()

- remove three instances of identical code
- remove unnecessary NULL initialization
- remove obvious and unnecessary comments

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 113 +++++++++++----------------------------------------
 1 file changed, 24 insertions(+), 89 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e19e49184ae..57799af5108 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -114,77 +114,49 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
-	unsigned char *rawp = NULL;
+	unsigned char *rawp;
 	struct vlan_hdr *vhdr;
 	unsigned short vid;
 	struct net_device_stats *stats;
 	unsigned short vlan_TCI;
 	__be16 proto;
 
-	if (dev->nd_net != &init_net) {
-		kfree_skb(skb);
-		return -1;
-	}
+	if (dev->nd_net != &init_net)
+		goto err_free;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (skb == NULL)
-		return -1;
-
-	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) {
-		kfree_skb(skb);
-		return -1;
-	}
+		goto err_free;
 
-	vhdr = (struct vlan_hdr *)(skb->data);
+	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+		goto err_free;
 
-	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
+	vhdr = (struct vlan_hdr *)skb->data;
 	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
-
 	vid = (vlan_TCI & VLAN_VID_MASK);
 
-	/* Ok, we will find the correct VLAN device, strip the header,
-	 * and then go on as usual.
-	 */
-
-	/* We have 12 bits of vlan ID.
-	 *
-	 * We must not drop allow preempt until we hold a
-	 * reference to the device (netif_rx does that) or we
-	 * fail.
-	 */
-
 	rcu_read_lock();
 	skb->dev = __find_vlan_dev(dev, vid);
 	if (!skb->dev) {
-		rcu_read_unlock();
 		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
 			 __FUNCTION__, (unsigned int)vid, dev->name);
-		kfree_skb(skb);
-		return -1;
+		goto err_unlock;
 	}
 
 	skb->dev->last_rx = jiffies;
 
-	/* Bump the rx counters for the VLAN device. */
 	stats = &skb->dev->stats;
 	stats->rx_packets++;
 	stats->rx_bytes += skb->len;
 
-	/* Take off the VLAN header (4 bytes currently) */
 	skb_pull_rcsum(skb, VLAN_HLEN);
 
-	/*
-	 * Deal with ingress priority mapping.
-	 */
 	skb->priority = vlan_get_ingress_priority(skb->dev,
 						  ntohs(vhdr->h_vlan_TCI));
 
 	pr_debug("%s: priority: %u for TCI: %hu\n",
 		 __FUNCTION__, skb->priority, ntohs(vhdr->h_vlan_TCI));
 
-	/* The ethernet driver already did the pkt_type calculations
-	 * for us...
-	 */
 	switch (skb->pkt_type) {
 	case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
 		/* stats->broadcast ++; // no such counter :-( */
@@ -201,7 +173,6 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		 */
 		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
 					skb->dev->dev_addr))
-			/* It is for our (changed) MAC-address! */
 			skb->pkt_type = PACKET_HOST;
 		break;
 	default:
@@ -211,81 +182,45 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	/*  Was a VLAN packet, grab the encapsulated protocol, which the layer
 	 * three protocols care about.
 	 */
-	/* proto = get_unaligned(&vhdr->h_vlan_encapsulated_proto); */
 	proto = vhdr->h_vlan_encapsulated_proto;
-
-	skb->protocol = proto;
 	if (ntohs(proto) >= 1536) {
-		/* place it back on the queue to be handled by
-		 * true layer 3 protocols.
-		 */
-
-		/* See if we are configured to re-write the VLAN header
-		 * to make it look like ethernet...
-		 */
-		skb = vlan_check_reorder_header(skb);
-
-		/* Can be null if skb-clone fails when re-ordering */
-		if (skb) {
-			netif_rx(skb);
-		} else {
-			/* TODO:  Add a more specific counter here. */
-			stats->rx_errors++;
-		}
-		rcu_read_unlock();
-		return 0;
+		skb->protocol = proto;
+		goto recv;
 	}
 
-	rawp = skb->data;
-
 	/*
 	 * This is a magic hack to spot IPX packets. Older Novell breaks
 	 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
 	 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
 	 * won't work for fault tolerant netware but does for the rest.
 	 */
+	rawp = skb->data;
 	if (*(unsigned short *)rawp == 0xFFFF) {
 		skb->protocol = htons(ETH_P_802_3);
-		/* place it back on the queue to be handled by true layer 3
-		 * protocols. */
-
-		/* See if we are configured to re-write the VLAN header
-		 * to make it look like ethernet...
-		 */
-		skb = vlan_check_reorder_header(skb);
-
-		/* Can be null if skb-clone fails when re-ordering */
-		if (skb) {
-			netif_rx(skb);
-		} else {
-			/* TODO:  Add a more specific counter here. */
-			stats->rx_errors++;
-		}
-		rcu_read_unlock();
-		return 0;
+		goto recv;
 	}
 
 	/*
 	 *	Real 802.2 LLC
 	 */
 	skb->protocol = htons(ETH_P_802_2);
-	/* place it back on the queue to be handled by upper layer protocols.
-	 */
 
-	/* See if we are configured to re-write the VLAN header
-	 * to make it look like ethernet...
-	 */
+recv:
 	skb = vlan_check_reorder_header(skb);
-
-	/* Can be null if skb-clone fails when re-ordering */
-	if (skb) {
-		netif_rx(skb);
-	} else {
-		/* TODO:  Add a more specific counter here. */
+	if (!skb) {
 		stats->rx_errors++;
+		goto err_unlock;
 	}
+
+	netif_rx(skb);
 	rcu_read_unlock();
-	return 0;
+	return NET_RX_SUCCESS;
+
+err_unlock:
+	rcu_read_unlock();
+err_free:
+	kfree_skb(skb);
+	return NET_RX_DROP;
 }
 
 static inline unsigned short
-- 
cgit v1.2.3


From 91b4f954759653272504c55b715b757207ed1700 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 21 Jan 2008 00:28:03 -0800
Subject: [VLAN]: Move protocol determination to seperate function

I think, that we can make this code flow easier to understand
by introducing the vlan_set_encap_proto() function (I hope the
name is good) to setup the skb proto and merge the paths calling
netif_rx() together.

[Patrick: Modified to apply on top of my previous patches]

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 63 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 57799af5108..8059fa42b08 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -89,6 +89,40 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 	return skb;
 }
 
+static inline void vlan_set_encap_proto(struct sk_buff *skb,
+		struct vlan_hdr *vhdr)
+{
+	__be16 proto;
+	unsigned char *rawp;
+
+	/*
+	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+
+	proto = vhdr->h_vlan_encapsulated_proto;
+	if (ntohs(proto) >= 1536) {
+		skb->protocol = proto;
+		return;
+	}
+
+	rawp = skb->data;
+	if (*(unsigned short *)rawp == 0xFFFF)
+		/*
+		 * This is a magic hack to spot IPX packets. Older Novell
+		 * breaks the protocol design and runs IPX over 802.3 without
+		 * an 802.2 LLC layer. We look for FFFF which isn't a used
+		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+		 * but does for the rest.
+		 */
+		skb->protocol = htons(ETH_P_802_3);
+	else
+		/*
+		 * Real 802.2 LLC
+		 */
+		skb->protocol = htons(ETH_P_802_2);
+}
+
 /*
  *	Determine the packet's protocol ID. The rule here is that we
  *	assume 802.3 if the type field is short enough to be a length.
@@ -114,12 +148,10 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
-	unsigned char *rawp;
 	struct vlan_hdr *vhdr;
 	unsigned short vid;
 	struct net_device_stats *stats;
 	unsigned short vlan_TCI;
-	__be16 proto;
 
 	if (dev->nd_net != &init_net)
 		goto err_free;
@@ -179,33 +211,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		break;
 	}
 
-	/*  Was a VLAN packet, grab the encapsulated protocol, which the layer
-	 * three protocols care about.
-	 */
-	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= 1536) {
-		skb->protocol = proto;
-		goto recv;
-	}
-
-	/*
-	 * This is a magic hack to spot IPX packets. Older Novell breaks
-	 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
-	 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
-	 * won't work for fault tolerant netware but does for the rest.
-	 */
-	rawp = skb->data;
-	if (*(unsigned short *)rawp == 0xFFFF) {
-		skb->protocol = htons(ETH_P_802_3);
-		goto recv;
-	}
-
-	/*
-	 *	Real 802.2 LLC
-	 */
-	skb->protocol = htons(ETH_P_802_2);
+	vlan_set_encap_proto(skb, vhdr);
 
-recv:
 	skb = vlan_check_reorder_header(skb);
 	if (!skb) {
 		stats->rx_errors++;
-- 
cgit v1.2.3


From 13a0a096e58a1149a8cffbd7722b820044e3801e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jan 2008 00:47:43 -0800
Subject: [NET_SCHED]: kill obsolete NET_CLS_POLICE option

The code is already gone for about half a year, the config option
has been kept around to select the replacement options for easier
upgrades. This seems long enough, people upgrading from older
kernels will have to reconfigure a lot anyway.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 554248e0fc4..87af7c913d8 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -476,15 +476,6 @@ config NET_ACT_SIMP
 	  To compile this code as a module, choose M here: the
 	  module will be called simple.
 
-config NET_CLS_POLICE
-	bool "Traffic Policing (obsolete)"
-	select NET_CLS_ACT
-	select NET_ACT_POLICE
-	---help---
-	  Say Y here if you want to do traffic policing, i.e. strict
-	  bandwidth limiting. This option is obsolete and just selects
-	  the option replacing it. It will be removed in the future.
-
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
-- 
cgit v1.2.3


From d20b3109e9d122460929c50b857fcde251706ece Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 00:48:43 -0800
Subject: [IPV6]: addrconf sparse warnings

Get rid of a couple of sparse warnings in IPV6 addrconf code.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 803caf1a389..aba7b5d52a9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1900,7 +1900,7 @@ int addrconf_set_dstaddr(void __user *arg)
 		p.iph.ihl = 5;
 		p.iph.protocol = IPPROTO_IPV6;
 		p.iph.ttl = 64;
-		ifr.ifr_ifru.ifru_data = (void __user *)&p;
+		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 
 		oldfs = get_fs(); set_fs(KERNEL_DS);
 		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
@@ -2799,6 +2799,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(addrconf_hash_lock)
 {
 	read_lock_bh(&addrconf_hash_lock);
 	return if6_get_idx(seq, *pos);
@@ -2814,6 +2815,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
+	__releases(addrconf_hash_lock)
 {
 	read_unlock_bh(&addrconf_hash_lock);
 }
-- 
cgit v1.2.3


From 81da99ed71254a08d9a0bce46c258c1e15ac3948 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 00:50:09 -0800
Subject: [PKT_SCHED] dsmark: get rid of wrappers

Remove extraneous macro wrappers for printk and qdisc_priv.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 83 +++++++++++++++++++++-----------------------------
 1 file changed, 34 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index b9fe6975fbe..a9732aef2ca 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -15,23 +15,6 @@
 #include <net/inet_ecn.h>
 #include <asm/byteorder.h>
 
-
-#if 0 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
-
-
-#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
-
-
 /*
  * classid	class		marking
  * -------	-----		-------
@@ -81,9 +64,9 @@ static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
 static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 			struct Qdisc *new, struct Qdisc **old)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
+	pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
 		sch, p, new, old);
 
 	if (new == NULL) {
@@ -104,13 +87,14 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 
 static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	return PRIV(sch)->q;
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
+	return p->q;
 }
 
 static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
 {
-	DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
-		sch, PRIV(sch), classid);
+	pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
+		sch, qdisc_priv(sch), classid);
 
 	return TC_H_MIN(classid) + 1;
 }
@@ -128,13 +112,13 @@ static void dsmark_put(struct Qdisc *sch, unsigned long cl)
 static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 			 struct rtattr **tca, unsigned long *arg)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct rtattr *opt = tca[TCA_OPTIONS-1];
 	struct rtattr *tb[TCA_DSMARK_MAX];
 	int err = -EINVAL;
 	u8 mask = 0;
 
-	DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
+	pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
 		"arg 0x%lx\n", sch, p, classid, parent, *arg);
 
 	if (!dsmark_valid_index(p, *arg)) {
@@ -162,7 +146,7 @@ rtattr_failure:
 
 static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
 	if (!dsmark_valid_index(p, arg))
 		return -EINVAL;
@@ -175,10 +159,10 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
 
 static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int i;
 
-	DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+	pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
 
 	if (walker->stop)
 		return;
@@ -197,19 +181,20 @@ ignore:
 	}
 }
 
-static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
+static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
 {
-	return &PRIV(sch)->filter_list;
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
+	return &p->filter_list;
 }
 
 /* --------------------------- Qdisc operations ---------------------------- */
 
 static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int err;
 
-	D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 
 	if (p->set_tc_index) {
 		/* FIXME: Safe with non-linear skbs? --RR */
@@ -234,7 +219,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		struct tcf_result res;
 		int result = tc_classify(skb, p->filter_list, &res);
 
-		D2PRINTK("result %d class 0x%04x\n", result, res.classid);
+		pr_debug("result %d class 0x%04x\n", result, res.classid);
 
 		switch (result) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -272,11 +257,11 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 
 static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct sk_buff *skb;
 	u32 index;
 
-	D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
 
 	skb = p->q->ops->dequeue(p->q);
 	if (skb == NULL)
@@ -285,7 +270,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	sch->q.qlen--;
 
 	index = skb->tc_index & (p->indices - 1);
-	D2PRINTK("index %d->%d\n", skb->tc_index, index);
+	pr_debug("index %d->%d\n", skb->tc_index, index);
 
 	switch (skb->protocol) {
 		case __constant_htons(ETH_P_IP):
@@ -314,10 +299,10 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 
 static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int err;
 
-	D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 
 	err = p->q->ops->requeue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
@@ -333,10 +318,10 @@ static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
 
 static unsigned int dsmark_drop(struct Qdisc *sch)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	unsigned int len;
 
-	DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
 
 	if (p->q->ops->drop == NULL)
 		return 0;
@@ -350,14 +335,14 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
 
 static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct rtattr *tb[TCA_DSMARK_MAX];
 	int err = -EINVAL;
 	u32 default_index = NO_DEFAULT_INDEX;
 	u16 indices;
 	u8 *mask;
 
-	DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+	pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 
 	if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
 		goto errout;
@@ -389,7 +374,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
 	if (p->q == NULL)
 		p->q = &noop_qdisc;
 
-	DPRINTK("dsmark_init: qdisc %p\n", p->q);
+	pr_debug("dsmark_init: qdisc %p\n", p->q);
 
 	err = 0;
 errout:
@@ -399,18 +384,18 @@ rtattr_failure:
 
 static void dsmark_reset(struct Qdisc *sch)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
 	qdisc_reset(p->q);
 	sch->q.qlen = 0;
 }
 
 static void dsmark_destroy(struct Qdisc *sch)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
 
 	tcf_destroy_chain(p->filter_list);
 	qdisc_destroy(p->q);
@@ -420,10 +405,10 @@ static void dsmark_destroy(struct Qdisc *sch)
 static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 			     struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct rtattr *opts = NULL;
 
-	DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
+	pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
 
 	if (!dsmark_valid_index(p, cl))
 		return -EINVAL;
@@ -443,7 +428,7 @@ rtattr_failure:
 
 static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct rtattr *opts = NULL;
 
 	opts = RTA_NEST(skb, TCA_OPTIONS);
-- 
cgit v1.2.3


From 5b0ac72bc5fdda9634fb07db4cb0237fa9b6df68 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 21 Jan 2008 02:21:45 -0800
Subject: [PKT_SCHED] dsmark: Use hweight32() instead of convoluted loop.

Based upon a patch by Stephen Hemminger and suggestions
from Patrick McHardy.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index a9732aef2ca..d96eaf0aa6b 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <linux/bitops.h>
 #include <net/pkt_sched.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
@@ -43,17 +44,6 @@ struct dsmark_qdisc_data {
 	int			set_tc_index;
 };
 
-static inline int dsmark_valid_indices(u16 indices)
-{
-	while (indices != 1) {
-		if (indices & 1)
-			return 0;
-		indices >>= 1;
-	}
-
-	return 1;
-}
-
 static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
 {
 	return (index <= p->indices && index > 0);
@@ -348,7 +338,8 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
 		goto errout;
 
 	indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
-	if (!indices || !dsmark_valid_indices(indices))
+
+	if (hweight32(indices) != 1)
 		goto errout;
 
 	if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
-- 
cgit v1.2.3


From 4c30719f4f550d9b3034d9c00da9cb7fb99e6c0b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 02:23:49 -0800
Subject: [PKT_SCHED] dsmark: handle cloned and non-linear skb's

Make dsmark work properly with non-linear and cloned skb's
Before modifying the header, it needs to check that skb header is
writeable.

Note: this makes the assumption, that if it queues a good skb
then a good skb will come out of the embedded qdisc.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d96eaf0aa6b..ad30b7b4769 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -187,13 +187,19 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 	pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 
 	if (p->set_tc_index) {
-		/* FIXME: Safe with non-linear skbs? --RR */
 		switch (skb->protocol) {
 			case __constant_htons(ETH_P_IP):
+				if (skb_cow_head(skb, sizeof(struct iphdr)))
+					goto drop;
+
 				skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
+
 			case __constant_htons(ETH_P_IPV6):
+				if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+					goto drop;
+
 				skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
@@ -217,14 +223,14 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
 			return NET_XMIT_SUCCESS;
+
 		case TC_ACT_SHOT:
-			kfree_skb(skb);
-			sch->qstats.drops++;
-			return NET_XMIT_BYPASS;
+			goto drop;
 #endif
 		case TC_ACT_OK:
 			skb->tc_index = TC_H_MIN(res.classid);
 			break;
+
 		default:
 			if (p->default_index != NO_DEFAULT_INDEX)
 				skb->tc_index = p->default_index;
@@ -243,6 +249,11 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
+
+drop:
+	kfree_skb(skb);
+	sch->qstats.drops++;
+	return NET_XMIT_BYPASS;
 }
 
 static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
-- 
cgit v1.2.3


From 9d127fbdd26f9f16c41893b5f85b38321980e096 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 02:24:21 -0800
Subject: [PKT_SCHED] dsmark: checkpatch warning cleanup

Get rid of all style things checkpatch warns about, indentation and
whitespace.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 79 +++++++++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index ad30b7b4769..40e06a6890d 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -147,7 +147,7 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
 	return 0;
 }
 
-static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int i;
@@ -171,7 +171,8 @@ ignore:
 	}
 }
 
-static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
+static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
+						 unsigned long cl)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	return &p->filter_list;
@@ -179,7 +180,7 @@ static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long
 
 /* --------------------------- Qdisc operations ---------------------------- */
 
-static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int err;
@@ -188,24 +189,24 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 
 	if (p->set_tc_index) {
 		switch (skb->protocol) {
-			case __constant_htons(ETH_P_IP):
-				if (skb_cow_head(skb, sizeof(struct iphdr)))
-					goto drop;
+		case __constant_htons(ETH_P_IP):
+			if (skb_cow_head(skb, sizeof(struct iphdr)))
+				goto drop;
 
-				skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
-					& ~INET_ECN_MASK;
-				break;
+			skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
+				& ~INET_ECN_MASK;
+			break;
 
-			case __constant_htons(ETH_P_IPV6):
-				if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
-					goto drop;
+		case __constant_htons(ETH_P_IPV6):
+			if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+				goto drop;
 
-				skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
-					& ~INET_ECN_MASK;
-				break;
-			default:
-				skb->tc_index = 0;
-				break;
+			skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
+				& ~INET_ECN_MASK;
+			break;
+		default:
+			skb->tc_index = 0;
+			break;
 		}
 	}
 
@@ -238,7 +239,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		}
 	}
 
-	err = p->q->enqueue(skb,p->q);
+	err = p->q->enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
 		sch->qstats.drops++;
 		return err;
@@ -274,31 +275,31 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	pr_debug("index %d->%d\n", skb->tc_index, index);
 
 	switch (skb->protocol) {
-		case __constant_htons(ETH_P_IP):
-			ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
-					    p->value[index]);
+	case __constant_htons(ETH_P_IP):
+		ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
+				    p->value[index]);
 			break;
-		case __constant_htons(ETH_P_IPV6):
-			ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
-					    p->value[index]);
-			break;
-		default:
-			/*
-			 * Only complain if a change was actually attempted.
-			 * This way, we can send non-IP traffic through dsmark
-			 * and don't need yet another qdisc as a bypass.
-			 */
-			if (p->mask[index] != 0xff || p->value[index])
-				printk(KERN_WARNING "dsmark_dequeue: "
-				       "unsupported protocol %d\n",
-				       ntohs(skb->protocol));
+	case __constant_htons(ETH_P_IPV6):
+		ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
+				    p->value[index]);
 			break;
+	default:
+		/*
+		 * Only complain if a change was actually attempted.
+		 * This way, we can send non-IP traffic through dsmark
+		 * and don't need yet another qdisc as a bypass.
+		 */
+		if (p->mask[index] != 0xff || p->value[index])
+			printk(KERN_WARNING
+			       "dsmark_dequeue: unsupported protocol %d\n",
+			       ntohs(skb->protocol));
+		break;
 	}
 
 	return skb;
 }
 
-static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
+static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int err;
@@ -419,8 +420,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	tcm->tcm_info = p->q->handle;
 
 	opts = RTA_NEST(skb, TCA_OPTIONS);
-	RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]);
-	RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]);
+	RTA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
+	RTA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
 
 	return RTA_NEST_END(skb, opts);
 
-- 
cgit v1.2.3


From 786a90366f7571b5755d0e2773be9fe4cde80871 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 02:25:29 -0800
Subject: [PKT_SCHED] sch_atm: style cleanup

ATM scheduler clean house:
  * get rid of printk and qdisc_priv() wrapper
  * split some assignment in if() statements
  * whitespace and line breaks.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c | 139 ++++++++++++++++++++++++----------------------------
 1 file changed, 65 insertions(+), 74 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index d870a4115d9..734be9d37d4 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -16,18 +16,6 @@
 
 extern struct socket *sockfd_lookup(int fd, int *err);	/* @@@ fix this */
 
-#if 0 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
-
 /*
  * The ATM queuing discipline provides a framework for invoking classifiers
  * (aka "filters"), which in turn select classes of this queuing discipline.
@@ -49,7 +37,6 @@ extern struct socket *sockfd_lookup(int fd, int *err);	/* @@@ fix this */
  *  - should lock the flow while there is data in the queue (?)
  */
 
-#define PRIV(sch) qdisc_priv(sch)
 #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
 
 struct atm_flow_data {
@@ -57,7 +44,7 @@ struct atm_flow_data {
 	struct tcf_proto	*filter_list;
 	struct atm_vcc		*vcc;	/* VCC; NULL if VCC is closed */
 	void			(*old_pop)(struct atm_vcc *vcc,
-					   struct sk_buff * skb); /* chaining */
+					   struct sk_buff *skb); /* chaining */
 	struct atm_qdisc_data	*parent;	/* parent qdisc */
 	struct socket		*sock;		/* for closing */
 	u32			classid;	/* x:y type ID */
@@ -84,17 +71,17 @@ static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
 {
 	struct atm_flow_data *walk;
 
-	DPRINTK("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
+	pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
 	for (walk = qdisc->flows; walk; walk = walk->next)
 		if (walk == flow)
 			return 1;
-	DPRINTK("find_flow: not found\n");
+	pr_debug("find_flow: not found\n");
 	return 0;
 }
 
 static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
 	for (flow = p->flows; flow; flow = flow->next)
@@ -106,10 +93,10 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
 static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
 			struct Qdisc *new, struct Qdisc **old)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
 
-	DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
+	pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
 		sch, p, flow, new, old);
 	if (!find_flow(p, flow))
 		return -EINVAL;
@@ -125,20 +112,20 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
 {
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 
-	DPRINTK("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
+	pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
 	return flow ? flow->q : NULL;
 }
 
 static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
 {
-	struct atm_qdisc_data *p __maybe_unused = PRIV(sch);
+	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+	pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
 	flow = lookup_flow(sch, classid);
 	if (flow)
 		flow->ref++;
-	DPRINTK("atm_tc_get: flow %p\n", flow);
+	pr_debug("atm_tc_get: flow %p\n", flow);
 	return (unsigned long)flow;
 }
 
@@ -155,14 +142,14 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
  */
 static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 	struct atm_flow_data **prev;
 
-	DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+	pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
 	if (--flow->ref)
 		return;
-	DPRINTK("atm_tc_put: destroying\n");
+	pr_debug("atm_tc_put: destroying\n");
 	for (prev = &p->flows; *prev; prev = &(*prev)->next)
 		if (*prev == flow)
 			break;
@@ -171,11 +158,11 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 		return;
 	}
 	*prev = flow->next;
-	DPRINTK("atm_tc_put: qdisc %p\n", flow->q);
+	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
 	tcf_destroy_chain(flow->filter_list);
 	if (flow->sock) {
-		DPRINTK("atm_tc_put: f_count %d\n",
+		pr_debug("atm_tc_put: f_count %d\n",
 			file_count(flow->sock->file));
 		flow->vcc->pop = flow->old_pop;
 		sockfd_put(flow->sock);
@@ -194,7 +181,7 @@ static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
 {
 	struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
 
-	D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
+	pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
 	VCC2FLOW(vcc)->old_pop(vcc, skb);
 	tasklet_schedule(&p->task);
 }
@@ -211,7 +198,7 @@ static const u8 llc_oui_ip[] = {
 static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 			 struct rtattr **tca, unsigned long *arg)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
 	struct atm_flow_data *excess = NULL;
 	struct rtattr *opt = tca[TCA_OPTIONS - 1];
@@ -220,7 +207,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	int fd, error, hdr_len;
 	void *hdr;
 
-	DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
+	pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
 		"flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
 	/*
 	 * The concept of parents doesn't apply for this qdisc.
@@ -241,7 +228,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd))
 		return -EINVAL;
 	fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]);
-	DPRINTK("atm_tc_change: fd %d\n", fd);
+	pr_debug("atm_tc_change: fd %d\n", fd);
 	if (tb[TCA_ATM_HDR - 1]) {
 		hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]);
 		hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]);
@@ -259,11 +246,12 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		if (!excess)
 			return -ENOENT;
 	}
-	DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n",
-		opt->rta_type, RTA_PAYLOAD(opt), hdr_len);
-	if (!(sock = sockfd_lookup(fd, &error)))
+	pr_debug("atm_tc_change: type %d, payload %lu, hdr_len %d\n",
+		 opt->rta_type, RTA_PAYLOAD(opt), hdr_len);
+	sock = sockfd_lookup(fd, &error);
+	if (!sock)
 		return error;	/* f_count++ */
-	DPRINTK("atm_tc_change: f_count %d\n", file_count(sock->file));
+	pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file));
 	if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
 		error = -EPROTOTYPE;
 		goto err_out;
@@ -272,7 +260,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	   on vcc->send */
 	if (classid) {
 		if (TC_H_MAJ(classid ^ sch->handle)) {
-			DPRINTK("atm_tc_change: classid mismatch\n");
+			pr_debug("atm_tc_change: classid mismatch\n");
 			error = -EINVAL;
 			goto err_out;
 		}
@@ -286,26 +274,28 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 
 		for (i = 1; i < 0x8000; i++) {
 			classid = TC_H_MAKE(sch->handle, 0x8000 | i);
-			if (!(cl = atm_tc_get(sch, classid)))
+			cl = atm_tc_get(sch, classid);
+			if (!cl)
 				break;
 			atm_tc_put(sch, cl);
 		}
 	}
-	DPRINTK("atm_tc_change: new id %x\n", classid);
+	pr_debug("atm_tc_change: new id %x\n", classid);
 	flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
-	DPRINTK("atm_tc_change: flow %p\n", flow);
+	pr_debug("atm_tc_change: flow %p\n", flow);
 	if (!flow) {
 		error = -ENOBUFS;
 		goto err_out;
 	}
 	flow->filter_list = NULL;
-	if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
+	flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+	if (!flow->q)
 		flow->q = &noop_qdisc;
-	DPRINTK("atm_tc_change: qdisc %p\n", flow->q);
+	pr_debug("atm_tc_change: qdisc %p\n", flow->q);
 	flow->sock = sock;
 	flow->vcc = ATM_SD(sock);	/* speedup */
 	flow->vcc->user_back = flow;
-	DPRINTK("atm_tc_change: vcc %p\n", flow->vcc);
+	pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
 	flow->old_pop = flow->vcc->pop;
 	flow->parent = p;
 	flow->vcc->pop = sch_atm_pop;
@@ -330,11 +320,11 @@ err_out:
 
 static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
 
-	DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	if (!find_flow(PRIV(sch), flow))
+	pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+	if (!find_flow(qdisc_priv(sch), flow))
 		return -EINVAL;
 	if (flow->filter_list || flow == &p->link)
 		return -EBUSY;
@@ -354,10 +344,10 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 
 static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+	pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
 	if (walker->stop)
 		return;
 	for (flow = p->flows; flow; flow = flow->next) {
@@ -372,10 +362,10 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 
 static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 
-	DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+	pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
 	return flow ? &flow->filter_list : &p->link.filter_list;
 }
 
@@ -383,13 +373,13 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
 
 static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = NULL;	/* @@@ */
 	struct tcf_result res;
 	int result;
 	int ret = NET_XMIT_POLICED;
 
-	D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 	result = TC_POLICE_OK;	/* be nice to gcc */
 	if (TC_H_MAJ(skb->priority) != sch->handle ||
 	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority)))
@@ -430,7 +420,8 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 #endif
 	}
 
-	if ((ret = flow->q->enqueue(skb, flow->q)) != 0) {
+	ret = flow->q->enqueue(skb, flow->q);
+	if (ret != 0) {
 drop: __maybe_unused
 		sch->qstats.drops++;
 		if (flow)
@@ -468,11 +459,11 @@ drop: __maybe_unused
 static void sch_atm_dequeue(unsigned long data)
 {
 	struct Qdisc *sch = (struct Qdisc *)data;
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 	struct sk_buff *skb;
 
-	D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
 	for (flow = p->link.next; flow; flow = flow->next)
 		/*
 		 * If traffic is properly shaped, this won't generate nasty
@@ -483,7 +474,7 @@ static void sch_atm_dequeue(unsigned long data)
 				(void)flow->q->ops->requeue(skb, flow->q);
 				break;
 			}
-			D2PRINTK("atm_tc_dequeue: sending on class %p\n", flow);
+			pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
 			/* remove any LL header somebody else has attached */
 			skb_pull(skb, skb_network_offset(skb));
 			if (skb_headroom(skb) < flow->hdr_len) {
@@ -495,7 +486,7 @@ static void sch_atm_dequeue(unsigned long data)
 					continue;
 				skb = new;
 			}
-			D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
+			pr_debug("sch_atm_dequeue: ip %p, data %p\n",
 				 skb_network_header(skb), skb->data);
 			ATM_SKB(skb)->vcc = flow->vcc;
 			memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
@@ -509,10 +500,10 @@ static void sch_atm_dequeue(unsigned long data)
 
 static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct sk_buff *skb;
 
-	D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
 	tasklet_schedule(&p->task);
 	skb = p->link.q->dequeue(p->link.q);
 	if (skb)
@@ -522,10 +513,10 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
 
 static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	int ret;
 
-	D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 	ret = p->link.q->ops->requeue(skb, p->link.q);
 	if (!ret) {
 		sch->q.qlen++;
@@ -539,11 +530,11 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
 static unsigned int atm_tc_drop(struct Qdisc *sch)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 	unsigned int len;
 
-	DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
 	for (flow = p->flows; flow; flow = flow->next)
 		if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
 			return len;
@@ -552,14 +543,14 @@ static unsigned int atm_tc_drop(struct Qdisc *sch)
 
 static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 
-	DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	p->flows = &p->link;
-	if (!(p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-					    sch->handle)))
+	p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
+	if (!p->link.q)
 		p->link.q = &noop_qdisc;
-	DPRINTK("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
+	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
 	p->link.filter_list = NULL;
 	p->link.vcc = NULL;
 	p->link.sock = NULL;
@@ -572,10 +563,10 @@ static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt)
 
 static void atm_tc_reset(struct Qdisc *sch)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
 	for (flow = p->flows; flow; flow = flow->next)
 		qdisc_reset(flow->q);
 	sch->q.qlen = 0;
@@ -583,10 +574,10 @@ static void atm_tc_reset(struct Qdisc *sch)
 
 static void atm_tc_destroy(struct Qdisc *sch)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
 	/* races ? */
 	while ((flow = p->flows)) {
 		tcf_destroy_chain(flow->filter_list);
@@ -608,12 +599,12 @@ static void atm_tc_destroy(struct Qdisc *sch)
 static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 			     struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
-	DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
+	pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
 		sch, p, flow, skb, tcm);
 	if (!find_flow(p, flow))
 		return -EINVAL;
-- 
cgit v1.2.3


From aa767bfea4828936fffb7800204294ba4c8ba283 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 02:26:41 -0800
Subject: [PKT_SCHED] net classifier: style cleanup's

Classifier code cleanup. Get rid of printk wrapper, and fix whitespace
and other style stuff reported by checkpatch

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c     | 100 ++++++++++++++++++++++++------------------------
 net/sched/cls_tcindex.c |  60 ++++++++++++-----------------
 2 files changed, 73 insertions(+), 87 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 80dccac769d..e53773612bc 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,12 +29,6 @@
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
-#if 0 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
 /* The list of all installed classifier types */
 
 static struct tcf_proto_ops *tcf_proto_base;
@@ -44,7 +38,7 @@ static DEFINE_RWLOCK(cls_mod_lock);
 
 /* Find classifier type by string name */
 
-static struct tcf_proto_ops * tcf_proto_lookup_ops(struct rtattr *kind)
+static struct tcf_proto_ops *tcf_proto_lookup_ops(struct rtattr *kind)
 {
 	struct tcf_proto_ops *t = NULL;
 
@@ -81,6 +75,7 @@ out:
 	write_unlock(&cls_mod_lock);
 	return rc;
 }
+EXPORT_SYMBOL(register_tcf_proto_ops);
 
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
@@ -100,6 +95,7 @@ out:
 	write_unlock(&cls_mod_lock);
 	return rc;
 }
+EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
 static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 			  struct tcf_proto *tp, unsigned long fh, int event);
@@ -107,9 +103,9 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 
 /* Select new prio value from the range, managed by kernel. */
 
-static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
+static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 {
-	u32 first = TC_H_MAKE(0xC0000000U,0U);
+	u32 first = TC_H_MAKE(0xC0000000U, 0U);
 
 	if (tp)
 		first = tp->prio-1;
@@ -154,21 +150,25 @@ replay:
 		/* If no priority is given, user wants we allocated it. */
 		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
 			return -ENOENT;
-		prio = TC_H_MAKE(0x80000000U,0U);
+		prio = TC_H_MAKE(0x80000000U, 0U);
 	}
 
 	/* Find head of filter chain. */
 
 	/* Find link */
-	if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(&init_net, t->tcm_ifindex);
+	if (dev == NULL)
 		return -ENODEV;
 
 	/* Find qdisc */
 	if (!parent) {
 		q = dev->qdisc_sleeping;
 		parent = q->handle;
-	} else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL)
-		return -EINVAL;
+	} else {
+		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
+		if (q == NULL)
+			return -EINVAL;
+	}
 
 	/* Is it classful? */
 	if ((cops = q->ops->cl_ops) == NULL)
@@ -213,7 +213,8 @@ replay:
 		/* Create new proto tcf */
 
 		err = -ENOBUFS;
-		if ((tp = kzalloc(sizeof(*tp), GFP_KERNEL)) == NULL)
+		tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+		if (tp == NULL)
 			goto errout;
 		err = -EINVAL;
 		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]);
@@ -249,7 +250,9 @@ replay:
 		tp->q = q;
 		tp->classify = tp_ops->classify;
 		tp->classid = parent;
-		if ((err = tp_ops->init(tp)) != 0) {
+
+		err = tp_ops->init(tp);
+		if (err != 0) {
 			module_put(tp_ops->owner);
 			kfree(tp);
 			goto errout;
@@ -278,13 +281,14 @@ replay:
 		}
 
 		err = -ENOENT;
-		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTFILTER ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			goto errout;
 	} else {
 		switch (n->nlmsg_type) {
 		case RTM_NEWTFILTER:
 			err = -EEXIST;
-			if (n->nlmsg_flags&NLM_F_EXCL)
+			if (n->nlmsg_flags & NLM_F_EXCL)
 				goto errout;
 			break;
 		case RTM_DELTFILTER:
@@ -314,9 +318,8 @@ errout:
 	return err;
 }
 
-static int
-tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
-	      u32 pid, u32 seq, u16 flags, int event)
+static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
+			 unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@@ -361,19 +364,20 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
-struct tcf_dump_args
-{
+struct tcf_dump_args {
 	struct tcf_walker w;
 	struct sk_buff *skb;
 	struct netlink_callback *cb;
 };
 
-static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg)
+static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
+			 struct tcf_walker *arg)
 {
-	struct tcf_dump_args *a = (void*)arg;
+	struct tcf_dump_args *a = (void *)arg;
 
 	return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
 			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
@@ -387,7 +391,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	struct Qdisc *q;
 	struct tcf_proto *tp, **chain;
-	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
 	unsigned long cl = 0;
 	const struct Qdisc_class_ops *cops;
 	struct tcf_dump_args arg;
@@ -431,9 +435,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
 		if (cb->args[1] == 0) {
 			if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) {
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					  RTM_NEWTFILTER) <= 0)
 				break;
-			}
+
 			cb->args[1] = 1;
 		}
 		if (tp->ops->walk == NULL)
@@ -460,8 +465,7 @@ out:
 	return skb->len;
 }
 
-void
-tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
+void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (exts->action) {
@@ -470,10 +474,9 @@ tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 	}
 #endif
 }
+EXPORT_SYMBOL(tcf_exts_destroy);
 
-
-int
-tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
+int tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
 		  struct rtattr *rate_tlv, struct tcf_exts *exts,
 		  struct tcf_ext_map *map)
 {
@@ -485,8 +488,9 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
 		struct tc_action *act;
 
 		if (map->police && tb[map->police-1]) {
-			act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police",
-				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+			act = tcf_action_init_1(tb[map->police-1], rate_tlv,
+						"police", TCA_ACT_NOREPLACE,
+						TCA_ACT_BIND, &err);
 			if (act == NULL)
 				return err;
 
@@ -509,10 +513,10 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
 
 	return 0;
 }
+EXPORT_SYMBOL(tcf_exts_validate);
 
-void
-tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
-		struct tcf_exts *src)
+void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
+		     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (src->action) {
@@ -525,9 +529,9 @@ tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 	}
 #endif
 }
+EXPORT_SYMBOL(tcf_exts_change);
 
-int
-tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
+int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
 	      struct tcf_ext_map *map)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -556,10 +560,11 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
 rtattr_failure: __attribute__ ((unused))
 	return -1;
 }
+EXPORT_SYMBOL(tcf_exts_dump);
 
-int
-tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
-		    struct tcf_ext_map *map)
+
+int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
+			struct tcf_ext_map *map)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (exts->action)
@@ -570,6 +575,7 @@ tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
 rtattr_failure: __attribute__ ((unused))
 	return -1;
 }
+EXPORT_SYMBOL(tcf_exts_dump_stats);
 
 static int __init tc_filter_init(void)
 {
@@ -582,11 +588,3 @@ static int __init tc_filter_init(void)
 }
 
 subsys_initcall(tc_filter_init);
-
-EXPORT_SYMBOL(register_tcf_proto_ops);
-EXPORT_SYMBOL(unregister_tcf_proto_ops);
-EXPORT_SYMBOL(tcf_exts_validate);
-EXPORT_SYMBOL(tcf_exts_destroy);
-EXPORT_SYMBOL(tcf_exts_change);
-EXPORT_SYMBOL(tcf_exts_dump);
-EXPORT_SYMBOL(tcf_exts_dump_stats);
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 2314820a080..471909e5480 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -29,19 +29,6 @@
 #define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */
 
 
-#if 1 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
-
-
 #define	PRIV(tp)	((struct tcindex_data *) (tp)->root)
 
 
@@ -104,7 +91,8 @@ static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	struct tcindex_filter_result *f;
 	int key = (skb->tc_index & p->mask) >> p->shift;
 
-	D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p);
+	pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
+		 skb, tp, res, p);
 
 	f = tcindex_lookup(p, key);
 	if (!f) {
@@ -112,11 +100,11 @@ static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			return -1;
 		res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
 		res->class = 0;
-		D2PRINTK("alg 0x%x\n",res->classid);
+		pr_debug("alg 0x%x\n", res->classid);
 		return 0;
 	}
 	*res = f->res;
-	D2PRINTK("map 0x%x\n",res->classid);
+	pr_debug("map 0x%x\n", res->classid);
 
 	return tcf_exts_exec(skb, &f->exts, res);
 }
@@ -127,7 +115,7 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r;
 
-	DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
+	pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
 	if (p->perfect && handle >= p->alloc_hash)
 		return 0;
 	r = tcindex_lookup(p, handle);
@@ -137,7 +125,7 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
 
 static void tcindex_put(struct tcf_proto *tp, unsigned long f)
 {
-	DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
+	pr_debug("tcindex_put(tp %p,f 0x%lx)\n", tp, f);
 }
 
 
@@ -145,8 +133,8 @@ static int tcindex_init(struct tcf_proto *tp)
 {
 	struct tcindex_data *p;
 
-	DPRINTK("tcindex_init(tp %p)\n",tp);
-	p = kzalloc(sizeof(struct tcindex_data),GFP_KERNEL);
+	pr_debug("tcindex_init(tp %p)\n", tp);
+	p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
@@ -166,7 +154,7 @@ __tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
 	struct tcindex_filter *f = NULL;
 
-	DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
+	pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
 	if (p->perfect) {
 		if (!r->res.class)
 			return -ENOENT;
@@ -363,7 +351,7 @@ tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
 
-	DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
+	pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
 	    "p %p,r %p,*arg 0x%lx\n",
 	    tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
 
@@ -380,10 +368,10 @@ tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 {
 	struct tcindex_data *p = PRIV(tp);
-	struct tcindex_filter *f,*next;
+	struct tcindex_filter *f, *next;
 	int i;
 
-	DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
+	pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
 	if (p->perfect) {
 		for (i = 0; i < p->hash; i++) {
 			if (!p->perfect[i].res.class)
@@ -405,7 +393,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 		for (f = p->h[i]; f; f = next) {
 			next = f->next;
 			if (walker->count >= walker->skip) {
-				if (walker->fn(tp,(unsigned long) &f->result,
+				if (walker->fn(tp, (unsigned long) &f->result,
 				    walker) < 0) {
 					walker->stop = 1;
 					return;
@@ -429,11 +417,11 @@ static void tcindex_destroy(struct tcf_proto *tp)
 	struct tcindex_data *p = PRIV(tp);
 	struct tcf_walker walker;
 
-	DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p);
+	pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
 	walker.count = 0;
 	walker.skip = 0;
 	walker.fn = &tcindex_destroy_element;
-	tcindex_walk(tp,&walker);
+	tcindex_walk(tp, &walker);
 	kfree(p->perfect);
 	kfree(p->h);
 	kfree(p);
@@ -449,17 +437,17 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
-	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
-	    tp,fh,skb,t,p,r,b);
-	DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
+	pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
+		 tp, fh, skb, t, p, r, b);
+	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
 	rta = (struct rtattr *) b;
-	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (!fh) {
 		t->tcm_handle = ~0; /* whatever ... */
-		RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash);
-		RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask);
-		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
-		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
+		RTA_PUT(skb, TCA_TCINDEX_HASH, sizeof(p->hash), &p->hash);
+		RTA_PUT(skb, TCA_TCINDEX_MASK, sizeof(p->mask), &p->mask);
+		RTA_PUT(skb, TCA_TCINDEX_SHIFT, sizeof(p->shift), &p->shift);
+		RTA_PUT(skb, TCA_TCINDEX_FALL_THROUGH, sizeof(p->fall_through),
 		    &p->fall_through);
 		rta->rta_len = skb_tail_pointer(skb) - b;
 	} else {
@@ -478,7 +466,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 				}
 			}
 		}
-		DPRINTK("handle = %d\n",t->tcm_handle);
+		pr_debug("handle = %d\n", t->tcm_handle);
 		if (r->res.class)
 			RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
 
-- 
cgit v1.2.3


From 72348a424f989d6b748d9b816d46839b01fcd4cd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 02:27:29 -0800
Subject: [PKT_SCHED] net: add sparse annotation to ptype_seq_start/stop

Get rid of some more sparse warnings.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index eee77424309..c9c593e1ba6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2543,6 +2543,7 @@ static void *ptype_get_idx(loff_t pos)
 }
 
 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
 	rcu_read_lock();
 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
@@ -2578,6 +2579,7 @@ found:
 }
 
 static void ptype_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From 96750162b5f7350ec7ba7cf747a6623858d65dd2 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 21 Jan 2008 02:36:02 -0800
Subject: [NET] gen_estimator: gen_replace_estimator() cosmetic changes

White spaces etc. are changed in gen_replace_estimator() to make it
similar to others in a file.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 86037d16f19..7ab9060bccd 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -241,7 +241,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
 }
 
 /**
- * gen_replace_estimator - replace rate estimator configruation
+ * gen_replace_estimator - replace rate estimator configuration
  * @bstats: basic statistics
  * @rate_est: rate estimator statistics
  * @stats_lock: statistics lock
@@ -252,13 +252,12 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
  *
  * Returns 0 on success or a negative error code.
  */
-int
-gen_replace_estimator(struct gnet_stats_basic *bstats,
-	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
-	struct rtattr *opt)
+int gen_replace_estimator(struct gnet_stats_basic *bstats,
+			  struct gnet_stats_rate_est *rate_est,
+			  spinlock_t *stats_lock, struct rtattr *opt)
 {
-    gen_kill_estimator(bstats, rate_est);
-    return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+	gen_kill_estimator(bstats, rate_est);
+	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
 }
 
 
-- 
cgit v1.2.3


From 1e637c74b0f84eaca02b914c0b8c6f67276e9697 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Mon, 21 Jan 2008 03:18:08 -0800
Subject: [IPV4]: Enable use of 240/4 address space.

This short patch modifies the IPv4 networking to enable use of the
240.0.0.0/4 (aka "class-E") address space as propsed in the internet
draft draft-fuller-240space-00.txt.

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c       |  2 +-
 net/ipv4/fib_frontend.c |  2 +-
 net/ipv4/route.c        | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d18fdb10236..eebccdbdbac 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2266,7 +2266,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 					while (ipv4_is_loopback(s) ||
 					       ipv4_is_multicast(s) ||
-					       ipv4_is_badclass(s) ||
+					       ipv4_is_lbcast(s) ||
 					       ipv4_is_zeronet(s) ||
 					       ipv4_is_local_multicast(s)) {
 						t = random32() % (imx - imn) + imn;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 62bd791c204..8987046d97f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -176,7 +176,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
 	unsigned ret = RTN_BROADCAST;
 	struct fib_table *local_table;
 
-	if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
+	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 		return RTN_BROADCAST;
 	if (ipv4_is_multicast(addr))
 		return RTN_MULTICAST;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc0145385e8..f80c761ea0b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1154,7 +1154,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		return;
 
 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
-	    || ipv4_is_multicast(new_gw) || ipv4_is_badclass(new_gw)
+	    || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
 	    || ipv4_is_zeronet(new_gw))
 		goto reject_redirect;
 
@@ -1634,7 +1634,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (in_dev == NULL)
 		return -EINVAL;
 
-	if (ipv4_is_multicast(saddr) || ipv4_is_badclass(saddr) ||
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
 	    ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
 		goto e_inval;
 
@@ -1891,7 +1891,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	   by fib_lookup.
 	 */
 
-	if (ipv4_is_multicast(saddr) || ipv4_is_badclass(saddr) ||
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
 	    ipv4_is_loopback(saddr))
 		goto martian_source;
 
@@ -1904,7 +1904,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_zeronet(saddr))
 		goto martian_source;
 
-	if (ipv4_is_badclass(daddr) || ipv4_is_zeronet(daddr) ||
+	if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
 	    ipv4_is_loopback(daddr))
 		goto martian_destination;
 
@@ -2125,7 +2125,7 @@ static inline int __mkroute_output(struct rtable **result,
 		res->type = RTN_BROADCAST;
 	else if (ipv4_is_multicast(fl->fl4_dst))
 		res->type = RTN_MULTICAST;
-	else if (ipv4_is_badclass(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
+	else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
 		return -EINVAL;
 
 	if (dev_out->flags & IFF_LOOPBACK)
@@ -2276,7 +2276,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 	if (oldflp->fl4_src) {
 		err = -EINVAL;
 		if (ipv4_is_multicast(oldflp->fl4_src) ||
-		    ipv4_is_badclass(oldflp->fl4_src) ||
+		    ipv4_is_lbcast(oldflp->fl4_src) ||
 		    ipv4_is_zeronet(oldflp->fl4_src))
 			goto out;
 
-- 
cgit v1.2.3


From 8fffc15dc777ce3fe12ddc582d7c87a642591d3b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 27 Dec 2007 01:25:40 -0500
Subject: eliminate byteswapping in struct ieee80211_qos_parameters

Make it match the on-the-wire endianness, eliminate byteswapping.
The only driver that used this sucker (ipw2200) updated.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/ieee80211_rx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 13b12a67019..0a18edbdfcb 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -1032,16 +1032,16 @@ static int ieee80211_qos_convert_ac_to_parameters(struct
 		qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2;
 
 		cw_min = ac_params->ecw_min_max & 0x0F;
-		qos_param->cw_min[i] = (u16) ((1 << cw_min) - 1);
+		qos_param->cw_min[i] = cpu_to_le16((1 << cw_min) - 1);
 
 		cw_max = (ac_params->ecw_min_max & 0xF0) >> 4;
-		qos_param->cw_max[i] = (u16) ((1 << cw_max) - 1);
+		qos_param->cw_max[i] = cpu_to_le16((1 << cw_max) - 1);
 
 		qos_param->flag[i] =
 		    (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00;
 
 		txop = le16_to_cpu(ac_params->tx_op_limit) * 32;
-		qos_param->tx_op_limit[i] = (u16) txop;
+		qos_param->tx_op_limit[i] = cpu_to_le16(txop);
 	}
 	return rc;
 }
-- 
cgit v1.2.3


From b16f13d00c6f7e7317d3074f9bd07b5c9f313891 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 29 Dec 2007 04:08:15 -0500
Subject: several missing cpu_to_le16() in ieee80211softmac_capabilities()

on some codepaths we forgot to convert to little-endian as we do on the
rest of them and as the caller expects from us.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/softmac/ieee80211softmac_io.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c
index 26c35253be3..73b4b13fbd8 100644
--- a/net/ieee80211/softmac/ieee80211softmac_io.c
+++ b/net/ieee80211/softmac/ieee80211softmac_io.c
@@ -148,11 +148,11 @@ ieee80211softmac_hdr_3addr(struct ieee80211softmac_device *mac,
 	 * shouldn't the sequence number be in ieee80211? */
 }
 
-static u16
+static __le16
 ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
 	struct ieee80211softmac_network *net)
 {
-	u16 capability = 0;
+	__le16 capability = 0;
 
 	/* ESS and IBSS bits are set according to the current mode */
 	switch (mac->ieee->iw_mode) {
@@ -163,8 +163,8 @@ ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
 		capability = cpu_to_le16(WLAN_CAPABILITY_IBSS);
 		break;
 	case IW_MODE_AUTO:
-		capability = net->capabilities &
-			(WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS);
+		capability = cpu_to_le16(net->capabilities &
+			(WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS));
 		break;
 	default:
 		/* bleh. we don't ever go to these modes */
@@ -182,7 +182,7 @@ ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
 	/* Short Preamble */
 	/* Always supported: we probably won't ever be powering devices which
 	 * dont support this... */
-	capability |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+	capability |= cpu_to_le16(WLAN_CAPABILITY_SHORT_PREAMBLE);
 
 	/* PBCC */
 	/* Not widely used */
-- 
cgit v1.2.3


From c414e84b2200ca8a7e7ae565cad200e5c02e02ec Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 29 Dec 2007 04:58:39 -0500
Subject: ieee80211softmac_auth_resp() fix

The struct ieee8021_auth * passed to it comes straight from skb->data
without any conversions; members of the struct are little-endian, so
we'd better take that into account when doing switch by auth->algorithm,
etc.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/softmac/ieee80211softmac_auth.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c
index a53a751d070..1a96c257257 100644
--- a/net/ieee80211/softmac/ieee80211softmac_auth.c
+++ b/net/ieee80211/softmac/ieee80211softmac_auth.c
@@ -178,11 +178,11 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
 	}
 
 	/* Parse the auth packet */
-	switch(auth->algorithm) {
+	switch(le16_to_cpu(auth->algorithm)) {
 	case WLAN_AUTH_OPEN:
 		/* Check the status code of the response */
 
-		switch(auth->status) {
+		switch(le16_to_cpu(auth->status)) {
 		case WLAN_STATUS_SUCCESS:
 			/* Update the status to Authenticated */
 			spin_lock_irqsave(&mac->lock, flags);
@@ -210,7 +210,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
 		break;
 	case WLAN_AUTH_SHARED_KEY:
 		/* Figure out where we are in the process */
-		switch(auth->transaction) {
+		switch(le16_to_cpu(auth->transaction)) {
 		case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE:
 			/* Check to make sure we have a challenge IE */
 			data = (u8 *)auth->info_element;
-- 
cgit v1.2.3


From d9e94d5647ee6700773d81514a8ccb7dc6342fb4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 29 Dec 2007 05:01:07 -0500
Subject: ieee80211: fix misannotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/ieee80211_crypt_tkip.c | 22 +++++++++++-----------
 net/ieee80211/ieee80211_rx.c         |  2 +-
 net/ieee80211/ieee80211_tx.c         | 14 +++++++-------
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 8e146949fc6..bba0152e2d7 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -189,7 +189,7 @@ static inline u16 Mk16(u8 hi, u8 lo)
 	return lo | (((u16) hi) << 8);
 }
 
-static inline u16 Mk16_le(u16 * v)
+static inline u16 Mk16_le(__le16 * v)
 {
 	return le16_to_cpu(*v);
 }
@@ -275,15 +275,15 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
 	PPK[5] = TTAK[4] + IV16;
 
 	/* Step 2 - 96-bit bijective mixing using S-box */
-	PPK[0] += _S_(PPK[5] ^ Mk16_le((u16 *) & TK[0]));
-	PPK[1] += _S_(PPK[0] ^ Mk16_le((u16 *) & TK[2]));
-	PPK[2] += _S_(PPK[1] ^ Mk16_le((u16 *) & TK[4]));
-	PPK[3] += _S_(PPK[2] ^ Mk16_le((u16 *) & TK[6]));
-	PPK[4] += _S_(PPK[3] ^ Mk16_le((u16 *) & TK[8]));
-	PPK[5] += _S_(PPK[4] ^ Mk16_le((u16 *) & TK[10]));
-
-	PPK[0] += RotR1(PPK[5] ^ Mk16_le((u16 *) & TK[12]));
-	PPK[1] += RotR1(PPK[0] ^ Mk16_le((u16 *) & TK[14]));
+	PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
+	PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
+	PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
+	PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
+	PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
+	PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
+
+	PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
+	PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
 	PPK[2] += RotR1(PPK[1]);
 	PPK[3] += RotR1(PPK[2]);
 	PPK[4] += RotR1(PPK[3]);
@@ -294,7 +294,7 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
 	WEPSeed[0] = Hi8(IV16);
 	WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
 	WEPSeed[2] = Lo8(IV16);
-	WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((u16 *) & TK[0])) >> 1);
+	WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
 
 #ifdef __BIG_ENDIAN
 	{
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 0a18edbdfcb..6a9c07f9b10 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -754,7 +754,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
 		memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
 	} else {
-		u16 len;
+		__be16 len;
 		/* Leave Ethernet header part of hdr and full payload */
 		skb_pull(skb, hdrlen);
 		len = htons(skb->len);
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 6d06f1385e2..d8b02603cbe 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -126,7 +126,7 @@ payload of each frame is reduced to 492 bytes.
 static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 };
 static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 };
 
-static int ieee80211_copy_snap(u8 * data, u16 h_proto)
+static int ieee80211_copy_snap(u8 * data, __be16 h_proto)
 {
 	struct ieee80211_snap_hdr *snap;
 	u8 *oui;
@@ -136,7 +136,7 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto)
 	snap->ssap = 0xaa;
 	snap->ctrl = 0x03;
 
-	if (h_proto == 0x8137 || h_proto == 0x80f3)
+	if (h_proto == htons(ETH_P_AARP) || h_proto == htons(ETH_P_IPX))
 		oui = P802_1H_OUI;
 	else
 		oui = RFC1042_OUI;
@@ -144,7 +144,6 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto)
 	snap->oui[1] = oui[1];
 	snap->oui[2] = oui[2];
 
-	h_proto = htons(h_proto);
 	memcpy(data + SNAP_SIZE, &h_proto, sizeof(u16));
 
 	return SNAP_SIZE + sizeof(u16);
@@ -261,7 +260,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	    rts_required;
 	unsigned long flags;
 	struct net_device_stats *stats = &ieee->stats;
-	int ether_type, encrypt, host_encrypt, host_encrypt_msdu, host_build_iv;
+	int encrypt, host_encrypt, host_encrypt_msdu, host_build_iv;
+	__be16 ether_type;
 	int bytes, fc, hdr_len;
 	struct sk_buff *skb_frag;
 	struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */
@@ -292,11 +292,11 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto success;
 	}
 
-	ether_type = ntohs(((struct ethhdr *)skb->data)->h_proto);
+	ether_type = ((struct ethhdr *)skb->data)->h_proto;
 
 	crypt = ieee->crypt[ieee->tx_keyidx];
 
-	encrypt = !(ether_type == ETH_P_PAE && ieee->ieee802_1x) &&
+	encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) &&
 	    ieee->sec.encrypt;
 
 	host_encrypt = ieee->host_encrypt && encrypt && crypt;
@@ -304,7 +304,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	host_build_iv = ieee->host_build_iv && encrypt && crypt;
 
 	if (!encrypt && ieee->ieee802_1x &&
-	    ieee->drop_unencrypted && ether_type != ETH_P_PAE) {
+	    ieee->drop_unencrypted && ether_type != htons(ETH_P_PAE)) {
 		stats->tx_dropped++;
 		goto success;
 	}
-- 
cgit v1.2.3


From 8524f59d4735e1ff9c9dc3e09ebcc7bdb3b32b7b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 29 Dec 2007 05:03:35 -0500
Subject: ieee80211: beacon->capability is little-endian

It's only a debugging printk, so it went unnoticed; still, the
fix is trivial, so...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/ieee80211_rx.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 6a9c07f9b10..1e3f87c8c01 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -1585,26 +1585,25 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 	DECLARE_MAC_BUF(mac);
 
 	IEEE80211_DEBUG_SCAN("'%s' (%s"
-			     "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
-			     escape_essid(info_element->data,
-					  info_element->len),
-			     print_mac(mac, beacon->header.addr3),
-			     (beacon->capability & (1 << 0xf)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xe)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xd)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xc)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xb)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xa)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x9)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x8)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x7)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x6)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x5)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x4)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x3)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x2)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x1)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x0)) ? '1' : '0');
+		     "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
+		     escape_essid(info_element->data, info_element->len),
+		     print_mac(mac, beacon->header.addr3),
+		     (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0xd)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0xc)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0xb)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0xa)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x9)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x8)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x7)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x6)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x5)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x4)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x3)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x2)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x1)) ? '1' : '0',
+		     (beacon->capability & cpu_to_le16(1 << 0x0)) ? '1' : '0');
 
 	if (ieee80211_network_init(ieee, beacon, &network, stats)) {
 		IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n",
-- 
cgit v1.2.3


From 32bfd35d4b63bd63de4bb0d791ef049c3c868726 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 01:31:26 +0100
Subject: mac80211: dont use interface indices in drivers

This patch gets rid of the if_id stuff where possible in favour of
a new per-virtual-interface structure "struct ieee80211_vif". This
structure is located at the end of the per-interface structure and
contains a variable length driver-use data area.

This has two advantages:
 * removes the need to look up interfaces by if_id, this is better
   for working with network namespaces and performance
 * allows drivers to store and retrieve per-interface data without
   having to allocate own lists/hash tables

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211.c       | 12 ++++++-----
 net/mac80211/ieee80211_i.h     |  8 ++++++++
 net/mac80211/ieee80211_iface.c |  2 +-
 net/mac80211/sta_info.c        | 27 +++++++++++++++++++------
 net/mac80211/tx.c              | 46 +++++++++++++++++++-----------------------
 net/mac80211/util.c            | 45 ++++++++++++++---------------------------
 6 files changed, 73 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 4807e5215a7..42c27089f00 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -243,7 +243,7 @@ static int ieee80211_open(struct net_device *dev)
 		sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
 		/* fall through */
 	default:
-		conf.if_id = dev->ifindex;
+		conf.vif = &sdata->vif;
 		conf.type = sdata->type;
 		conf.mac_addr = dev->dev_addr;
 		res = local->ops->add_interface(local_to_hw(local), &conf);
@@ -378,7 +378,7 @@ static int ieee80211_stop(struct net_device *dev)
 		sdata->u.sta.extra_ie_len = 0;
 		/* fall through */
 	default:
-		conf.if_id = dev->ifindex;
+		conf.vif = &sdata->vif;
 		conf.type = sdata->type;
 		conf.mac_addr = dev->dev_addr;
 		/* disable all keys for as long as this netdev is down */
@@ -515,7 +515,7 @@ static int __ieee80211_if_config(struct net_device *dev,
 		conf.beacon_control = control;
 	}
 	return local->ops->config_interface(local_to_hw(local),
-					   dev->ifindex, &conf);
+					    &sdata->vif, &conf);
 }
 
 int ieee80211_if_config(struct net_device *dev)
@@ -527,11 +527,13 @@ int ieee80211_if_config_beacon(struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_tx_control control;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sk_buff *skb;
 
 	if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
 		return 0;
-	skb = ieee80211_beacon_get(local_to_hw(local), dev->ifindex, &control);
+	skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif,
+				   &control);
 	if (!skb)
 		return -ENOMEM;
 	return __ieee80211_if_config(dev, skb, &control);
@@ -736,7 +738,7 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 	struct ieee80211_tx_packet_data *pkt_data;
 
 	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
-	pkt_data->ifindex = control->ifindex;
+	pkt_data->ifindex = vif_to_sdata(control->vif)->dev->ifindex;
 	pkt_data->flags = 0;
 	if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS)
 		pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b898b316bda..c551a7f379e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -387,8 +387,16 @@ struct ieee80211_sub_if_data {
 		struct dentry *default_key;
 	} debugfs;
 #endif
+	/* must be last, dynamically sized area in this! */
+	struct ieee80211_vif vif;
 };
 
+static inline
+struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
+{
+	return container_of(p, struct ieee80211_sub_if_data, vif);
+}
+
 #define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev)
 
 enum {
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
index 7cfd8660b23..b7206637880 100644
--- a/net/mac80211/ieee80211_iface.c
+++ b/net/mac80211/ieee80211_iface.c
@@ -47,7 +47,7 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
 	int ret;
 
 	ASSERT_RTNL();
-	ndev = alloc_netdev(sizeof(struct ieee80211_sub_if_data),
+	ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size,
 			    name, ieee80211_if_setup);
 	if (!ndev)
 		return -ENOMEM;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 1257c7aab2a..32e24176472 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -177,9 +177,16 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
 	list_add(&sta->list, &local->sta_list);
 	local->num_sta++;
 	sta_info_hash_add(local, sta);
-	if (local->ops->sta_notify)
-		local->ops->sta_notify(local_to_hw(local), dev->ifindex,
-					STA_NOTIFY_ADD, addr);
+	if (local->ops->sta_notify) {
+		struct ieee80211_sub_if_data *sdata;
+
+		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+		if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+			sdata = sdata->u.vlan.ap;
+
+		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
+				       STA_NOTIFY_ADD, addr);
+	}
 	write_unlock_bh(&local->sta_lock);
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -247,9 +254,17 @@ void sta_info_free(struct sta_info *sta)
 	ieee80211_key_free(sta->key);
 	sta->key = NULL;
 
-	if (local->ops->sta_notify)
-		local->ops->sta_notify(local_to_hw(local), sta->dev->ifindex,
-					STA_NOTIFY_REMOVE, sta->addr);
+	if (local->ops->sta_notify) {
+		struct ieee80211_sub_if_data *sdata;
+
+		sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
+
+		if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+			sdata = sdata->u.vlan.ap;
+
+		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
+				       STA_NOTIFY_REMOVE, sta->addr);
+	}
 
 	rate_control_remove_sta_debugfs(sta);
 	ieee80211_sta_debugfs_remove(sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f6194167253..1b772ee2fe1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -999,9 +999,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
 	return TXRX_CONTINUE;
 }
 
-/* Device in tx->dev has a reference added; use dev_put(tx->dev) when
- * finished with it.
- *
+/*
  * NB: @tx is uninitialised when passed in here
  */
 static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
@@ -1022,6 +1020,7 @@ static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
 		return -ENODEV;
 	/* initialises tx with control */
 	__ieee80211_tx_prepare(tx, skb, dev, control);
+	dev_put(dev);
 	return 0;
 }
 
@@ -1252,7 +1251,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		}
 	}
 
-	control.ifindex = odev->ifindex;
+	control.vif = &osdata->vif;
 	control.type = osdata->type;
 	if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS)
 		control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS;
@@ -1691,7 +1690,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
 	read_unlock_bh(&local->sta_lock);
 }
 
-struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
+struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
+				     struct ieee80211_vif *vif,
 				     struct ieee80211_tx_control *control)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
@@ -1703,19 +1703,16 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
 	u8 *b_head, *b_tail;
 	int bh_len, bt_len;
 
-	bdev = dev_get_by_index(&init_net, if_id);
-	if (bdev) {
-		sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
-		ap = &sdata->u.ap;
-		dev_put(bdev);
-	}
+	sdata = vif_to_sdata(vif);
+	bdev = sdata->dev;
+	ap = &sdata->u.ap;
 
 	if (!ap || sdata->type != IEEE80211_IF_TYPE_AP ||
 	    !ap->beacon_head) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit())
-			printk(KERN_DEBUG "no beacon data avail for idx=%d "
-			       "(%s)\n", if_id, bdev ? bdev->name : "N/A");
+			printk(KERN_DEBUG "no beacon data avail for %s\n",
+			       bdev->name);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 		return NULL;
 	}
@@ -1771,7 +1768,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
 }
 EXPORT_SYMBOL(ieee80211_beacon_get);
 
-void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id,
+void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       const void *frame, size_t frame_len,
 		       const struct ieee80211_tx_control *frame_txctl,
 		       struct ieee80211_rts *rts)
@@ -1781,13 +1778,14 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id,
 
 	fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS;
 	rts->frame_control = cpu_to_le16(fctl);
-	rts->duration = ieee80211_rts_duration(hw, if_id, frame_len, frame_txctl);
+	rts->duration = ieee80211_rts_duration(hw, vif, frame_len,
+					       frame_txctl);
 	memcpy(rts->ra, hdr->addr1, sizeof(rts->ra));
 	memcpy(rts->ta, hdr->addr2, sizeof(rts->ta));
 }
 EXPORT_SYMBOL(ieee80211_rts_get);
 
-void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id,
+void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			     const void *frame, size_t frame_len,
 			     const struct ieee80211_tx_control *frame_txctl,
 			     struct ieee80211_cts *cts)
@@ -1797,13 +1795,15 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id,
 
 	fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS;
 	cts->frame_control = cpu_to_le16(fctl);
-	cts->duration = ieee80211_ctstoself_duration(hw, if_id, frame_len, frame_txctl);
+	cts->duration = ieee80211_ctstoself_duration(hw, vif,
+						     frame_len, frame_txctl);
 	memcpy(cts->ra, hdr->addr1, sizeof(cts->ra));
 }
 EXPORT_SYMBOL(ieee80211_ctstoself_get);
 
 struct sk_buff *
-ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
+ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
+			  struct ieee80211_vif *vif,
 			  struct ieee80211_tx_control *control)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
@@ -1816,12 +1816,9 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_ap *bss = NULL;
 
-	bdev = dev_get_by_index(&init_net, if_id);
-	if (bdev) {
-		sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
-		bss = &sdata->u.ap;
-		dev_put(bdev);
-	}
+	sdata = vif_to_sdata(vif);
+	bdev = sdata->dev;
+
 	if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head)
 		return NULL;
 
@@ -1857,7 +1854,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
 		if (res == TXRX_DROP || res == TXRX_QUEUED)
 			break;
 	}
-	dev_put(tx.dev);
 	skb = tx.skb; /* handlers are allowed to change skb */
 
 	if (res == TXRX_DROP) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index adb85dd5098..15503ca3e94 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -302,44 +302,34 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 }
 
 /* Exported duration function for driver use */
-__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id,
+__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
+					struct ieee80211_vif *vif,
 					size_t frame_len, int rate)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct net_device *bdev = dev_get_by_index(&init_net, if_id);
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	u16 dur;
 	int erp;
 
-	if (unlikely(!bdev))
-		return 0;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
 	erp = ieee80211_is_erp_rate(hw->conf.phymode, rate);
 	dur = ieee80211_frame_duration(local, frame_len, rate,
 		       erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE);
 
-	dev_put(bdev);
 	return cpu_to_le16(dur);
 }
 EXPORT_SYMBOL(ieee80211_generic_frame_duration);
 
-__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id,
-			      size_t frame_len,
+__le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif, size_t frame_len,
 			      const struct ieee80211_tx_control *frame_txctl)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
-	struct net_device *bdev = dev_get_by_index(&init_net, if_id);
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	int short_preamble;
 	int erp;
 	u16 dur;
 
-	if (unlikely(!bdev))
-		return 0;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
 	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
 
 	rate = frame_txctl->rts_rate;
@@ -355,27 +345,22 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id,
 	dur += ieee80211_frame_duration(local, 10, rate->rate,
 					erp, short_preamble);
 
-	dev_put(bdev);
 	return cpu_to_le16(dur);
 }
 EXPORT_SYMBOL(ieee80211_rts_duration);
 
-__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id,
+__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
+				    struct ieee80211_vif *vif,
 				    size_t frame_len,
 				    const struct ieee80211_tx_control *frame_txctl)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
-	struct net_device *bdev = dev_get_by_index(&init_net, if_id);
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	int short_preamble;
 	int erp;
 	u16 dur;
 
-	if (unlikely(!bdev))
-		return 0;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
 	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
 
 	rate = frame_txctl->rts_rate;
@@ -390,7 +375,6 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id,
 						erp, short_preamble);
 	}
 
-	dev_put(bdev);
 	return cpu_to_le16(dur);
 }
 EXPORT_SYMBOL(ieee80211_ctstoself_duration);
@@ -475,10 +459,11 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw)
 }
 EXPORT_SYMBOL(ieee80211_wake_queues);
 
-void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
-					 void (*iterator)(void *data, u8 *mac,
-							  int if_id),
-					 void *data)
+void ieee80211_iterate_active_interfaces(
+	struct ieee80211_hw *hw,
+	void (*iterator)(void *data, u8 *mac,
+			 struct ieee80211_vif *vif),
+	void *data)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
@@ -501,7 +486,7 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
 			continue;
 		if (netif_running(sdata->dev))
 			iterator(data, sdata->dev->dev_addr,
-				 sdata->dev->ifindex);
+				 &sdata->vif);
 	}
 
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 51fb61e76d952e6bc2fbdd9f0d38425fbab1cf31 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 01:31:27 +0100
Subject: mac80211: move interface type to vif structure

Drivers that support mixed AP/STA operation may well need to
know the type of a virtual interface when iterating over them.
The easiest way to support that is to move the interface type
variable into the vif structure.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c             |  2 +-
 net/mac80211/debugfs_netdev.c  |  4 +--
 net/mac80211/ieee80211.c       | 36 +++++++++++------------
 net/mac80211/ieee80211_i.h     |  1 -
 net/mac80211/ieee80211_iface.c | 10 +++----
 net/mac80211/ieee80211_ioctl.c | 65 +++++++++++++++++++++---------------------
 net/mac80211/ieee80211_sta.c   | 48 +++++++++++++++----------------
 net/mac80211/key.c             |  6 ++--
 net/mac80211/rx.c              | 38 ++++++++++++------------
 net/mac80211/sta_info.c        |  4 +--
 net/mac80211/tx.c              | 17 +++++------
 net/mac80211/util.c            |  2 +-
 12 files changed, 116 insertions(+), 117 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 11156b381ec..d02d9ef6b1e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -91,7 +91,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-        if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
 		return -EOPNOTSUPP;
 
 	ieee80211_if_reinit(dev);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index bf715d28643..d2d3c076c85 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -226,7 +226,7 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 	if (!sdata->debugfsdir)
 		return;
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		add_sta_files(sdata);
@@ -353,7 +353,7 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
 
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
 {
-	del_files(sdata, sdata->type);
+	del_files(sdata, sdata->vif.type);
 	debugfs_remove(sdata->debugfsdir);
 	sdata->debugfsdir = NULL;
 }
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 42c27089f00..c9981701ef6 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -177,21 +177,21 @@ static int ieee80211_open(struct net_device *dev)
 			/*
 			 * check whether it may have the same address
 			 */
-			if (!identical_mac_addr_allowed(sdata->type,
-							nsdata->type))
+			if (!identical_mac_addr_allowed(sdata->vif.type,
+							nsdata->vif.type))
 				return -ENOTUNIQ;
 
 			/*
 			 * can only add VLANs to enabled APs
 			 */
-			if (sdata->type == IEEE80211_IF_TYPE_VLAN &&
-			    nsdata->type == IEEE80211_IF_TYPE_AP &&
+			if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN &&
+			    nsdata->vif.type == IEEE80211_IF_TYPE_AP &&
 			    netif_running(nsdata->dev))
 				sdata->u.vlan.ap = nsdata;
 		}
 	}
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_WDS:
 		if (is_zero_ether_addr(sdata->u.wds.remote_addr))
 			return -ENOLINK;
@@ -222,7 +222,7 @@ static int ieee80211_open(struct net_device *dev)
 		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_VLAN:
 		list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans);
 		/* no need to tell driver */
@@ -244,7 +244,7 @@ static int ieee80211_open(struct net_device *dev)
 		/* fall through */
 	default:
 		conf.vif = &sdata->vif;
-		conf.type = sdata->type;
+		conf.type = sdata->vif.type;
 		conf.mac_addr = dev->dev_addr;
 		res = local->ops->add_interface(local_to_hw(local), &conf);
 		if (res && !local->open_count && local->ops->stop)
@@ -256,7 +256,7 @@ static int ieee80211_open(struct net_device *dev)
 		ieee80211_reset_erp_info(dev);
 		ieee80211_enable_keys(sdata);
 
-		if (sdata->type == IEEE80211_IF_TYPE_STA &&
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
 		    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
 			netif_carrier_off(dev);
 		else
@@ -322,7 +322,7 @@ static int ieee80211_stop(struct net_device *dev)
 	dev_mc_unsync(local->mdev, dev);
 
 	/* down all dependent devices, that is VLANs */
-	if (sdata->type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
 		struct ieee80211_sub_if_data *vlan, *tmp;
 
 		list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans,
@@ -333,7 +333,7 @@ static int ieee80211_stop(struct net_device *dev)
 
 	local->open_count--;
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_VLAN:
 		list_del(&sdata->u.vlan.list);
 		sdata->u.vlan.ap = NULL;
@@ -379,7 +379,7 @@ static int ieee80211_stop(struct net_device *dev)
 		/* fall through */
 	default:
 		conf.vif = &sdata->vif;
-		conf.type = sdata->type;
+		conf.type = sdata->vif.type;
 		conf.mac_addr = dev->dev_addr;
 		/* disable all keys for as long as this netdev is down */
 		ieee80211_disable_keys(sdata);
@@ -502,13 +502,13 @@ static int __ieee80211_if_config(struct net_device *dev,
 		return 0;
 
 	memset(&conf, 0, sizeof(conf));
-	conf.type = sdata->type;
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	conf.type = sdata->vif.type;
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		conf.bssid = sdata->u.sta.bssid;
 		conf.ssid = sdata->u.sta.ssid;
 		conf.ssid_len = sdata->u.sta.ssid_len;
-	} else if (sdata->type == IEEE80211_IF_TYPE_AP) {
+	} else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
 		conf.ssid = sdata->u.ap.ssid;
 		conf.ssid_len = sdata->u.ap.ssid_len;
 		conf.beacon = beacon;
@@ -703,7 +703,7 @@ static void ieee80211_tasklet_handler(unsigned long data)
 		case IEEE80211_RX_MSG:
 			/* status is in skb->cb */
 			memcpy(&rx_status, skb->cb, sizeof(rx_status));
-			/* Clear skb->type in order to not confuse kernel
+			/* Clear skb->pkt_type in order to not confuse kernel
 			 * netstack. */
 			skb->pkt_type = 0;
 			__ieee80211_rx(local_to_hw(local), skb, &rx_status);
@@ -962,7 +962,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 		if (!monitors || !skb)
 			goto out;
 
-		if (sdata->type == IEEE80211_IF_TYPE_MNTR) {
+		if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) {
 			if (!netif_running(sdata->dev))
 				continue;
 			monitors--;
@@ -1084,7 +1084,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	mdev->header_ops = &ieee80211_header_ops;
 	mdev->set_multicast_list = ieee80211_master_set_multicast_list;
 
-	sdata->type = IEEE80211_IF_TYPE_AP;
+	sdata->vif.type = IEEE80211_IF_TYPE_AP;
 	sdata->dev = mdev;
 	sdata->local = local;
 	sdata->u.ap.force_unicast_rateidx = -1;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c551a7f379e..91edaad4c62 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -294,7 +294,6 @@ struct ieee80211_if_sta {
 #define IEEE80211_SDATA_USERSPACE_MLME	BIT(4)
 struct ieee80211_sub_if_data {
 	struct list_head list;
-	enum ieee80211_if_types type;
 
 	struct wireless_dev wdev;
 
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
index b7206637880..be96aa84c79 100644
--- a/net/mac80211/ieee80211_iface.c
+++ b/net/mac80211/ieee80211_iface.c
@@ -66,7 +66,7 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
 	sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
 	ndev->ieee80211_ptr = &sdata->wdev;
 	sdata->wdev.wiphy = local->hw.wiphy;
-	sdata->type = IEEE80211_IF_TYPE_AP;
+	sdata->vif.type = IEEE80211_IF_TYPE_AP;
 	sdata->dev = ndev;
 	sdata->local = local;
 	ieee80211_if_sdata_init(sdata);
@@ -98,7 +98,7 @@ fail:
 void ieee80211_if_set_type(struct net_device *dev, int type)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	int oldtype = sdata->type;
+	int oldtype = sdata->vif.type;
 
 	/*
 	 * We need to call this function on the master interface
@@ -116,7 +116,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 
 	/* most have no BSS pointer */
 	sdata->bss = NULL;
-	sdata->type = type;
+	sdata->vif.type = type;
 
 	switch (type) {
 	case IEEE80211_IF_TYPE_WDS:
@@ -181,7 +181,7 @@ void ieee80211_if_reinit(struct net_device *dev)
 
 	ieee80211_if_sdata_deinit(sdata);
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_INVALID:
 		/* cannot happen */
 		WARN_ON(1);
@@ -279,7 +279,7 @@ int ieee80211_if_remove(struct net_device *dev, const char *name, int id)
 	ASSERT_RTNL();
 
 	list_for_each_entry_safe(sdata, n, &local->interfaces, list) {
-		if ((sdata->type == id || id == -1) &&
+		if ((sdata->vif.type == id || id == -1) &&
 		    strcmp(name, sdata->dev->name) == 0 &&
 		    sdata->dev != local->mdev) {
 			list_del_rcu(&sdata->list);
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 2604e21c05a..5024d373383 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -112,8 +112,8 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 	if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
 		return -EOPNOTSUPP;
 
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length);
 		if (ret)
 			return ret;
@@ -232,7 +232,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	int type;
 
-	if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
 		return -EOPNOTSUPP;
 
 	switch (*mode) {
@@ -249,7 +249,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	if (type == sdata->type)
+	if (type == sdata->vif.type)
 		return 0;
 	if (netif_running(dev))
 		return -EBUSY;
@@ -268,7 +268,7 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_AP:
 		*mode = IW_MODE_MASTER;
 		break;
@@ -336,13 +336,13 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->type == IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
 		sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL;
 
 	/* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */
 	if (freq->e == 0) {
 		if (freq->m < 0) {
-			if (sdata->type == IEEE80211_IF_TYPE_STA)
+			if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
 				sdata->u.sta.flags |=
 					IEEE80211_STA_AUTO_CHANNEL_SEL;
 			return 0;
@@ -388,8 +388,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		len--;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		int ret;
 		if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
 			if (len > IEEE80211_MAX_SSID_LEN)
@@ -409,7 +409,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		return 0;
 	}
 
-	if (sdata->type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
 		memcpy(sdata->u.ap.ssid, ssid, len);
 		memset(sdata->u.ap.ssid + len, 0,
 		       IEEE80211_MAX_SSID_LEN - len);
@@ -428,8 +428,8 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
 
 	struct ieee80211_sub_if_data *sdata;
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		int res = ieee80211_sta_get_ssid(dev, ssid, &len);
 		if (res == 0) {
 			data->length = len;
@@ -439,7 +439,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
 		return res;
 	}
 
-	if (sdata->type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
 		len = sdata->u.ap.ssid_len;
 		if (len > IW_ESSID_MAX_SIZE)
 			len = IW_ESSID_MAX_SIZE;
@@ -459,8 +459,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		int ret;
 		if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
 			memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data,
@@ -479,7 +479,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 			return ret;
 		ieee80211_sta_req_auth(dev, &sdata->u.sta);
 		return 0;
-	} else if (sdata->type == IEEE80211_IF_TYPE_WDS) {
+	} else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
 		if (memcmp(sdata->u.wds.remote_addr, (u8 *) &ap_addr->sa_data,
 			   ETH_ALEN) == 0)
 			return 0;
@@ -497,12 +497,12 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		ap_addr->sa_family = ARPHRD_ETHER;
 		memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN);
 		return 0;
-	} else if (sdata->type == IEEE80211_IF_TYPE_WDS) {
+	} else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
 		ap_addr->sa_family = ARPHRD_ETHER;
 		memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
 		return 0;
@@ -524,11 +524,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
-	if (sdata->type != IEEE80211_IF_TYPE_STA &&
-	    sdata->type != IEEE80211_IF_TYPE_IBSS &&
-	    sdata->type != IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_AP)
 		return -EOPNOTSUPP;
-	}
 
 	/* if SSID was specified explicitly then use that */
 	if (wrqu->data.length == sizeof(struct iw_scan_req) &&
@@ -606,7 +605,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type == IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
 		sta = sta_info_get(local, sdata->u.sta.bssid);
 	else
 		return -EOPNOTSUPP;
@@ -820,8 +819,8 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
 	struct iw_mlme *mlme = (struct iw_mlme *) extra;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type != IEEE80211_IF_TYPE_STA &&
-	    sdata->type != IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return -EINVAL;
 
 	switch (mlme->cmd) {
@@ -938,7 +937,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 		sdata->drop_unencrypted = !!data->value;
 		break;
 	case IW_AUTH_PRIVACY_INVOKED:
-		if (sdata->type != IEEE80211_IF_TYPE_STA)
+		if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 			ret = -EINVAL;
 		else {
 			sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
@@ -953,8 +952,8 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 		}
 		break;
 	case IW_AUTH_80211_AUTH_ALG:
-		if (sdata->type == IEEE80211_IF_TYPE_STA ||
-		    sdata->type == IEEE80211_IF_TYPE_IBSS)
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+		    sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
 			sdata->u.sta.auth_algs = data->value;
 		else
 			ret = -EOPNOTSUPP;
@@ -974,8 +973,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sta_info *sta = NULL;
 
-	if (sdata->type == IEEE80211_IF_TYPE_STA ||
-	    sdata->type == IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
 		sta = sta_info_get(local, sdata->u.sta.bssid);
 	if (!sta) {
 		wstats->discard.fragment = 0;
@@ -1003,8 +1002,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
 
 	switch (data->flags & IW_AUTH_INDEX) {
 	case IW_AUTH_80211_AUTH_ALG:
-		if (sdata->type == IEEE80211_IF_TYPE_STA ||
-		    sdata->type == IEEE80211_IF_TYPE_IBSS)
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+		    sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
 			data->value = sdata->u.sta.auth_algs;
 		else
 			ret = -EOPNOTSUPP;
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index d1f7199a208..b1e7d17ee25 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -471,7 +471,7 @@ static void ieee80211_set_associated(struct net_device *dev,
 		ifsta->flags |= IEEE80211_STA_ASSOCIATED;
 
 		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-		if (sdata->type != IEEE80211_IF_TYPE_STA)
+		if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 			return;
 
 		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
@@ -1025,7 +1025,7 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
-	if (sdata->type == IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
 		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
@@ -1184,7 +1184,7 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
-	if (sdata->type == IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
 		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
@@ -1329,7 +1329,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 	DECLARE_MAC_BUF(mac);
 
 	if (ifsta->state != IEEE80211_AUTHENTICATE &&
-	    sdata->type != IEEE80211_IF_TYPE_IBSS) {
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
 		printk(KERN_DEBUG "%s: authentication frame received from "
 		       "%s, but not in authenticate state - ignored\n",
 		       dev->name, print_mac(mac, mgmt->sa));
@@ -1343,7 +1343,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 		return;
 	}
 
-	if (sdata->type != IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
 	    memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
 		printk(KERN_DEBUG "%s: authentication frame received from "
 		       "unknown AP (SA=%s BSSID=%s) - "
@@ -1352,7 +1352,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 		return;
 	}
 
-	if (sdata->type != IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
 	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) {
 		printk(KERN_DEBUG "%s: authentication frame received from "
 		       "unknown BSSID (SA=%s BSSID=%s) - "
@@ -1370,7 +1370,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 	       dev->name, print_mac(mac, mgmt->sa), auth_alg,
 	       auth_transaction, status_code);
 
-	if (sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		/* IEEE 802.11 standard does not require authentication in IBSS
 		 * networks and most implementations do not seem to use it.
 		 * However, try to reply to authentication attempts if someone
@@ -1849,7 +1849,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 	timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
 
-	if (sdata->type == IEEE80211_IF_TYPE_IBSS && beacon &&
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 		static unsigned long last_tsf_debug = 0;
@@ -1874,7 +1874,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
-	if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
 	    (sta = sta_info_get(local, mgmt->sa))) {
 		struct ieee80211_hw_mode *mode;
@@ -2103,7 +2103,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return;
 	ifsta = &sdata->u.sta;
 
@@ -2163,7 +2163,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 	DECLARE_MAC_BUF(mac3);
 #endif
 
-	if (sdata->type != IEEE80211_IF_TYPE_IBSS ||
+	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS ||
 	    ifsta->state != IEEE80211_IBSS_JOINED ||
 	    len < 24 + 2 || !ifsta->probe_resp)
 		return;
@@ -2474,10 +2474,10 @@ void ieee80211_sta_work(struct work_struct *work)
 	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return;
 
-	if (sdata->type != IEEE80211_IF_TYPE_STA &&
-	    sdata->type != IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
 		printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface "
-		       "(type=%d)\n", dev->name, sdata->type);
+		       "(type=%d)\n", dev->name, sdata->vif.type);
 		return;
 	}
 	ifsta = &sdata->u.sta;
@@ -2572,7 +2572,7 @@ void ieee80211_sta_req_auth(struct net_device *dev,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return;
 
 	if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
@@ -3039,7 +3039,7 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
 		ifsta->flags |= IEEE80211_STA_SSID_SET;
 	else
 		ifsta->flags &= ~IEEE80211_STA_SSID_SET;
-	if (sdata->type == IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
 	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
 		ifsta->ibss_join_req = jiffies;
 		ifsta->state = IEEE80211_IBSS_SEARCH;
@@ -3157,7 +3157,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 		if (sdata->dev == local->mdev)
 			continue;
 
-		if (sdata->type == IEEE80211_IF_TYPE_STA) {
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
 			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)
 				ieee80211_send_nullfunc(local, sdata, 0);
 			ieee80211_sta_timer((unsigned long)sdata);
@@ -3169,7 +3169,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 
 done:
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
 		    (!ifsta->state == IEEE80211_IBSS_JOINED &&
@@ -3204,7 +3204,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 		skip = !(local->enabled_modes & (1 << mode->mode));
 		chan = &mode->channels[local->scan_channel_idx];
 		if (!(chan->flag & IEEE80211_CHAN_W_SCAN) ||
-		    (sdata->type == IEEE80211_IF_TYPE_IBSS &&
+		    (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
 		     !(chan->flag & IEEE80211_CHAN_W_IBSS)) ||
 		    (local->hw_modes & local->enabled_modes &
 		     (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B))
@@ -3312,7 +3312,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 			continue;
 
 		netif_stop_queue(sdata->dev);
-		if (sdata->type == IEEE80211_IF_TYPE_STA &&
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
 		    (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED))
 			ieee80211_send_nullfunc(local, sdata, 1);
 	}
@@ -3353,7 +3353,7 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 
-	if (sdata->type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return ieee80211_sta_start_scan(dev, ssid, ssid_len);
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning) {
@@ -3576,8 +3576,8 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason)
 	printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n",
 	       dev->name, reason);
 
-	if (sdata->type != IEEE80211_IF_TYPE_STA &&
-	    sdata->type != IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return -EINVAL;
 
 	ieee80211_send_deauth(dev, ifsta, reason);
@@ -3594,7 +3594,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
 	printk(KERN_DEBUG "%s: disassociate(reason=%d)\n",
 	       dev->name, reason);
 
-	if (sdata->type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return -EINVAL;
 
 	if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED))
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 0b2328f7d67..ed57fb8e82f 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,8 +49,8 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key)
 	 * address to indicate a transmit-only key.
 	 */
 	if (key->conf.alg != ALG_WEP &&
-	    (key->sdata->type == IEEE80211_IF_TYPE_AP ||
-	     key->sdata->type == IEEE80211_IF_TYPE_VLAN))
+	    (key->sdata->vif.type == IEEE80211_IF_TYPE_AP ||
+	     key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN))
 		addr = zero_addr;
 
 	if (key->sta)
@@ -172,7 +172,7 @@ struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata,
 		if (sta->flags & WLAN_STA_WME)
 			key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
 	} else {
-		if (sdata->type == IEEE80211_IF_TYPE_STA) {
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
 			struct sta_info *ap;
 
 			/* same here, the AP could be using QoS */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index ed3b8163920..465fce0f62a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -223,7 +223,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		if (!netif_running(sdata->dev))
 			continue;
 
-		if (sdata->type != IEEE80211_IF_TYPE_MNTR)
+		if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR)
 			continue;
 
 		if (prev_dev) {
@@ -419,7 +419,7 @@ ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
 	if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA ||
 		      ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL &&
 		       (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) &&
-		     rx->sdata->type != IEEE80211_IF_TYPE_IBSS &&
+		     rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
 		     (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) {
 		if ((!(rx->fc & IEEE80211_FCTL_FROMDS) &&
 		     !(rx->fc & IEEE80211_FCTL_TODS) &&
@@ -639,14 +639,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx)
 	/* Update last_rx only for IBSS packets which are for the current
 	 * BSSID to avoid keeping the current IBSS network alive in cases where
 	 * other STAs are using different BSSID. */
-	if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
 						IEEE80211_IF_TYPE_IBSS);
 		if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0)
 			sta->last_rx = jiffies;
 	} else
 	if (!is_multicast_ether_addr(hdr->addr1) ||
-	    rx->sdata->type == IEEE80211_IF_TYPE_STA) {
+	    rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) {
 		/* Update last_rx only for unicast frames in order to prevent
 		 * the Probe Request frames (the only broadcast frames from a
 		 * STA in infrastructure mode) from keeping a connection alive.
@@ -901,8 +901,8 @@ ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
 		   !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)))
 		return TXRX_CONTINUE;
 
-	if ((sdata->type != IEEE80211_IF_TYPE_AP) &&
-	    (sdata->type != IEEE80211_IF_TYPE_VLAN))
+	if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) &&
+	    (sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
 		return TXRX_DROP;
 
 	skb = skb_dequeue(&rx->sta->tx_filtered);
@@ -1058,8 +1058,8 @@ ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 		memcpy(dst, hdr->addr3, ETH_ALEN);
 		memcpy(src, hdr->addr2, ETH_ALEN);
 
-		if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP &&
-			     sdata->type != IEEE80211_IF_TYPE_VLAN)) {
+		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP &&
+			     sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "%s: dropped ToDS frame "
 				       "(BSSID=%s SA=%s DA=%s)\n",
@@ -1075,7 +1075,7 @@ ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 		memcpy(dst, hdr->addr3, ETH_ALEN);
 		memcpy(src, hdr->addr4, ETH_ALEN);
 
-		if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) {
+		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS)) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "%s: dropped FromDS&ToDS "
 				       "frame (RA=%s TA=%s DA=%s SA=%s)\n",
@@ -1092,7 +1092,7 @@ ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 		memcpy(dst, hdr->addr1, ETH_ALEN);
 		memcpy(src, hdr->addr3, ETH_ALEN);
 
-		if (sdata->type != IEEE80211_IF_TYPE_STA ||
+		if (sdata->vif.type != IEEE80211_IF_TYPE_STA ||
 		    (is_multicast_ether_addr(dst) &&
 		     !compare_ether_addr(src, dev->dev_addr)))
 			return -1;
@@ -1102,7 +1102,7 @@ ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
 		memcpy(dst, hdr->addr1, ETH_ALEN);
 		memcpy(src, hdr->addr2, ETH_ALEN);
 
-		if (sdata->type != IEEE80211_IF_TYPE_IBSS) {
+		if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: dropped IBSS frame "
 				       "(DA=%s SA=%s BSSID=%s)\n",
@@ -1190,8 +1190,8 @@ ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
 	skb = rx->skb;
 	xmit_skb = NULL;
 
-	if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP ||
-				      sdata->type == IEEE80211_IF_TYPE_VLAN) &&
+	if (local->bridge_packets && (sdata->vif.type == IEEE80211_IF_TYPE_AP ||
+				      sdata->vif.type == IEEE80211_IF_TYPE_VLAN) &&
 	    (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) {
 		if (is_multicast_ether_addr(ehdr->h_dest)) {
 			/*
@@ -1435,8 +1435,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
 		return TXRX_DROP;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
-	if ((sdata->type == IEEE80211_IF_TYPE_STA ||
-	     sdata->type == IEEE80211_IF_TYPE_IBSS) &&
+	if ((sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	     sdata->vif.type == IEEE80211_IF_TYPE_IBSS) &&
 	    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
 		ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status);
 	else
@@ -1528,7 +1528,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 		goto ignore;
 	}
 
-	if (rx->sdata->type == IEEE80211_IF_TYPE_AP && keyidx) {
+	if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) {
 		/*
 		 * APs with pairwise keys should never receive Michael MIC
 		 * errors for non-zero keyidx because these are reserved for
@@ -1590,7 +1590,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 {
 	int multicast = is_multicast_ether_addr(hdr->addr1);
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_STA:
 		if (!bssid)
 			return 0;
@@ -1738,10 +1738,10 @@ void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
 		if (!netif_running(sdata->dev))
 			continue;
 
-		if (sdata->type == IEEE80211_IF_TYPE_MNTR)
+		if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR)
 			continue;
 
-		bssid = ieee80211_get_bssid(hdr, skb->len, sdata->type);
+		bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
 		rx.flags |= IEEE80211_TXRXD_RXRA_MATCH;
 		prepares = prepare_for_handlers(sdata, bssid, &rx, hdr);
 		/* prepare_for_handlers can change sta */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 32e24176472..1f74bd29635 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -181,7 +181,7 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
 		struct ieee80211_sub_if_data *sdata;
 
 		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-		if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+		if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
 			sdata = sdata->u.vlan.ap;
 
 		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
@@ -259,7 +259,7 @@ void sta_info_free(struct sta_info *sta)
 
 		sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
 
-		if (sdata->type == IEEE80211_IF_TYPE_VLAN)
+		if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
 			sdata = sdata->u.vlan.ap;
 
 		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1b772ee2fe1..99590e4ce30 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -237,7 +237,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
 
 	if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) {
 		if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
-			     tx->sdata->type != IEEE80211_IF_TYPE_IBSS &&
+			     tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
 			     (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			DECLARE_MAC_BUF(mac);
@@ -251,7 +251,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
 	} else {
 		if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
 			     tx->local->num_sta == 0 &&
-			     tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) {
+			     tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) {
 			/*
 			 * No associated STAs - no need to send multicast
 			 * frames.
@@ -294,7 +294,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		struct ieee80211_if_ap *ap;
 		if (sdata->dev == local->mdev ||
-		    sdata->type != IEEE80211_IF_TYPE_AP)
+		    sdata->vif.type != IEEE80211_IF_TYPE_AP)
 			continue;
 		ap = &sdata->u.ap;
 		skb = skb_dequeue(&ap->ps_bc_buf);
@@ -949,7 +949,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
 
 	/* process and remove the injection radiotap header */
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) {
+	if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) {
 		if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP)
 			return TXRX_DROP;
 
@@ -1252,7 +1252,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 	}
 
 	control.vif = &osdata->vif;
-	control.type = osdata->type;
+	control.type = osdata->vif.type;
 	if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS)
 		control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS;
 	if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT)
@@ -1371,7 +1371,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	ethertype = (skb->data[12] << 8) | skb->data[13];
 	fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
 
-	switch (sdata->type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_AP:
 	case IEEE80211_IF_TYPE_VLAN:
 		fc |= IEEE80211_FCTL_FROMDS;
@@ -1707,7 +1707,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	bdev = sdata->dev;
 	ap = &sdata->u.ap;
 
-	if (!ap || sdata->type != IEEE80211_IF_TYPE_AP ||
+	if (!ap || sdata->vif.type != IEEE80211_IF_TYPE_AP ||
 	    !ap->beacon_head) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit())
@@ -1819,7 +1819,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	sdata = vif_to_sdata(vif);
 	bdev = sdata->dev;
 
-	if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head)
+	if (!bss || sdata->vif.type != IEEE80211_IF_TYPE_AP ||
+	    !bss->beacon_head)
 		return NULL;
 
 	if (bss->dtim_count != 0)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 15503ca3e94..ba81cf54281 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -471,7 +471,7 @@ void ieee80211_iterate_active_interfaces(
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		switch (sdata->type) {
+		switch (sdata->vif.type) {
 		case IEEE80211_IF_TYPE_INVALID:
 		case IEEE80211_IF_TYPE_MNTR:
 		case IEEE80211_IF_TYPE_VLAN:
-- 
cgit v1.2.3


From 5dfdaf58d61f06a458529430c24b1191ea4d1a27 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:33 +0100
Subject: mac80211: add beacon configuration via cfg80211

This patch implements the cfg80211 hooks for configuring beaconing
on an access point interface in mac80211. While doing so, it fixes
a number of races that could badly crash the machine when the
beacon is changed while being requested by the driver.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c             | 156 +++++++++++++++++++++++++++++++++++++++++
 net/mac80211/debugfs_netdev.c  |  27 -------
 net/mac80211/ieee80211.c       |   9 ++-
 net/mac80211/ieee80211_i.h     |  14 ++--
 net/mac80211/ieee80211_iface.c |   4 +-
 net/mac80211/tx.c              |  65 ++++++++++-------
 6 files changed, 213 insertions(+), 62 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d02d9ef6b1e..5a4c6edd934 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -10,6 +10,7 @@
 #include <linux/nl80211.h>
 #include <linux/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <linux/rcupdate.h>
 #include <net/cfg80211.h>
 #include "ieee80211_i.h"
 #include "cfg.h"
@@ -294,6 +295,158 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
+/*
+ * This handles both adding a beacon and setting new beacon info
+ */
+static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
+				   struct beacon_parameters *params)
+{
+	struct beacon_data *new, *old;
+	int new_head_len, new_tail_len;
+	int size;
+	int err = -EINVAL;
+
+	old = sdata->u.ap.beacon;
+
+	/* head must not be zero-length */
+	if (params->head && !params->head_len)
+		return -EINVAL;
+
+	/*
+	 * This is a kludge. beacon interval should really be part
+	 * of the beacon information.
+	 */
+	if (params->interval) {
+		sdata->local->hw.conf.beacon_int = params->interval;
+		if (ieee80211_hw_config(sdata->local))
+			return -EINVAL;
+		/*
+		 * We updated some parameter so if below bails out
+		 * it's not an error.
+		 */
+		err = 0;
+	}
+
+	/* Need to have a beacon head if we don't have one yet */
+	if (!params->head && !old)
+		return err;
+
+	/* sorry, no way to start beaconing without dtim period */
+	if (!params->dtim_period && !old)
+		return err;
+
+	/* new or old head? */
+	if (params->head)
+		new_head_len = params->head_len;
+	else
+		new_head_len = old->head_len;
+
+	/* new or old tail? */
+	if (params->tail || !old)
+		/* params->tail_len will be zero for !params->tail */
+		new_tail_len = params->tail_len;
+	else
+		new_tail_len = old->tail_len;
+
+	size = sizeof(*new) + new_head_len + new_tail_len;
+
+	new = kzalloc(size, GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* start filling the new info now */
+
+	/* new or old dtim period? */
+	if (params->dtim_period)
+		new->dtim_period = params->dtim_period;
+	else
+		new->dtim_period = old->dtim_period;
+
+	/*
+	 * pointers go into the block we allocated,
+	 * memory is | beacon_data | head | tail |
+	 */
+	new->head = ((u8 *) new) + sizeof(*new);
+	new->tail = new->head + new_head_len;
+	new->head_len = new_head_len;
+	new->tail_len = new_tail_len;
+
+	/* copy in head */
+	if (params->head)
+		memcpy(new->head, params->head, new_head_len);
+	else
+		memcpy(new->head, old->head, new_head_len);
+
+	/* copy in optional tail */
+	if (params->tail)
+		memcpy(new->tail, params->tail, new_tail_len);
+	else
+		if (old)
+			memcpy(new->tail, old->tail, new_tail_len);
+
+	rcu_assign_pointer(sdata->u.ap.beacon, new);
+
+	synchronize_rcu();
+
+	kfree(old);
+
+	return ieee80211_if_config_beacon(sdata->dev);
+}
+
+static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
+				struct beacon_parameters *params)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct beacon_data *old;
+
+	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		return -EINVAL;
+
+	old = sdata->u.ap.beacon;
+
+	if (old)
+		return -EALREADY;
+
+	return ieee80211_config_beacon(sdata, params);
+}
+
+static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
+				struct beacon_parameters *params)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct beacon_data *old;
+
+	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		return -EINVAL;
+
+	old = sdata->u.ap.beacon;
+
+	if (!old)
+		return -ENOENT;
+
+	return ieee80211_config_beacon(sdata, params);
+}
+
+static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct beacon_data *old;
+
+	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		return -EINVAL;
+
+	old = sdata->u.ap.beacon;
+
+	if (!old)
+		return -ENOENT;
+
+	rcu_assign_pointer(sdata->u.ap.beacon, NULL);
+	synchronize_rcu();
+	kfree(old);
+
+	return ieee80211_if_config_beacon(dev);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -302,5 +455,8 @@ struct cfg80211_ops mac80211_config_ops = {
 	.del_key = ieee80211_del_key,
 	.get_key = ieee80211_get_key,
 	.set_default_key = ieee80211_config_default_key,
+	.add_beacon = ieee80211_add_beacon,
+	.set_beacon = ieee80211_set_beacon,
+	.del_beacon = ieee80211_del_beacon,
 	.get_station = ieee80211_get_station,
 };
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index d2d3c076c85..3500fe0d380 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -124,7 +124,6 @@ __IEEE80211_IF_FILE(flags);
 
 /* AP attributes */
 IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
-IEEE80211_IF_FILE(dtim_period, u.ap.dtim_period, DEC);
 IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
 IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC);
 IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC);
@@ -138,26 +137,6 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast(
 }
 __IEEE80211_IF_FILE(num_buffered_multicast);
 
-static ssize_t ieee80211_if_fmt_beacon_head_len(
-	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
-{
-	if (sdata->u.ap.beacon_head)
-		return scnprintf(buf, buflen, "%d\n",
-				 sdata->u.ap.beacon_head_len);
-	return scnprintf(buf, buflen, "\n");
-}
-__IEEE80211_IF_FILE(beacon_head_len);
-
-static ssize_t ieee80211_if_fmt_beacon_tail_len(
-	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
-{
-	if (sdata->u.ap.beacon_tail)
-		return scnprintf(buf, buflen, "%d\n",
-				 sdata->u.ap.beacon_tail_len);
-	return scnprintf(buf, buflen, "\n");
-}
-__IEEE80211_IF_FILE(beacon_tail_len);
-
 /* WDS attributes */
 IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
 
@@ -192,14 +171,11 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_ADD(drop_unencrypted, ap);
 	DEBUGFS_ADD(ieee802_1x_pac, ap);
 	DEBUGFS_ADD(num_sta_ps, ap);
-	DEBUGFS_ADD(dtim_period, ap);
 	DEBUGFS_ADD(dtim_count, ap);
 	DEBUGFS_ADD(num_beacons, ap);
 	DEBUGFS_ADD(force_unicast_rateidx, ap);
 	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
 	DEBUGFS_ADD(num_buffered_multicast, ap);
-	DEBUGFS_ADD(beacon_head_len, ap);
-	DEBUGFS_ADD(beacon_tail_len, ap);
 }
 
 static void add_wds_files(struct ieee80211_sub_if_data *sdata)
@@ -281,14 +257,11 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_DEL(drop_unencrypted, ap);
 	DEBUGFS_DEL(ieee802_1x_pac, ap);
 	DEBUGFS_DEL(num_sta_ps, ap);
-	DEBUGFS_DEL(dtim_period, ap);
 	DEBUGFS_DEL(dtim_count, ap);
 	DEBUGFS_DEL(num_beacons, ap);
 	DEBUGFS_DEL(force_unicast_rateidx, ap);
 	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
 	DEBUGFS_DEL(num_buffered_multicast, ap);
-	DEBUGFS_DEL(beacon_head_len, ap);
-	DEBUGFS_DEL(beacon_tail_len, ap);
 }
 
 static void del_wds_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index c9981701ef6..859682eee54 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -321,10 +321,17 @@ static int ieee80211_stop(struct net_device *dev)
 
 	dev_mc_unsync(local->mdev, dev);
 
-	/* down all dependent devices, that is VLANs */
+	/* APs need special treatment */
 	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
 		struct ieee80211_sub_if_data *vlan, *tmp;
+		struct beacon_data *old_beacon = sdata->u.ap.beacon;
 
+		/* remove beacon */
+		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
+		synchronize_rcu();
+		kfree(old_beacon);
+
+		/* down all dependent devices, that is VLANs */
 		list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans,
 					 u.vlan.list)
 			dev_close(vlan->dev);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 91edaad4c62..08a6c0cff69 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -190,9 +190,14 @@ typedef ieee80211_txrx_result (*ieee80211_tx_handler)
 typedef ieee80211_txrx_result (*ieee80211_rx_handler)
 (struct ieee80211_txrx_data *rx);
 
+struct beacon_data {
+	u8 *head, *tail;
+	int head_len, tail_len;
+	int dtim_period;
+};
+
 struct ieee80211_if_ap {
-	u8 *beacon_head, *beacon_tail;
-	int beacon_head_len, beacon_tail_len;
+	struct beacon_data *beacon;
 
 	struct list_head vlans;
 
@@ -205,7 +210,7 @@ struct ieee80211_if_ap {
 	u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)];
 	atomic_t num_sta_ps; /* number of stations in PS mode */
 	struct sk_buff_head ps_bc_buf;
-	int dtim_period, dtim_count;
+	int dtim_count;
 	int force_unicast_rateidx; /* forced TX rateidx for unicast frames */
 	int max_ratectrl_rateidx; /* max TX rateidx for rate control */
 	int num_beacons; /* number of TXed beacon frames for this BSS */
@@ -360,14 +365,11 @@ struct ieee80211_sub_if_data {
 			struct dentry *drop_unencrypted;
 			struct dentry *ieee802_1x_pac;
 			struct dentry *num_sta_ps;
-			struct dentry *dtim_period;
 			struct dentry *dtim_count;
 			struct dentry *num_beacons;
 			struct dentry *force_unicast_rateidx;
 			struct dentry *max_ratectrl_rateidx;
 			struct dentry *num_buffered_multicast;
-			struct dentry *beacon_head_len;
-			struct dentry *beacon_tail_len;
 		} ap;
 		struct {
 			struct dentry *channel_use;
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
index be96aa84c79..92f1eb2da31 100644
--- a/net/mac80211/ieee80211_iface.c
+++ b/net/mac80211/ieee80211_iface.c
@@ -126,7 +126,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 		sdata->u.vlan.ap = NULL;
 		break;
 	case IEEE80211_IF_TYPE_AP:
-		sdata->u.ap.dtim_period = 2;
 		sdata->u.ap.force_unicast_rateidx = -1;
 		sdata->u.ap.max_ratectrl_rateidx = -1;
 		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
@@ -207,8 +206,7 @@ void ieee80211_if_reinit(struct net_device *dev)
 			}
 		}
 
-		kfree(sdata->u.ap.beacon_head);
-		kfree(sdata->u.ap.beacon_tail);
+		kfree(sdata->u.ap.beacon);
 
 		while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
 			local->total_ps_buffered--;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 99590e4ce30..51c0f00d02d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1628,7 +1628,8 @@ void ieee80211_tx_pending(unsigned long data)
 
 static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
 				     struct ieee80211_if_ap *bss,
-				     struct sk_buff *skb)
+				     struct sk_buff *skb,
+				     struct beacon_data *beacon)
 {
 	u8 *pos, *tim;
 	int aid0 = 0;
@@ -1644,7 +1645,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
 					  IEEE80211_MAX_AID+1);
 
 	if (bss->dtim_count == 0)
-		bss->dtim_count = bss->dtim_period - 1;
+		bss->dtim_count = beacon->dtim_period - 1;
 	else
 		bss->dtim_count--;
 
@@ -1652,7 +1653,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
 	*pos++ = WLAN_EID_TIM;
 	*pos++ = 4;
 	*pos++ = bss->dtim_count;
-	*pos++ = bss->dtim_period;
+	*pos++ = beacon->dtim_period;
 
 	if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
 		aid0 = 1;
@@ -1700,44 +1701,43 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	struct ieee80211_sub_if_data *sdata = NULL;
 	struct ieee80211_if_ap *ap = NULL;
 	struct rate_selection rsel;
-	u8 *b_head, *b_tail;
-	int bh_len, bt_len;
+	struct beacon_data *beacon;
+
+	rcu_read_lock();
 
 	sdata = vif_to_sdata(vif);
 	bdev = sdata->dev;
 	ap = &sdata->u.ap;
 
-	if (!ap || sdata->vif.type != IEEE80211_IF_TYPE_AP ||
-	    !ap->beacon_head) {
+	beacon = rcu_dereference(ap->beacon);
+
+	if (!ap || sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit())
 			printk(KERN_DEBUG "no beacon data avail for %s\n",
 			       bdev->name);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
-		return NULL;
+		skb = NULL;
+		goto out;
 	}
 
-	/* Assume we are generating the normal beacon locally */
-	b_head = ap->beacon_head;
-	b_tail = ap->beacon_tail;
-	bh_len = ap->beacon_head_len;
-	bt_len = ap->beacon_tail_len;
-
-	skb = dev_alloc_skb(local->tx_headroom +
-		bh_len + bt_len + 256 /* maximum TIM len */);
+	/* headroom, head length, tail length and maximum TIM length */
+	skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
+			    beacon->tail_len + 256);
 	if (!skb)
-		return NULL;
+		goto out;
 
 	skb_reserve(skb, local->tx_headroom);
-	memcpy(skb_put(skb, bh_len), b_head, bh_len);
+	memcpy(skb_put(skb, beacon->head_len), beacon->head,
+	       beacon->head_len);
 
 	ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data);
 
-	ieee80211_beacon_add_tim(local, ap, skb);
+	ieee80211_beacon_add_tim(local, ap, skb, beacon);
 
-	if (b_tail) {
-		memcpy(skb_put(skb, bt_len), b_tail, bt_len);
-	}
+	if (beacon->tail)
+		memcpy(skb_put(skb, beacon->tail_len), beacon->tail,
+		       beacon->tail_len);
 
 	if (control) {
 		rate_control_get_rate(local->mdev, local->oper_hw_mode, skb,
@@ -1749,7 +1749,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 				       wiphy_name(local->hw.wiphy));
 			}
 			dev_kfree_skb(skb);
-			return NULL;
+			skb = NULL;
+			goto out;
 		}
 
 		control->tx_rate =
@@ -1764,6 +1765,9 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	}
 
 	ap->num_beacons++;
+
+ out:
+	rcu_read_unlock();
 	return skb;
 }
 EXPORT_SYMBOL(ieee80211_beacon_get);
@@ -1815,13 +1819,24 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	struct net_device *bdev;
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_ap *bss = NULL;
+	struct beacon_data *beacon;
 
 	sdata = vif_to_sdata(vif);
 	bdev = sdata->dev;
 
-	if (!bss || sdata->vif.type != IEEE80211_IF_TYPE_AP ||
-	    !bss->beacon_head)
+
+	if (!bss)
+		return NULL;
+
+	rcu_read_lock();
+	beacon = rcu_dereference(bss->beacon);
+
+	if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon ||
+	    !beacon->head) {
+		rcu_read_unlock();
 		return NULL;
+	}
+	rcu_read_unlock();
 
 	if (bss->dtim_count != 0)
 		return NULL; /* send buffered bc/mc only after DTIM beacon */
-- 
cgit v1.2.3


From 4fd6931ebe24640bec72b91ba612325843a5e3cc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 19 Dec 2007 02:03:35 +0100
Subject: mac80211: implement cfg80211 station handling

This implements station handling from userspace via cfg80211
in mac80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5a4c6edd934..22c9619ba77 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -14,6 +14,7 @@
 #include <net/cfg80211.h>
 #include "ieee80211_i.h"
 #include "cfg.h"
+#include "ieee80211_rate.h"
 
 static enum ieee80211_if_types
 nl80211_type_to_mac80211_type(enum nl80211_iftype type)
@@ -447,6 +448,194 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
 	return ieee80211_if_config_beacon(dev);
 }
 
+/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
+struct iapp_layer2_update {
+	u8 da[ETH_ALEN];	/* broadcast */
+	u8 sa[ETH_ALEN];	/* STA addr */
+	__be16 len;		/* 6 */
+	u8 dsap;		/* 0 */
+	u8 ssap;		/* 0 */
+	u8 control;
+	u8 xid_info[3];
+} __attribute__ ((packed));
+
+static void ieee80211_send_layer2_update(struct sta_info *sta)
+{
+	struct iapp_layer2_update *msg;
+	struct sk_buff *skb;
+
+	/* Send Level 2 Update Frame to update forwarding tables in layer 2
+	 * bridge devices */
+
+	skb = dev_alloc_skb(sizeof(*msg));
+	if (!skb)
+		return;
+	msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg));
+
+	/* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
+	 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
+
+	memset(msg->da, 0xff, ETH_ALEN);
+	memcpy(msg->sa, sta->addr, ETH_ALEN);
+	msg->len = htons(6);
+	msg->dsap = 0;
+	msg->ssap = 0x01;	/* NULL LSAP, CR Bit: Response */
+	msg->control = 0xaf;	/* XID response lsb.1111F101.
+				 * F=0 (no poll command; unsolicited frame) */
+	msg->xid_info[0] = 0x81;	/* XID format identifier */
+	msg->xid_info[1] = 1;	/* LLC types/classes: Type 1 LLC */
+	msg->xid_info[2] = 0;	/* XID sender's receive window size (RW) */
+
+	skb->dev = sta->dev;
+	skb->protocol = eth_type_trans(skb, sta->dev);
+	memset(skb->cb, 0, sizeof(skb->cb));
+	netif_rx(skb);
+}
+
+static void sta_apply_parameters(struct ieee80211_local *local,
+				 struct sta_info *sta,
+				 struct station_parameters *params)
+{
+	u32 rates;
+	int i, j;
+	struct ieee80211_hw_mode *mode;
+
+	if (params->station_flags & STATION_FLAG_CHANGED) {
+		sta->flags &= ~WLAN_STA_AUTHORIZED;
+		if (params->station_flags & STATION_FLAG_AUTHORIZED)
+			sta->flags |= WLAN_STA_AUTHORIZED;
+
+		sta->flags &= ~WLAN_STA_SHORT_PREAMBLE;
+		if (params->station_flags & STATION_FLAG_SHORT_PREAMBLE)
+			sta->flags |= WLAN_STA_SHORT_PREAMBLE;
+
+		sta->flags &= ~WLAN_STA_WME;
+		if (params->station_flags & STATION_FLAG_WME)
+			sta->flags |= WLAN_STA_WME;
+	}
+
+	if (params->aid) {
+		sta->aid = params->aid;
+		if (sta->aid > IEEE80211_MAX_AID)
+			sta->aid = 0; /* XXX: should this be an error? */
+	}
+
+	if (params->listen_interval >= 0)
+		sta->listen_interval = params->listen_interval;
+
+	if (params->supported_rates) {
+		rates = 0;
+		mode = local->oper_hw_mode;
+		for (i = 0; i < params->supported_rates_len; i++) {
+			int rate = (params->supported_rates[i] & 0x7f) * 5;
+			for (j = 0; j < mode->num_rates; j++) {
+				if (mode->rates[j].rate == rate)
+					rates |= BIT(j);
+			}
+		}
+		sta->supp_rates = rates;
+	}
+}
+
+static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
+				 u8 *mac, struct station_parameters *params)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	struct ieee80211_sub_if_data *sdata;
+
+	/* Prevent a race with changing the rate control algorithm */
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
+	/* XXX: get sta belonging to dev */
+	sta = sta_info_get(local, mac);
+	if (sta) {
+		sta_info_put(sta);
+		return -EEXIST;
+	}
+
+	if (params->vlan) {
+		sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
+
+		if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN ||
+		    sdata->vif.type != IEEE80211_IF_TYPE_AP)
+			return -EINVAL;
+	} else
+		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	sta = sta_info_add(local, dev, mac, GFP_KERNEL);
+	if (!sta)
+		return -ENOMEM;
+
+	sta->dev = sdata->dev;
+	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_AP)
+		ieee80211_send_layer2_update(sta);
+
+	sta->flags = WLAN_STA_AUTH | WLAN_STA_ASSOC;
+
+	sta_apply_parameters(local, sta, params);
+
+	rate_control_rate_init(sta, local);
+
+	sta_info_put(sta);
+
+	return 0;
+}
+
+static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
+				 u8 *mac)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+
+	if (mac) {
+		/* XXX: get sta belonging to dev */
+		sta = sta_info_get(local, mac);
+		if (!sta)
+			return -ENOENT;
+
+		sta_info_free(sta);
+		sta_info_put(sta);
+	} else
+		sta_info_flush(local, dev);
+
+	return 0;
+}
+
+static int ieee80211_change_station(struct wiphy *wiphy,
+				    struct net_device *dev,
+				    u8 *mac,
+				    struct station_parameters *params)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	struct ieee80211_sub_if_data *vlansdata;
+
+	/* XXX: get sta belonging to dev */
+	sta = sta_info_get(local, mac);
+	if (!sta)
+		return -ENOENT;
+
+	if (params->vlan && params->vlan != sta->dev) {
+		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
+
+		if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN ||
+		    vlansdata->vif.type != IEEE80211_IF_TYPE_AP)
+			return -EINVAL;
+
+		sta->dev = params->vlan;
+		ieee80211_send_layer2_update(sta);
+	}
+
+	sta_apply_parameters(local, sta, params);
+
+	sta_info_put(sta);
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -458,5 +647,8 @@ struct cfg80211_ops mac80211_config_ops = {
 	.add_beacon = ieee80211_add_beacon,
 	.set_beacon = ieee80211_set_beacon,
 	.del_beacon = ieee80211_del_beacon,
+	.add_station = ieee80211_add_station,
+	.del_station = ieee80211_del_station,
+	.change_station = ieee80211_change_station,
 	.get_station = ieee80211_get_station,
 };
-- 
cgit v1.2.3


From 2bc454b0b30b3645d114689b64321cb49be99923 Mon Sep 17 00:00:00 2001
From: Michael Wu <flamingice@sourmilk.net>
Date: Tue, 25 Dec 2007 19:33:16 -0500
Subject: mac80211: Fix rate reporting regression

Mattias Nissler's "clean up rate selection" patch incorrectly changes
the behavior of txrate setting in sta_info. This patch backs out parts
of the rate selection consolidation in order to fix this issue for now.

Signed-off-by: Michael Wu <flamingice@sourmilk.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_rate.c   | 18 +-----------------
 net/mac80211/rc80211_pid_algo.c | 20 +++++++++++++++++---
 net/mac80211/rc80211_simple.c   | 19 +++++++++++++++++--
 3 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
index 5676a26a7c7..b957e67c5fb 100644
--- a/net/mac80211/ieee80211_rate.c
+++ b/net/mac80211/ieee80211_rate.c
@@ -168,29 +168,13 @@ void rate_control_get_rate(struct net_device *dev,
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct rate_control_ref *ref = local->rate_ctrl;
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct sta_info *sta = sta_info_get(local, hdr->addr1);
 	int i;
-	u16 fc;
 
 	memset(sel, 0, sizeof(struct rate_selection));
 
-	/* Send management frames and broadcast/multicast data using lowest
-	 * rate. */
-	fc = le16_to_cpu(hdr->frame_control);
-	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
-	    is_multicast_ether_addr(hdr->addr1))
-		sel->rate = rate_lowest(local, mode, sta);
-
-	/* If a forced rate is in effect, select it. */
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
-		sel->rate = &mode->rates[sdata->bss->force_unicast_rateidx];
-
-	/* If we haven't found the rate yet, ask the rate control algo. */
-	if (!sel->rate)
-		ref->ops->get_rate(ref->priv, dev, mode, skb, sel);
+	ref->ops->get_rate(ref->priv, dev, mode, skb, sel);
 
 	/* Select a non-ERP backup rate. */
 	if (!sel->nonerp) {
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 66cae53a647..962286d893d 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -310,22 +310,36 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 	int rateidx;
+	u16 fc;
 
 	sta = sta_info_get(local, hdr->addr1);
 
-	if (!sta) {
-		sel->rate = rate_lowest(local, mode, NULL);
-		sta_info_put(sta);
+	/* Send management frames and broadcast/multicast data using lowest
+	 * rate. */
+	fc = le16_to_cpu(hdr->frame_control);
+	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    is_multicast_ether_addr(hdr->addr1) || !sta) {
+		sel->rate = rate_lowest(local, mode, sta);
+		if (sta)
+			sta_info_put(sta);
 		return;
 	}
 
+	/* If a forced rate is in effect, select it. */
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
+		sta->txrate = sdata->bss->force_unicast_rateidx;
+
 	rateidx = sta->txrate;
 
 	if (rateidx >= mode->num_rates)
 		rateidx = mode->num_rates - 1;
 
+	sta->last_txrate = rateidx;
+
 	sta_info_put(sta);
 
 	sel->rate = &mode->rates[rateidx];
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index 33de6f967e5..934676d687d 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -207,21 +207,36 @@ rate_control_simple_get_rate(void *priv, struct net_device *dev,
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 	int rateidx;
+	u16 fc;
 
 	sta = sta_info_get(local, hdr->addr1);
 
-	if (!sta) {
-		sel->rate = rate_lowest(local, mode, NULL);
+	/* Send management frames and broadcast/multicast data using lowest
+	 * rate. */
+	fc = le16_to_cpu(hdr->frame_control);
+	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    is_multicast_ether_addr(hdr->addr1) || !sta) {
+		sel->rate = rate_lowest(local, mode, sta);
+		if (sta)
+			sta_info_put(sta);
 		return;
 	}
 
+	/* If a forced rate is in effect, select it. */
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
+		sta->txrate = sdata->bss->force_unicast_rateidx;
+
 	rateidx = sta->txrate;
 
 	if (rateidx >= mode->num_rates)
 		rateidx = mode->num_rates - 1;
 
+	sta->last_txrate = rateidx;
+
 	sta_info_put(sta);
 
 	sel->rate = &mode->rates[rateidx];
-- 
cgit v1.2.3


From 471b3efdfccc257591331724145f8ccf8b3217e1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 28 Dec 2007 14:32:58 +0100
Subject: mac80211: add unified BSS configuration

This patch (based on Ron Rindjunsky's) creates a framework for
a unified way to pass BSS configuration to drivers that require
the information, e.g. for implementing power save mode.

This patch introduces new ieee80211_bss_conf structure that is
passed to the driver via the new bss_info_changed() callback
when the BSS configuration changes.

This new BSS configuration infrastructure adds the following
new features:
 * drivers are notified of their association AID
 * drivers are notified of association status

and replaces the erp_ie_changed() callback. The patch also does
the relevant driver updates for the latter change.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_netdev.c |  2 +-
 net/mac80211/ieee80211.c      | 29 ++++++++++--------
 net/mac80211/ieee80211_i.h    | 19 +++++++-----
 net/mac80211/ieee80211_sta.c  | 69 ++++++++++++++++++++++---------------------
 net/mac80211/tx.c             | 13 ++++----
 net/mac80211/util.c           | 12 ++++----
 6 files changed, 77 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 3500fe0d380..829872a3ae8 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -118,7 +118,7 @@ static ssize_t ieee80211_if_fmt_flags(
 		 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "",
 		 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "",
 		 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "",
-		 sdata->flags & IEEE80211_SDATA_USE_PROTECTION ? "CTS prot\n" : "");
+		 sdata->bss_conf.use_cts_prot ? "CTS prot\n" : "");
 }
 __IEEE80211_IF_FILE(flags);
 
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 859682eee54..17704021823 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -635,25 +635,30 @@ int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
 	return 0;
 }
 
-void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes)
+void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
+				      u32 changed)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (local->ops->erp_ie_changed)
-		local->ops->erp_ie_changed(local_to_hw(local), changes,
-			!!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION),
-			!(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE));
+	struct ieee80211_local *local = sdata->local;
+
+	if (!changed)
+		return;
+
+	if (local->ops->bss_info_changed)
+		local->ops->bss_info_changed(local_to_hw(local),
+					     &sdata->vif,
+					     &sdata->bss_conf,
+					     changed);
 }
 
 void ieee80211_reset_erp_info(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	sdata->flags &= ~(IEEE80211_SDATA_USE_PROTECTION |
-			IEEE80211_SDATA_SHORT_PREAMBLE);
-	ieee80211_erp_info_change_notify(dev,
-					 IEEE80211_ERP_CHANGE_PROTECTION |
-					 IEEE80211_ERP_CHANGE_PREAMBLE);
+	sdata->bss_conf.use_cts_prot = 0;
+	sdata->bss_conf.use_short_preamble = 0;
+	ieee80211_bss_info_change_notify(sdata,
+					 BSS_CHANGED_ERP_CTS_PROT |
+					 BSS_CHANGED_ERP_PREAMBLE);
 }
 
 void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 08a6c0cff69..72ecbf7bf96 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -291,12 +291,7 @@ struct ieee80211_if_sta {
 /* flags used in struct ieee80211_sub_if_data.flags */
 #define IEEE80211_SDATA_ALLMULTI	BIT(0)
 #define IEEE80211_SDATA_PROMISC		BIT(1)
-#define IEEE80211_SDATA_USE_PROTECTION	BIT(2) /* CTS protect ERP frames */
-/* use short preamble with IEEE 802.11b: this flag is set when the AP or beacon
- * generator reports that there are no present stations that cannot support short
- * preambles */
-#define IEEE80211_SDATA_SHORT_PREAMBLE	BIT(3)
-#define IEEE80211_SDATA_USERSPACE_MLME	BIT(4)
+#define IEEE80211_SDATA_USERSPACE_MLME	BIT(2)
 struct ieee80211_sub_if_data {
 	struct list_head list;
 
@@ -327,6 +322,15 @@ struct ieee80211_sub_if_data {
 	struct ieee80211_key *keys[NUM_DEFAULT_KEYS];
 	struct ieee80211_key *default_key;
 
+	/*
+	 * BSS configuration for this interface.
+	 *
+	 * FIXME: I feel bad putting this here when we already have a
+	 *	  bss pointer, but the bss pointer is just wrong when
+	 *	  you have multiple virtual STA mode interfaces...
+	 *	  This needs to be fixed.
+	 */
+	struct ieee80211_bss_conf bss_conf;
 	struct ieee80211_if_ap *bss; /* BSS that this device belongs to */
 
 	union {
@@ -770,7 +774,8 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
 					 u8 *addr);
 int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
 int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
-void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes);
+void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
+				      u32 changed);
 void ieee80211_reset_erp_info(struct net_device *dev);
 int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
 				   struct ieee80211_ht_info *ht_info);
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index b1e7d17ee25..866eb807812 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -313,48 +313,42 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 }
 
 
-static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value)
+static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata,
+				   u8 erp_value)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0;
-	int preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0;
-	u8 changes = 0;
+	bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0;
+	bool preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0;
 	DECLARE_MAC_BUF(mac);
+	u32 changed = 0;
 
-	if (use_protection != !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION)) {
+	if (use_protection != bss_conf->use_cts_prot) {
 		if (net_ratelimit()) {
 			printk(KERN_DEBUG "%s: CTS protection %s (BSSID="
 			       "%s)\n",
-			       dev->name,
+			       sdata->dev->name,
 			       use_protection ? "enabled" : "disabled",
 			       print_mac(mac, ifsta->bssid));
 		}
-		if (use_protection)
-			sdata->flags |= IEEE80211_SDATA_USE_PROTECTION;
-		else
-			sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION;
-		changes |= IEEE80211_ERP_CHANGE_PROTECTION;
+		bss_conf->use_cts_prot = use_protection;
+		changed |= BSS_CHANGED_ERP_CTS_PROT;
 	}
 
-	if (preamble_mode != !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)) {
+	if (preamble_mode != bss_conf->use_short_preamble) {
 		if (net_ratelimit()) {
 			printk(KERN_DEBUG "%s: switched to %s barker preamble"
 			       " (BSSID=%s)\n",
-			       dev->name,
+			       sdata->dev->name,
 			       (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ?
 					"short" : "long",
 			       print_mac(mac, ifsta->bssid));
 		}
-		if (preamble_mode)
-			sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE;
-		else
-			sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE;
-		changes |= IEEE80211_ERP_CHANGE_PREAMBLE;
+		bss_conf->use_short_preamble = preamble_mode;
+		changed |= BSS_CHANGED_ERP_PREAMBLE;
 	}
 
-	if (changes)
-		ieee80211_erp_info_change_notify(dev, changes);
+	return changed;
 }
 
 int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
@@ -458,19 +452,16 @@ static void ieee80211_set_associated(struct net_device *dev,
 				     struct ieee80211_if_sta *ifsta,
 				     bool assoc)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	union iwreq_data wrqu;
-
-	if (!!(ifsta->flags & IEEE80211_STA_ASSOCIATED) == assoc)
-		return;
+	u32 changed = BSS_CHANGED_ASSOC;
 
 	if (assoc) {
-		struct ieee80211_sub_if_data *sdata;
 		struct ieee80211_sta_bss *bss;
 
 		ifsta->flags |= IEEE80211_STA_ASSOCIATED;
 
-		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 		if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 			return;
 
@@ -479,7 +470,8 @@ static void ieee80211_set_associated(struct net_device *dev,
 					   ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
 			if (bss->has_erp_value)
-				ieee80211_handle_erp_ie(dev, bss->erp_value);
+				changed |= ieee80211_handle_erp_ie(
+						sdata, bss->erp_value);
 			ieee80211_rx_bss_put(dev, bss);
 		}
 
@@ -499,6 +491,8 @@ static void ieee80211_set_associated(struct net_device *dev,
 	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
 	ifsta->last_probe = jiffies;
 	ieee80211_led_assoc(local, assoc);
+
+	ieee80211_bss_info_change_notify(sdata, changed);
 }
 
 static void ieee80211_set_disassoc(struct net_device *dev,
@@ -1536,18 +1530,20 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
 }
 
 
-static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
+static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_if_sta *ifsta,
 					 struct ieee80211_mgmt *mgmt,
 					 size_t len,
 					 int reassoc)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
+	struct net_device *dev = sdata->dev;
 	struct ieee80211_hw_mode *mode;
 	struct sta_info *sta;
 	u32 rates;
 	u16 capab_info, status_code, aid;
 	struct ieee802_11_elems elems;
+	struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
 	u8 *pos;
 	int i, j;
 	DECLARE_MAC_BUF(mac);
@@ -1620,6 +1616,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 	if (ifsta->assocresp_ies)
 		memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len);
 
+	/* set AID, ieee80211_set_associated() will tell the driver */
+	bss_conf->aid = aid;
 	ieee80211_set_associated(dev, ifsta, 1);
 
 	/* Add STA entry for the AP */
@@ -2099,6 +2097,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	struct ieee802_11_elems elems;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_conf *conf = &local->hw.conf;
+	u32 changed = 0;
 
 	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
 
@@ -2119,7 +2118,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
 	if (elems.erp_info && elems.erp_info_len >= 1)
-		ieee80211_handle_erp_ie(dev, elems.erp_info[0]);
+		changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]);
 
 	if (elems.ht_cap_elem && elems.ht_info_elem &&
 	    elems.wmm_param && local->ops->conf_ht &&
@@ -2142,6 +2141,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
 					 elems.wmm_param_len);
 	}
+
+	ieee80211_bss_info_change_notify(sdata, changed);
 }
 
 
@@ -2329,10 +2330,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
 		ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_ASSOC_RESP:
-		ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 0);
+		ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0);
 		break;
 	case IEEE80211_STYPE_REASSOC_RESP:
-		ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 1);
+		ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1);
 		break;
 	case IEEE80211_STYPE_DEAUTH:
 		ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len);
@@ -2787,7 +2788,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 			break;
 		}
 		control.tx_rate =
-			((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) &&
+			(sdata->bss_conf.use_short_preamble &&
 			(ratesel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
 			ratesel.rate->val2 : ratesel.rate->val;
 		control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 51c0f00d02d..f9088fe34d5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -176,7 +176,7 @@ static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
 	 * to closest integer */
 
 	dur = ieee80211_frame_duration(local, 10, rate, erp,
-		       tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE);
+				tx->sdata->bss_conf.use_short_preamble);
 
 	if (next_frag_len) {
 		/* Frame is fragmented: duration increases with time needed to
@@ -185,8 +185,7 @@ static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
 		/* next fragment */
 		dur += ieee80211_frame_duration(local, next_frag_len,
 				txrate->rate, erp,
-				tx->sdata->flags &
-					IEEE80211_SDATA_SHORT_PREAMBLE);
+				tx->sdata->bss_conf.use_short_preamble);
 	}
 
 	return dur;
@@ -605,7 +604,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
 		tx->u.tx.control->alt_retry_rate = -1;
 
 	if (tx->u.tx.mode->mode == MODE_IEEE80211G &&
-	    (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) &&
+	    tx->sdata->bss_conf.use_cts_prot &&
 	    (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && rsel.nonerp) {
 		tx->u.tx.last_frag_rate = tx->u.tx.rate;
 		if (rsel.probe)
@@ -667,7 +666,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
 	if (mode->mode == MODE_IEEE80211G &&
 	    (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) &&
 	    (tx->flags & IEEE80211_TXRXD_TXUNICAST) &&
-	    (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) &&
+	    tx->sdata->bss_conf.use_cts_prot &&
 	    !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS))
 		control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT;
 
@@ -676,7 +675,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
 	 * available on the network at the current point in time. */
 	if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
 	    (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) &&
-	    (tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) &&
+	    tx->sdata->bss_conf.use_short_preamble &&
 	    (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) {
 		tx->u.tx.control->tx_rate = tx->u.tx.rate->val2;
 	}
@@ -1754,7 +1753,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		}
 
 		control->tx_rate =
-			((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) &&
+			(sdata->bss_conf.use_short_preamble &&
 			(rsel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
 			rsel.rate->val2 : rsel.rate->val;
 		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ba81cf54281..5e631ce98d7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -312,8 +312,8 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
 	int erp;
 
 	erp = ieee80211_is_erp_rate(hw->conf.phymode, rate);
-	dur = ieee80211_frame_duration(local, frame_len, rate,
-		       erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE);
+	dur = ieee80211_frame_duration(local, frame_len, rate, erp,
+				       sdata->bss_conf.use_short_preamble);
 
 	return cpu_to_le16(dur);
 }
@@ -326,11 +326,11 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-	int short_preamble;
+	bool short_preamble;
 	int erp;
 	u16 dur;
 
-	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
+	short_preamble = sdata->bss_conf.use_short_preamble;
 
 	rate = frame_txctl->rts_rate;
 	erp = !!(rate->flags & IEEE80211_RATE_ERP);
@@ -357,11 +357,11 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-	int short_preamble;
+	bool short_preamble;
 	int erp;
 	u16 dur;
 
-	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
+	short_preamble = sdata->bss_conf.use_short_preamble;
 
 	rate = frame_txctl->rts_rate;
 	erp = !!(rate->flags & IEEE80211_RATE_ERP);
-- 
cgit v1.2.3


From 0f7054e32fab251af5cab116da0ef6624a1a0c8b Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Sun, 13 Jan 2008 14:16:47 +0100
Subject: mac80211: Initialize vif pointer

Before calling update_beacon() mac80211 must
initialize the control.vif pointer so it can
be used by the driver to determine which
interface is trying to send the beacon.

v2: ieee80211_beacon_get() should also initialize the
vif pointer since it can be called by mac80211 internally
before calling config_interface().

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_sta.c | 1 +
 net/mac80211/tx.c            | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 866eb807812..8b47d81fc71 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -2787,6 +2787,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 			       "for IBSS beacon\n", dev->name);
 			break;
 		}
+		control.vif = &sdata->vif;
 		control.tx_rate =
 			(sdata->bss_conf.use_short_preamble &&
 			(ratesel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f9088fe34d5..67b509edd43 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1752,6 +1752,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			goto out;
 		}
 
+		control->vif = vif;
 		control->tx_rate =
 			(sdata->bss_conf.use_short_preamble &&
 			(rsel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
-- 
cgit v1.2.3


From 5b3d71d90ec554a2b145db7fcb245e64bf28dda7 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Sun, 13 Jan 2008 18:21:58 +0200
Subject: mac80211: A-MPDU Rx stop aggregation on proper dev

This patch adds a check to insure that Rx A-MPDU will be stopped only
for the proper device.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 17704021823..5dcc2d61551 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -299,9 +299,11 @@ static int ieee80211_stop(struct net_device *dev)
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	list_for_each_entry(sta, &local->sta_list, list) {
-		for (i = 0; i <  STA_TID_NUM; i++)
-			ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr,
-						i, WLAN_BACK_RECIPIENT,
+		if (sta->dev == dev)
+			for (i = 0; i <  STA_TID_NUM; i++)
+				ieee80211_sta_stop_rx_ba_session(sta->dev,
+						sta->addr, i,
+						WLAN_BACK_RECIPIENT,
 						WLAN_REASON_QSTA_LEAVE_QBSS);
 	}
 
-- 
cgit v1.2.3


From 66dcb6bdc57a799a16e8d2942b9ab38b8546eb3b Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Sun, 13 Jan 2008 18:25:43 +0200
Subject: mac80211: A-MPDU Rx remove stop_rx_ba_session warning print

This patch removes a warning print from ieee80211_sta_stop_rx_ba_session
in case the tid is inactive when interface goes down.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_sta.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index 8b47d81fc71..e7da1cde3ab 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1215,9 +1215,6 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 	if (sta->ampdu_mlme.tid_rx[tid].state
 				!= HT_AGG_STATE_OPERATIONAL) {
 		spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
-		if (net_ratelimit())
-			printk(KERN_DEBUG "rx BA session requested to stop on "
-				"inactive tid %d\n", tid);
 		sta_info_put(sta);
 		return;
 	}
-- 
cgit v1.2.3


From 1402c8519acba215f5a6101b9e5ada07ab371273 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 17:27:44 -0800
Subject: [VLAN]: sparse warning fix

Minor sparse warning fix.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlanproc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 2a4e1aabb23..a0ec4792559 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -220,6 +220,7 @@ static inline int is_vlan_dev(struct net_device *dev)
 
 /* start read of /proc/net/vlan/config */
 static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	struct net_device *dev;
 	loff_t i = 1;
@@ -261,6 +262,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void vlan_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	read_unlock(&dev_base_lock);
 }
-- 
cgit v1.2.3


From dd329bfa96dd9dabfc3b5154317bf5fbe9440455 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 17:28:31 -0800
Subject: [IPV4]: igmp sparse warnings

Partial sparse warning fix.  The other conditional locking
is too much for sparse to handle.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 285d26218a5..016cfdb184f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2329,6 +2329,7 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	read_lock(&dev_base_lock);
 	return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2346,6 +2347,7 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
 	if (likely(state->in_dev != NULL)) {
-- 
cgit v1.2.3


From ba93ef746560df597b19bbcee04ce7ed70ebc700 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jan 2008 17:28:59 -0800
Subject: [IPV4]: ipmr sparse warnings

Get rid of some of the sparse warnings.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 772daf77878..4198615ca67 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -141,7 +141,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
 		p.iph.ihl = 5;
 		p.iph.protocol = IPPROTO_IPIP;
 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
-		ifr.ifr_ifru.ifru_data = (void*)&p;
+		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 
 		oldfs = get_fs(); set_fs(KERNEL_DS);
 		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
@@ -954,10 +954,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
 #ifdef CONFIG_IP_PIMSM
 	case MRT_PIM:
 	{
-		int v, ret;
+		int v;
+
 		if (get_user(v,(int __user *)optval))
 			return -EFAULT;
-		v = (v)?1:0;
+		v = (v) ? 1 : 0;
+
 		rtnl_lock();
 		ret = 0;
 		if (v != mroute_do_pim) {
@@ -1659,6 +1661,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
 }
 
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(mrt_lock)
 {
 	read_lock(&mrt_lock);
 	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
@@ -1682,6 +1685,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
+	__releases(mrt_lock)
 {
 	read_unlock(&mrt_lock);
 }
-- 
cgit v1.2.3


From da0e28cb68a7e22b47c6ae1a5b12cb538c13c69f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 21 Jan 2008 17:31:55 -0800
Subject: [NETNS]: Add netns parameter to fib_lookup.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c  | 4 ++--
 net/ipv4/fib_rules.c     | 4 ++--
 net/ipv4/fib_semantics.c | 2 +-
 net/ipv4/route.c         | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8987046d97f..e056154076b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -241,7 +241,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	if (in_dev == NULL)
 		goto e_inval;
 
-	if (fib_lookup(&fl, &res))
+	if (fib_lookup(&init_net, &fl, &res))
 		goto last_resort;
 	if (res.type != RTN_UNICAST)
 		goto e_inval_res;
@@ -265,7 +265,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	fl.oif = dev->ifindex;
 
 	ret = 0;
-	if (fib_lookup(&fl, &res) == 0) {
+	if (fib_lookup(&init_net, &fl, &res) == 0) {
 		if (res.type == RTN_UNICAST) {
 			*spec_dst = FIB_RES_PREFSRC(res);
 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d2001f1c28a..1effb4ab688 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -54,14 +54,14 @@ u32 fib_rules_tclass(struct fib_result *res)
 }
 #endif
 
-int fib_lookup(struct flowi *flp, struct fib_result *res)
+int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
 {
 	struct fib_lookup_arg arg = {
 		.result = res,
 	};
 	int err;
 
-	err = fib_rules_lookup(init_net.ipv4.rules_ops, flp, 0, &arg);
+	err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg);
 	res->r = arg.rule;
 
 	return err;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0e08df4d6f9..ecd91c60975 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -559,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 			/* It is not necessary, but requires a bit of thinking */
 			if (fl.fl4_scope < RT_SCOPE_LINK)
 				fl.fl4_scope = RT_SCOPE_LINK;
-			if ((err = fib_lookup(&fl, &res)) != 0)
+			if ((err = fib_lookup(&init_net, &fl, &res)) != 0)
 				return err;
 		}
 		err = -EINVAL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f80c761ea0b..a7651c64bb4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1559,7 +1559,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(&rt->fl, &res) == 0) {
+	else if (fib_lookup(&init_net, &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
@@ -1911,7 +1911,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/*
 	 *	Now we are ready to route packet.
 	 */
-	if ((err = fib_lookup(&fl, &res)) != 0) {
+	if ((err = fib_lookup(&init_net, &fl, &res)) != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			goto e_hostunreach;
 		goto no_route;
@@ -2363,7 +2363,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		goto make_route;
 	}
 
-	if (fib_lookup(&fl, &res)) {
+	if (fib_lookup(&init_net, &fl, &res)) {
 		res.fi = NULL;
 		if (oldflp->oif) {
 			/* Apparently, routing tables are wrong. Assume,
-- 
cgit v1.2.3


From 7fee0ca23711ce1a6b13d3ab78915809a72a59ec Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 21 Jan 2008 17:32:38 -0800
Subject: [NETNS]: Add netns parameter to inetdev_by_index.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c       | 6 +++---
 net/ipv4/fib_semantics.c | 2 +-
 net/ipv4/igmp.c          | 4 ++--
 net/ipv4/ip_gre.c        | 3 ++-
 4 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e381edb19b2..21f71bf912d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -409,12 +409,12 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 	return inet_insert_ifa(ifa);
 }
 
-struct in_device *inetdev_by_index(int ifindex)
+struct in_device *inetdev_by_index(struct net *net, int ifindex)
 {
 	struct net_device *dev;
 	struct in_device *in_dev = NULL;
 	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(&init_net, ifindex);
+	dev = __dev_get_by_index(net, ifindex);
 	if (dev)
 		in_dev = in_dev_get(dev);
 	read_unlock(&dev_base_lock);
@@ -454,7 +454,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 		goto errout;
 
 	ifm = nlmsg_data(nlh);
-	in_dev = inetdev_by_index(ifm->ifa_index);
+	in_dev = inetdev_by_index(net, ifm->ifa_index);
 	if (in_dev == NULL) {
 		err = -ENODEV;
 		goto errout;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ecd91c60975..8b47e112ae5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -583,7 +583,7 @@ out:
 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
 			return -EINVAL;
 
-		in_dev = inetdev_by_index(nh->nh_oif);
+		in_dev = inetdev_by_index(&init_net, nh->nh_oif);
 		if (in_dev == NULL)
 			return -ENODEV;
 		if (!(in_dev->dev->flags&IFF_UP)) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 016cfdb184f..928bc328455 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1389,7 +1389,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 	struct in_device *idev = NULL;
 
 	if (imr->imr_ifindex) {
-		idev = inetdev_by_index(imr->imr_ifindex);
+		idev = inetdev_by_index(&init_net, imr->imr_ifindex);
 		if (idev)
 			__in_dev_put(idev);
 		return idev;
@@ -2222,7 +2222,7 @@ void ip_mc_drop_socket(struct sock *sk)
 		struct in_device *in_dev;
 		inet->mc_list = iml->next;
 
-		in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+		in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 		if (in_dev != NULL) {
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8b81deb8ff1..a74983d8c89 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1193,7 +1193,8 @@ static int ipgre_close(struct net_device *dev)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
-		struct in_device *in_dev = inetdev_by_index(t->mlink);
+		struct in_device *in_dev;
+		in_dev = inetdev_by_index(dev->nd_net, t->mlink);
 		if (in_dev) {
 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 			in_dev_put(in_dev);
-- 
cgit v1.2.3


From 5b707aaae4ca7b7204eb4a472721c84866d85f0f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 21 Jan 2008 17:33:15 -0800
Subject: [NETNS]: Pass correct namespace in fib_validate_source.

Correct network namespace is available inside fib_validate_source. It
can be obtained from the device passed in. The device is not NULL as
in_device is obtained from it just above.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e056154076b..6761639dd0e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -228,6 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	struct fib_result res;
 	int no_addr, rpf;
 	int ret;
+	struct net *net;
 
 	no_addr = rpf = 0;
 	rcu_read_lock();
@@ -241,7 +242,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	if (in_dev == NULL)
 		goto e_inval;
 
-	if (fib_lookup(&init_net, &fl, &res))
+	net = dev->nd_net;
+	if (fib_lookup(net, &fl, &res))
 		goto last_resort;
 	if (res.type != RTN_UNICAST)
 		goto e_inval_res;
@@ -265,7 +267,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	fl.oif = dev->ifindex;
 
 	ret = 0;
-	if (fib_lookup(&init_net, &fl, &res) == 0) {
+	if (fib_lookup(net, &fl, &res) == 0) {
 		if (res.type == RTN_UNICAST) {
 			*spec_dst = FIB_RES_PREFSRC(res);
 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
-- 
cgit v1.2.3


From 86167a377f1c4fb40742302ae7682dd574abde86 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 21 Jan 2008 17:34:00 -0800
Subject: [NETNS]: Pass correct namespace in context fib_check_nh.

Correct network namespace is already used in fib_check_nh. Re-work its
usage for better readability and pass into fib_lookup &
inetdev_by_index.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8b47e112ae5..c7912866d98 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -519,7 +519,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 			struct fib_nh *nh)
 {
 	int err;
+	struct net *net;
 
+	net = cfg->fc_nlinfo.nl_net;
 	if (nh->nh_gw) {
 		struct fib_result res;
 
@@ -532,11 +534,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 
 			if (cfg->fc_scope >= RT_SCOPE_LINK)
 				return -EINVAL;
-			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
-					   nh->nh_gw) != RTN_UNICAST)
+			if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
 				return -EINVAL;
-			if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
-						      nh->nh_oif)) == NULL)
+			if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
 				return -ENODEV;
 			if (!(dev->flags&IFF_UP))
 				return -ENETDOWN;
@@ -559,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 			/* It is not necessary, but requires a bit of thinking */
 			if (fl.fl4_scope < RT_SCOPE_LINK)
 				fl.fl4_scope = RT_SCOPE_LINK;
-			if ((err = fib_lookup(&init_net, &fl, &res)) != 0)
+			if ((err = fib_lookup(net, &fl, &res)) != 0)
 				return err;
 		}
 		err = -EINVAL;
@@ -583,7 +583,7 @@ out:
 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
 			return -EINVAL;
 
-		in_dev = inetdev_by_index(&init_net, nh->nh_oif);
+		in_dev = inetdev_by_index(net, nh->nh_oif);
 		if (in_dev == NULL)
 			return -ENODEV;
 		if (!(in_dev->dev->flags&IFF_UP)) {
-- 
cgit v1.2.3


From 84a885f44961c17a91cae9a9c03e4b3dae5d8d94 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 21 Jan 2008 17:34:35 -0800
Subject: [NETNS]: Pass correct namespace in ip_route_input_slow.

The packet on the input path always has a referrence to an input
network device it is passed from. Extract network namespace from it.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a7651c64bb4..f988ae39c4c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1881,6 +1881,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	__be32		spec_dst;
 	int		err = -EINVAL;
 	int		free_res = 0;
+	struct net    * net = dev->nd_net;
 
 	/* IP on this device is disabled. */
 
@@ -1911,7 +1912,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/*
 	 *	Now we are ready to route packet.
 	 */
-	if ((err = fib_lookup(&init_net, &fl, &res)) != 0) {
+	if ((err = fib_lookup(net, &fl, &res)) != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			goto e_hostunreach;
 		goto no_route;
@@ -1926,7 +1927,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res.type == RTN_LOCAL) {
 		int result;
 		result = fib_validate_source(saddr, daddr, tos,
-					     init_net.loopback_dev->ifindex,
+					     net->loopback_dev->ifindex,
 					     dev, &spec_dst, &itag);
 		if (result < 0)
 			goto martian_source;
@@ -1988,7 +1989,7 @@ local_input:
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= init_net.loopback_dev;
+	rth->u.dst.dev	= net->loopback_dev;
 	dev_hold(rth->u.dst.dev);
 	rth->idev	= in_dev_get(rth->u.dst.dev);
 	rth->rt_gateway	= daddr;
-- 
cgit v1.2.3


From ecfdc8c5424079393a9b05e8a5aaf5c6873029f6 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 21 Jan 2008 17:35:06 -0800
Subject: [NETNS]: Pass correct namespace in ip_rt_get_source.

ip_rt_get_source is the infamous place for which dst_ifdown kludges
have been implemented. This means that rt->u.dst.dev can be safely
dereferrenced obtain nd_net.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f988ae39c4c..27e0f81060a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1559,7 +1559,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(&init_net, &rt->fl, &res) == 0) {
+	else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
-- 
cgit v1.2.3


From fc80be87dca8968fa087aae81544ba3f86afdd50 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:05:31 +0900
Subject: [IPV4] UDP,UDPLITE: Sparse: {__udp4_lib,udp,udplite}_err() are of
 void.

Fix following sparse warnings:
| net/ipv4/udp.c:421:2: warning: returning void-valued expression
| net/ipv4/udplite.c:38:2: warning: returning void-valued expression

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv4/udp.c     | 2 +-
 net/ipv4/udplite.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cb2411cb87d..ecd9d91b30c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -418,7 +418,7 @@ out:
 
 void udp_err(struct sk_buff *skb, u32 info)
 {
-	return __udp4_lib_err(skb, info, udp_hash);
+	__udp4_lib_err(skb, info, udp_hash);
 }
 
 /*
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f5baeb3e8b8..001b881ca36 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -35,7 +35,7 @@ static int udplite_rcv(struct sk_buff *skb)
 
 static void udplite_err(struct sk_buff *skb, u32 info)
 {
-	return __udp4_lib_err(skb, info, udplite_hash);
+	__udp4_lib_err(skb, info, udplite_hash);
 }
 
 static	struct net_protocol udplite_protocol = {
-- 
cgit v1.2.3


From 77d0d350e96c9453be255d8eff8dc97555710b17 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:09:55 +0900
Subject: [IPV6] UDP,UDPLITE: Sparse: {__udp6_lib,udp,udplite}_err() are of
 void.

Fix following sparse warnings:
| net/ipv6/udp.c:262:2: warning: returning void-valued expression
| net/ipv6/udplite.c:29:2: warning: returning void-valued expression

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/udp.c     | 2 +-
 net/ipv6/udplite.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index bf58acab206..bd4b9df8f61 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -259,7 +259,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
 				 struct inet6_skb_parm *opt, int type,
 				 int code, int offset, __be32 info     )
 {
-	return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
+	__udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
 }
 
 int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 39f070518e6..87d4202522e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -26,7 +26,7 @@ static void udplitev6_err(struct sk_buff *skb,
 			  struct inet6_skb_parm *opt,
 			  int type, int code, int offset, __be32 info)
 {
-	return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
+	__udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
 }
 
 static struct inet6_protocol udplitev6_protocol = {
-- 
cgit v1.2.3


From 5e8b9df6e8786e4d5ee5ac951240cb2eaaac3014 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:25:46 +0900
Subject: [IPV6] UDPLITE: Sparse: Declare non-static symbols in header.

Fix the following sparse warnings:
| net/ipv6/udplite.c:45:14: warning: symbol 'udplitev6_prot' was not declared. Should it be static?
| net/ipv6/udplite.c:80:12: warning: symbol 'udplitev6_init' was not declared. Should it be static?
| net/ipv6/udplite.c:99:6: warning: symbol 'udplitev6_exit' was not declared. Should it be static?

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/udp_impl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 2d3fda60123..21be3a83e7b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -5,6 +5,7 @@
 #include <net/protocol.h>
 #include <net/addrconf.h>
 #include <net/inet_common.h>
+#include <net/transp_v6.h>
 
 extern int  	__udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
 extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
-- 
cgit v1.2.3


From 40fee36e11b49f92bc7c385bd45d7805c0127a34 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:12:50 +0900
Subject: [IPV6] ADDRLABEL: Sparse: Make several functions static.

Fix following sparse warnings:
| net/ipv6/addrlabel.c:172:25: warning: symbol 'ip6addrlbl_alloc' was not declared. Should it be static?
| net/ipv6/addrlabel.c:219:5: warning: symbol '__ip6addrlbl_add' was not declared. Should it be static?
| net/ipv6/addrlabel.c:260:5: warning: symbol 'ip6addrlbl_add' was not declared. Should it be static?
| net/ipv6/addrlabel.c:285:5: warning: symbol '__ip6addrlbl_del' was not declared. Should it be static?
| net/ipv6/addrlabel.c:311:5: warning: symbol 'ip6addrlbl_del' was not declared. Should it be static?

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/addrlabel.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 6f1ca607edd..38674121ae5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -169,9 +169,9 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
 }
 
 /* allocate one entry */
-struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
-					  int prefixlen, int ifindex,
-					  u32 label)
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
+						 int prefixlen, int ifindex,
+						 u32 label)
 {
 	struct ip6addrlbl_entry *newp;
 	int addrtype;
@@ -216,7 +216,7 @@ struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
 }
 
 /* add a label */
-int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 {
 	int ret = 0;
 
@@ -257,8 +257,8 @@ out:
 }
 
 /* add a label */
-int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
-		       int ifindex, u32 label, int replace)
+static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
+			  int ifindex, u32 label, int replace)
 {
 	struct ip6addrlbl_entry *newp;
 	int ret = 0;
@@ -282,8 +282,8 @@ int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
 }
 
 /* remove a label */
-int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
-			  int ifindex)
+static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+			    int ifindex)
 {
 	struct ip6addrlbl_entry *p = NULL;
 	struct hlist_node *pos, *n;
@@ -308,8 +308,8 @@ int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
 	return ret;
 }
 
-int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
-		       int ifindex)
+static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+			  int ifindex)
 {
 	struct in6_addr prefix_buf;
 	int ret;
-- 
cgit v1.2.3


From 2334ecbdb27bd1745c0fc6d05cce09ed9585e4c1 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:18:38 +0900
Subject: [IPV6]: Sparse: Declare non-static
 ipv6_{route,icmp,frag}_sysctl_init() in header.

Fix the following sparse warnings:
| net/ipv6/route.c:2491:18: warning: symbol 'ipv6_route_sysctl_init' was not declared. Should it be static?
| net/ipv6/icmp.c:922:18: warning: symbol 'ipv6_icmp_sysctl_init' was not declared. Should it be static?
| net/ipv6/reassembly.c:628:6: warning: symbol 'ipv6_frag_sysctl_init' was not declared. Should it be static?

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/af_inet6.c        | 2 --
 net/ipv6/sysctl_net_ipv6.c | 3 ---
 2 files changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3150c4be3c0..6738a7b0e67 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -72,8 +72,6 @@ MODULE_LICENSE("GPL");
 static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
-void ipv6_frag_sysctl_init(struct net *net);
-
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 {
 	const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 5e0af4d4632..7197eb74a75 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,9 +14,6 @@
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
 
-extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
-extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
-
 static ctl_table ipv6_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ROUTE,
-- 
cgit v1.2.3


From 5d5619b40c2474de01c64bdf6bb9f1211d3e967a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:29:40 +0900
Subject: [IPV6] ADDRCONF: Sparse: Make inet6_dump_addr() code paths more
 straight-forward.

Fix the following sparse warning:
| net/ipv6/addrconf.c:3384:2: warning: context imbalance in 'inet6_dump_addr' - different lock contexts for basic block

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/addrconf.c | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index aba7b5d52a9..e40213db9e4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3335,11 +3335,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			     ifa = ifa->if_next, ip_idx++) {
 				if (ip_idx < s_ip_idx)
 					continue;
-				if ((err = inet6_fill_ifaddr(skb, ifa,
-				    NETLINK_CB(cb->skb).pid,
-				    cb->nlh->nlmsg_seq, RTM_NEWADDR,
-				    NLM_F_MULTI)) <= 0)
-					goto done;
+				err = inet6_fill_ifaddr(skb, ifa,
+							NETLINK_CB(cb->skb).pid,
+							cb->nlh->nlmsg_seq,
+							RTM_NEWADDR,
+							NLM_F_MULTI);
 			}
 			break;
 		case MULTICAST_ADDR:
@@ -3348,11 +3348,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			     ifmca = ifmca->next, ip_idx++) {
 				if (ip_idx < s_ip_idx)
 					continue;
-				if ((err = inet6_fill_ifmcaddr(skb, ifmca,
-				    NETLINK_CB(cb->skb).pid,
-				    cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
-				    NLM_F_MULTI)) <= 0)
-					goto done;
+				err = inet6_fill_ifmcaddr(skb, ifmca,
+							  NETLINK_CB(cb->skb).pid,
+							  cb->nlh->nlmsg_seq,
+							  RTM_GETMULTICAST,
+							  NLM_F_MULTI);
 			}
 			break;
 		case ANYCAST_ADDR:
@@ -3361,11 +3361,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			     ifaca = ifaca->aca_next, ip_idx++) {
 				if (ip_idx < s_ip_idx)
 					continue;
-				if ((err = inet6_fill_ifacaddr(skb, ifaca,
-				    NETLINK_CB(cb->skb).pid,
-				    cb->nlh->nlmsg_seq, RTM_GETANYCAST,
-				    NLM_F_MULTI)) <= 0)
-					goto done;
+				err = inet6_fill_ifacaddr(skb, ifaca,
+							  NETLINK_CB(cb->skb).pid,
+							  cb->nlh->nlmsg_seq,
+							  RTM_GETANYCAST,
+							  NLM_F_MULTI);
 			}
 			break;
 		default:
@@ -3373,14 +3373,12 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 		}
 		read_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
+
+		if (err <= 0)
+			break;
 cont:
 		idx++;
 	}
-done:
-	if (err <= 0) {
-		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
-	}
 	cb->args[0] = idx;
 	cb->args[1] = ip_idx;
 	return skb->len;
-- 
cgit v1.2.3


From 61cf46ad581ba43073d3bcb0be549eb60fbbf9f8 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 17:32:53 +0900
Subject: [IPV6] NDISC: Sparse: Use different variable name for local use.

Fix the following sparse warnings:
| net/ipv6/ndisc.c:1300:21: warning: symbol 'opt' shadows an earlier one
| net/ipv6/ndisc.c:1078:7: originally declared here

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ndisc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index bdfc4ea6194..0d33a7d3212 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1297,11 +1297,11 @@ skip_defrtr:
 	}
 
 	if (ndopts.nd_useropts) {
-		struct nd_opt_hdr *opt;
-		for (opt = ndopts.nd_useropts;
-		     opt;
-		     opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) {
-				ndisc_ra_useropt(skb, opt);
+		struct nd_opt_hdr *p;
+		for (p = ndopts.nd_useropts;
+		     p;
+		     p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+			ndisc_ra_useropt(skb, p);
 		}
 	}
 
-- 
cgit v1.2.3


From 8d8354d2fb9277f165715a6e1cb92bcc89259975 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 05:58:31 -0800
Subject: [NETNS][FRAGS]: Move ctl tables around.

This is a preparation for sysctl netns-ization.
Move the ctl tables to the files, where the tuning
variables reside. Plus make the helpers to register
the tables.

This will simplify the later patches and will keep
similar things closer to each other.

ipv4, ipv6 and conntrack_reasm are patched differently,
but the result is all the tables are in appropriate files.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c                         | 74 +++++++++++++++++++++++++-
 net/ipv4/sysctl_net_ipv4.c                     | 42 ---------------
 net/ipv6/af_inet6.c                            |  5 --
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 29 ----------
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 31 ++++++++++-
 net/ipv6/reassembly.c                          | 66 +++++++++++++++++++++--
 net/ipv6/sysctl_net_ipv6.c                     | 40 +-------------
 7 files changed, 166 insertions(+), 121 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2143bf30597..a53463e594b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -50,7 +50,7 @@
  * as well. Or notify me, at least. --ANK
  */
 
-int sysctl_ipfrag_max_dist __read_mostly = 64;
+static int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 struct ipfrag_skb_cb
 {
@@ -74,7 +74,7 @@ struct ipq {
 	struct inet_peer *peer;
 };
 
-struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
 	/*
 	 * Fragment cache limits. We will commit 256K at one time. Should we
 	 * cross that limit we will prune down to 192K. This should cope with
@@ -607,8 +607,78 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	return -ENOMEM;
 }
 
+#ifdef CONFIG_SYSCTL
+static int zero;
+
+static struct ctl_table ip4_frags_ctl_table[] = {
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
+		.procname	= "ipfrag_high_thresh",
+		.data		= &ip4_frags_ctl.high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
+		.procname	= "ipfrag_low_thresh",
+		.data		= &ip4_frags_ctl.low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_TIME,
+		.procname	= "ipfrag_time",
+		.data		= &ip4_frags_ctl.timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
+		.procname	= "ipfrag_secret_interval",
+		.data		= &ip4_frags_ctl.secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.procname	= "ipfrag_max_dist",
+		.data		= &sysctl_ipfrag_max_dist,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero
+	},
+	{ }
+};
+
+static int ip4_frags_ctl_register(struct net *net)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path,
+			ip4_frags_ctl_table);
+	return hdr == NULL ? -ENOMEM : 0;
+}
+#else
+static inline int ip4_frags_ctl_register(struct net *net)
+{
+	return 0;
+}
+#endif
+
+static int ipv4_frags_init_net(struct net *net)
+{
+	return ip4_frags_ctl_register(net);
+}
+
 void __init ipfrag_init(void)
 {
+	ipv4_frags_init_net(&init_net);
 	ip4_frags.ctl = &ip4_frags_ctl;
 	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 45536a91266..82cdf23837e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -283,22 +283,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-	{
-		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
-		.procname	= "ipfrag_high_thresh",
-		.data		= &ip4_frags_ctl.high_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
-		.procname	= "ipfrag_low_thresh",
-		.data		= &ip4_frags_ctl.low_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
 	{
 		.ctl_name	= NET_IPV4_DYNADDR,
 		.procname	= "ip_dynaddr",
@@ -307,15 +291,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-	{
-		.ctl_name	= NET_IPV4_IPFRAG_TIME,
-		.procname	= "ipfrag_time",
-		.data		= &ip4_frags_ctl.timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies
-	},
 	{
 		.ctl_name	= NET_IPV4_TCP_KEEPALIVE_TIME,
 		.procname	= "tcp_keepalive_time",
@@ -658,23 +633,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-	{
-		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
-		.procname	= "ipfrag_secret_interval",
-		.data		= &ip4_frags_ctl.secret_interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies
-	},
-	{
-		.procname	= "ipfrag_max_dist",
-		.data		= &sysctl_ipfrag_max_dist,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.extra1		= &zero
-	},
 	{
 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
 		.procname	= "tcp_no_metrics_save",
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6738a7b0e67..bddac0e8780 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -721,10 +721,6 @@ static void cleanup_ipv6_mibs(void)
 static int inet6_net_init(struct net *net)
 {
 	net->ipv6.sysctl.bindv6only = 0;
-	net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
-	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
-	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
 	net->ipv6.sysctl.flush_delay = 0;
 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
@@ -734,7 +730,6 @@ static int inet6_net_init(struct net *net)
 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
-	ipv6_frag_sysctl_init(net);
 
 	return 0;
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index cf42f5cfc33..2d7b0246475 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -297,35 +297,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
 	},
 };
 
-#ifdef CONFIG_SYSCTL
-static ctl_table nf_ct_ipv6_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_frags_ctl.timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
-		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_frags_ctl.low_thresh,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
-		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_frags_ctl.high_thresh,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{ .ctl_name = 0 }
-};
-#endif
-
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e170c67c47a..d631631189b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -70,7 +70,7 @@ struct nf_ct_frag6_queue
 	__u16			nhoffset;
 };
 
-struct inet_frags_ctl nf_frags_ctl __read_mostly = {
+static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
 	.high_thresh	 = 256 * 1024,
 	.low_thresh	 = 192 * 1024,
 	.timeout	 = IPV6_FRAG_TIMEOUT,
@@ -79,6 +79,35 @@ struct inet_frags_ctl nf_frags_ctl __read_mostly = {
 
 static struct inet_frags nf_frags;
 
+#ifdef CONFIG_SYSCTL
+struct ctl_table nf_ct_ipv6_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_frag6_timeout",
+		.data		= &nf_frags_ctl.timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
+		.procname	= "nf_conntrack_frag6_low_thresh",
+		.data		= &nf_frags_ctl.low_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
+		.procname	= "nf_conntrack_frag6_high_thresh",
+		.data		= &nf_frags_ctl.high_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
 static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 			       struct in6_addr *daddr)
 {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4dfcddc871c..1815ff0cf62 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -625,12 +625,70 @@ static struct inet6_protocol frag_protocol =
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
-void ipv6_frag_sysctl_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+static struct ctl_table ip6_frags_ctl_table[] = {
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
+		.procname	= "ip6frag_high_thresh",
+		.data		= &init_net.ipv6.sysctl.frags.high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
+		.procname	= "ip6frag_low_thresh",
+		.data		= &init_net.ipv6.sysctl.frags.low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
+		.procname	= "ip6frag_time",
+		.data		= &init_net.ipv6.sysctl.frags.timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
+		.procname	= "ip6frag_secret_interval",
+		.data		= &init_net.ipv6.sysctl.frags.secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{ }
+};
+
+static int ip6_frags_sysctl_register(struct net *net)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path,
+			ip6_frags_ctl_table);
+	return hdr == NULL ? -ENOMEM : 0;
+}
+#else
+static inline int ip6_frags_sysctl_register(struct net *net)
 {
-	if (net != &init_net)
-		return;
+	return 0;
+}
+#endif
 
+static int ipv6_frags_init_net(struct net *net)
+{
 	ip6_frags.ctl = &net->ipv6.sysctl.frags;
+
+	net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
+	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
+	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+
+	return ip6_frags_sysctl_register(net);
 }
 
 int __init ipv6_frag_init(void)
@@ -641,6 +699,8 @@ int __init ipv6_frag_init(void)
 	if (ret)
 		goto out;
 
+	ipv6_frags_init_net(&init_net);
+
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = NULL;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7197eb74a75..408691b777c 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -37,40 +37,6 @@ static ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-	{
-		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
-		.procname	= "ip6frag_high_thresh",
-		.data		= &init_net.ipv6.sysctl.frags.high_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
-		.procname	= "ip6frag_low_thresh",
-		.data		= &init_net.ipv6.sysctl.frags.low_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
-		.procname	= "ip6frag_time",
-		.data		= &init_net.ipv6.sysctl.frags.timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
-		.procname	= "ip6frag_secret_interval",
-		.data		= &init_net.ipv6.sysctl.frags.secret_interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies
-	},
 	{
 		.ctl_name	= NET_IPV6_MLD_MAX_MSF,
 		.procname	= "mld_max_msf",
@@ -126,16 +92,12 @@ static int ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[1].child = ipv6_icmp_table;
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
-	ipv6_table[3].data = &net->ipv6.sysctl.frags.high_thresh;
-	ipv6_table[4].data = &net->ipv6.sysctl.frags.low_thresh;
-	ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout;
-	ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval;
 
 	/* We don't want this value to be per namespace, it should be global
 	   to all namespaces, so make it read-only when we are not in the
 	   init network namespace */
 	if (net != &init_net)
-		ipv6_table[7].mode = 0444;
+		ipv6_table[3].mode = 0444;
 
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
-- 
cgit v1.2.3


From ac18e7509e7df327e30d6e073a787d922eaf211d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:02:14 -0800
Subject: [NETNS][FRAGS]: Make the inet_frag_queue lookup work in namespaces.

Since fragment management code is consolidated, we cannot have the
pointer from inet_frag_queue to struct net, since we must know what
king of fragment this is.

So, I introduce the netns_frags structure. This one is currently
empty, but will be eventually filled with per-namespace
attributes. Each inet_frag_queue is tagged with this one.

The conntrack_reasm is not "netns-izated", so it has one static
netns_frags instance to keep working in init namespace.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 27 +++++++++++++++------------
 net/ipv4/ip_fragment.c                  |  8 +++++---
 net/ipv6/netfilter/nf_conntrack_reasm.c |  3 ++-
 net/ipv6/reassembly.c                   |  8 +++++---
 4 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 737910767ff..158c5f60d02 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -174,8 +174,9 @@ int inet_frag_evictor(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frag_evictor);
 
-static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
-		struct inet_frags *f, unsigned int hash, void *arg)
+static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
+		struct inet_frag_queue *qp_in, struct inet_frags *f,
+		unsigned int hash, void *arg)
 {
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
@@ -189,7 +190,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	 * promoted read lock to write lock.
 	 */
 	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
-		if (f->match(qp, arg)) {
+		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&f->lock);
 			qp_in->last_in |= COMPLETE;
@@ -210,7 +211,8 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	return qp;
 }
 
-static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
+static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+		struct inet_frags *f, void *arg)
 {
 	struct inet_frag_queue *q;
 
@@ -223,31 +225,32 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
+	q->net = nf;
 
 	return q;
 }
 
-static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
-		void *arg, unsigned int hash)
+static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+		struct inet_frags *f, void *arg, unsigned int hash)
 {
 	struct inet_frag_queue *q;
 
-	q = inet_frag_alloc(f, arg);
+	q = inet_frag_alloc(nf, f, arg);
 	if (q == NULL)
 		return NULL;
 
-	return inet_frag_intern(q, f, hash, arg);
+	return inet_frag_intern(nf, q, f, hash, arg);
 }
 
-struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
-		unsigned int hash)
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+		struct inet_frags *f, void *key, unsigned int hash)
 {
 	struct inet_frag_queue *q;
 	struct hlist_node *n;
 
 	read_lock(&f->lock);
 	hlist_for_each_entry(q, n, &f->hash[hash], list) {
-		if (f->match(q, key)) {
+		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			read_unlock(&f->lock);
 			return q;
@@ -255,6 +258,6 @@ struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(f, key, hash);
+	return inet_frag_create(nf, f, key, hash);
 }
 EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a53463e594b..56211ef46ee 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -236,7 +236,7 @@ out:
 /* Find the correct entry in the "incomplete datagrams" queue for
  * this IP datagram, and create new one, if nothing is found.
  */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
+static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 {
 	struct inet_frag_queue *q;
 	struct ip4_create_arg arg;
@@ -246,7 +246,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	arg.user = user;
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
-	q = inet_frag_find(&ip4_frags, &arg, hash);
+	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
 	if (q == NULL)
 		goto out_nomem;
 
@@ -582,15 +582,17 @@ out_fail:
 int ip_defrag(struct sk_buff *skb, u32 user)
 {
 	struct ipq *qp;
+	struct net *net;
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
+	net = skb->dev->nd_net;
 	/* Start by cleaning up the memory. */
 	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
 		ip_evictor();
 
 	/* Lookup (or create) queue header */
-	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
+	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
 		int ret;
 
 		spin_lock(&qp->q.lock);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d631631189b..18accd4eab0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -78,6 +78,7 @@ static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
 };
 
 static struct inet_frags nf_frags;
+static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
 struct ctl_table nf_ct_ipv6_sysctl_table[] = {
@@ -212,7 +213,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	arg.dst = dst;
 	hash = ip6qhashfn(id, src, dst);
 
-	q = inet_frag_find(&nf_frags, &arg, hash);
+	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
 	if (q == NULL)
 		goto oom;
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1815ff0cf62..ab2d53b81b7 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -234,7 +234,7 @@ out:
 }
 
 static __inline__ struct frag_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	struct inet6_dev *idev)
 {
 	struct inet_frag_queue *q;
@@ -246,7 +246,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	arg.dst = dst;
 	hash = ip6qhashfn(id, src, dst);
 
-	q = inet_frag_find(&ip6_frags, &arg, hash);
+	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
 	if (q == NULL)
 		goto oom;
 
@@ -568,6 +568,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
+	struct net *net;
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
@@ -598,10 +599,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
+	net = skb->dev->nd_net;
 	if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh)
 		ip6_evictor(ip6_dst_idev(skb->dst));
 
-	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
+	if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
 			  ip6_dst_idev(skb->dst))) != NULL) {
 		int ret;
 
-- 
cgit v1.2.3


From e5a2bb842cd9681d00d4ca963e63e4d3647e66f8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:06:23 -0800
Subject: [NETNS][FRAGS]: Make the nqueues counter per-namespace.

This is simple - just move the variable from struct inet_frags
to struct netns_frags and adjust the usage appropriately.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 11 ++++++++---
 net/ipv4/ip_fragment.c                  |  6 ++++--
 net/ipv4/proc.c                         |  2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c |  1 +
 net/ipv6/proc.c                         |  2 +-
 net/ipv6/reassembly.c                   |  6 ++++--
 6 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 158c5f60d02..4fec0b911f8 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -63,7 +63,6 @@ void inet_frags_init(struct inet_frags *f)
 	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				   (jiffies ^ (jiffies >> 6)));
 
-	f->nqueues = 0;
 	atomic_set(&f->mem, 0);
 
 	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
@@ -73,6 +72,12 @@ void inet_frags_init(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frags_init);
 
+void inet_frags_init_net(struct netns_frags *nf)
+{
+	nf->nqueues = 0;
+}
+EXPORT_SYMBOL(inet_frags_init_net);
+
 void inet_frags_fini(struct inet_frags *f)
 {
 	del_timer(&f->secret_timer);
@@ -84,7 +89,7 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 	write_lock(&f->lock);
 	hlist_del(&fq->list);
 	list_del(&fq->lru_list);
-	f->nqueues--;
+	fq->net->nqueues--;
 	write_unlock(&f->lock);
 }
 
@@ -206,7 +211,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &f->hash[hash]);
 	list_add_tail(&qp->lru_list, &f->lru_list);
-	f->nqueues++;
+	nf->nqueues++;
 	write_unlock(&f->lock);
 	return qp;
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 56211ef46ee..cd8c83025b4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -95,9 +95,9 @@ static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
 
 static struct inet_frags ip4_frags;
 
-int ip_frag_nqueues(void)
+int ip_frag_nqueues(struct net *net)
 {
-	return ip4_frags.nqueues;
+	return net->ipv4.frags.nqueues;
 }
 
 int ip_frag_mem(void)
@@ -675,6 +675,8 @@ static inline int ip4_frags_ctl_register(struct net *net)
 
 static int ipv4_frags_init_net(struct net *net)
 {
+	inet_frags_init_net(&net->ipv4.frags);
+
 	return ip4_frags_ctl_register(net);
 }
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cb3787fbeb9..bae32808616 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -62,7 +62,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
 	seq_printf(seq,  "FRAG: inuse %d memory %d\n",
-			ip_frag_nqueues(), ip_frag_mem());
+			ip_frag_nqueues(&init_net), ip_frag_mem());
 	return 0;
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 18accd4eab0..0b9d0097b68 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -712,6 +712,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
+	inet_frags_init_net(&nf_init_frags);
 	inet_frags_init(&nf_frags);
 
 	return 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 571d95a21c1..dec34c87cb4 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -44,7 +44,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(&rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues(), ip6_frag_mem());
+		       ip6_frag_nqueues(&init_net), ip6_frag_mem());
 	return 0;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ab2d53b81b7..77a874020f3 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -84,9 +84,9 @@ struct frag_queue
 
 static struct inet_frags ip6_frags;
 
-int ip6_frag_nqueues(void)
+int ip6_frag_nqueues(struct net *net)
 {
-	return ip6_frags.nqueues;
+	return net->ipv6.frags.nqueues;
 }
 
 int ip6_frag_mem(void)
@@ -690,6 +690,8 @@ static int ipv6_frags_init_net(struct net *net)
 	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
 	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
 
+	inet_frags_init_net(&net->ipv6.frags);
+
 	return ip6_frags_sysctl_register(net);
 }
 
-- 
cgit v1.2.3


From 6ddc082223ef0f73717b4133fa7e648842bbfd02 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:07:25 -0800
Subject: [NETNS][FRAGS]: Make the mem counter per-namespace.

This is also simple, but introduces more changes, since
then mem counter is altered in more places.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 21 +++++++++++----------
 net/ipv4/ip_fragment.c                  | 29 +++++++++++++++--------------
 net/ipv4/proc.c                         |  2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 14 +++++++-------
 net/ipv6/proc.c                         |  2 +-
 net/ipv6/reassembly.c                   | 28 +++++++++++++++-------------
 6 files changed, 50 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4fec0b911f8..ad79ae0c026 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -63,8 +63,6 @@ void inet_frags_init(struct inet_frags *f)
 	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				   (jiffies ^ (jiffies >> 6)));
 
-	atomic_set(&f->mem, 0);
-
 	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
 			(unsigned long)f);
 	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
@@ -75,6 +73,7 @@ EXPORT_SYMBOL(inet_frags_init);
 void inet_frags_init_net(struct netns_frags *nf)
 {
 	nf->nqueues = 0;
+	atomic_set(&nf->mem, 0);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
 
@@ -107,13 +106,13 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 
 EXPORT_SYMBOL(inet_frag_kill);
 
-static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
-						int *work)
+static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
+		struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
 
-	atomic_sub(skb->truesize, &f->mem);
+	atomic_sub(skb->truesize, &nf->mem);
 	if (f->skb_free)
 		f->skb_free(skb);
 	kfree_skb(skb);
@@ -123,22 +122,24 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 					int *work)
 {
 	struct sk_buff *fp;
+	struct netns_frags *nf;
 
 	BUG_TRAP(q->last_in & COMPLETE);
 	BUG_TRAP(del_timer(&q->timer) == 0);
 
 	/* Release all fragment data. */
 	fp = q->fragments;
+	nf = q->net;
 	while (fp) {
 		struct sk_buff *xp = fp->next;
 
-		frag_kfree_skb(f, fp, work);
+		frag_kfree_skb(nf, f, fp, work);
 		fp = xp;
 	}
 
 	if (work)
 		*work -= f->qsize;
-	atomic_sub(f->qsize, &f->mem);
+	atomic_sub(f->qsize, &nf->mem);
 
 	if (f->destructor)
 		f->destructor(q);
@@ -147,12 +148,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct inet_frags *f)
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
 {
 	struct inet_frag_queue *q;
 	int work, evicted = 0;
 
-	work = atomic_read(&f->mem) - f->ctl->low_thresh;
+	work = atomic_read(&nf->mem) - f->ctl->low_thresh;
 	while (work > 0) {
 		read_lock(&f->lock);
 		if (list_empty(&f->lru_list)) {
@@ -226,7 +227,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 		return NULL;
 
 	f->constructor(q, arg);
-	atomic_add(f->qsize, &f->mem);
+	atomic_add(f->qsize, &nf->mem);
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd8c83025b4..4f013343cef 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -100,9 +100,9 @@ int ip_frag_nqueues(struct net *net)
 	return net->ipv4.frags.nqueues;
 }
 
-int ip_frag_mem(void)
+int ip_frag_mem(struct net *net)
 {
-	return atomic_read(&ip4_frags.mem);
+	return atomic_read(&net->ipv4.frags.mem);
 }
 
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -142,11 +142,12 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
 }
 
 /* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
+static __inline__ void frag_kfree_skb(struct netns_frags *nf,
+		struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip4_frags.mem);
+	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
 
@@ -192,11 +193,11 @@ static void ipq_kill(struct ipq *ipq)
 /* Memory limiting on fragments.  Evictor trashes the oldest
  * fragment queue until we are back under the threshold.
  */
-static void ip_evictor(void)
+static void ip_evictor(struct net *net)
 {
 	int evicted;
 
-	evicted = inet_frag_evictor(&ip4_frags);
+	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
 	if (evicted)
 		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
@@ -294,7 +295,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
-		frag_kfree_skb(fp, NULL);
+		frag_kfree_skb(qp->q.net, fp, NULL);
 		fp = xp;
 	} while (fp);
 
@@ -431,7 +432,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 				qp->q.fragments = next;
 
 			qp->q.meat -= free_it->len;
-			frag_kfree_skb(free_it, NULL);
+			frag_kfree_skb(qp->q.net, free_it, NULL);
 		}
 	}
 
@@ -451,7 +452,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	}
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
-	atomic_add(skb->truesize, &ip4_frags.mem);
+	atomic_add(skb->truesize, &qp->q.net->mem);
 	if (offset == 0)
 		qp->q.last_in |= FIRST_IN;
 
@@ -534,12 +535,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip4_frags.mem);
+		atomic_add(clone->truesize, &qp->q.net->mem);
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip4_frags.mem);
+	atomic_sub(head->truesize, &qp->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -549,7 +550,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip4_frags.mem);
+		atomic_sub(fp->truesize, &qp->q.net->mem);
 	}
 
 	head->next = NULL;
@@ -588,8 +589,8 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 
 	net = skb->dev->nd_net;
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
-		ip_evictor();
+	if (atomic_read(&net->ipv4.frags.mem) > ip4_frags_ctl.high_thresh)
+		ip_evictor(net);
 
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index bae32808616..d63474c6b40 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -62,7 +62,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
 	seq_printf(seq,  "FRAG: inuse %d memory %d\n",
-			ip_frag_nqueues(&init_net), ip_frag_mem());
+			ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
 	return 0;
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0b9d0097b68..cb826bea4b1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -155,7 +155,7 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &nf_frags.mem);
+	atomic_sub(skb->truesize, &nf_init_frags.mem);
 	nf_skb_free(skb);
 	kfree_skb(skb);
 }
@@ -177,7 +177,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 
 static void nf_ct_frag6_evictor(void)
 {
-	inet_frag_evictor(&nf_frags);
+	inet_frag_evictor(&nf_init_frags, &nf_frags);
 }
 
 static void nf_ct_frag6_expire(unsigned long data)
@@ -382,7 +382,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	skb->dev = NULL;
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
-	atomic_add(skb->truesize, &nf_frags.mem);
+	atomic_add(skb->truesize, &nf_init_frags.mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -459,7 +459,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		atomic_add(clone->truesize, &nf_frags.mem);
+		atomic_add(clone->truesize, &nf_init_frags.mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -473,7 +473,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &nf_frags.mem);
+	atomic_sub(head->truesize, &nf_init_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -483,7 +483,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &nf_frags.mem);
+		atomic_sub(fp->truesize, &nf_init_frags.mem);
 	}
 
 	head->next = NULL;
@@ -633,7 +633,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
+	if (atomic_read(&nf_init_frags.mem) > nf_frags_ctl.high_thresh)
 		nf_ct_frag6_evictor();
 
 	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index dec34c87cb4..35e502a7249 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -44,7 +44,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(&rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues(&init_net), ip6_frag_mem());
+		       ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net));
 	return 0;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 77a874020f3..241b2cc49bf 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -89,9 +89,9 @@ int ip6_frag_nqueues(struct net *net)
 	return net->ipv6.frags.nqueues;
 }
 
-int ip6_frag_mem(void)
+int ip6_frag_mem(struct net *net)
 {
-	return atomic_read(&ip6_frags.mem);
+	return atomic_read(&net->ipv6.frags.mem);
 }
 
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
@@ -149,11 +149,12 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
 EXPORT_SYMBOL(ip6_frag_match);
 
 /* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
+static inline void frag_kfree_skb(struct netns_frags *nf,
+		struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip6_frags.mem);
+	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
 
@@ -183,11 +184,11 @@ static __inline__ void fq_kill(struct frag_queue *fq)
 	inet_frag_kill(&fq->q, &ip6_frags);
 }
 
-static void ip6_evictor(struct inet6_dev *idev)
+static void ip6_evictor(struct net *net, struct inet6_dev *idev)
 {
 	int evicted;
 
-	evicted = inet_frag_evictor(&ip6_frags);
+	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
 	if (evicted)
 		IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
@@ -389,7 +390,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 				fq->q.fragments = next;
 
 			fq->q.meat -= free_it->len;
-			frag_kfree_skb(free_it, NULL);
+			frag_kfree_skb(fq->q.net, free_it, NULL);
 		}
 	}
 
@@ -409,7 +410,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
-	atomic_add(skb->truesize, &ip6_frags.mem);
+	atomic_add(skb->truesize, &fq->q.net->mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -503,7 +504,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip6_frags.mem);
+		atomic_add(clone->truesize, &fq->q.net->mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -518,7 +519,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip6_frags.mem);
+	atomic_sub(head->truesize, &fq->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -528,7 +529,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip6_frags.mem);
+		atomic_sub(fp->truesize, &fq->q.net->mem);
 	}
 
 	head->next = NULL;
@@ -600,8 +601,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	}
 
 	net = skb->dev->nd_net;
-	if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh)
-		ip6_evictor(ip6_dst_idev(skb->dst));
+	if (atomic_read(&net->ipv6.frags.mem) >
+			init_net.ipv6.sysctl.frags.high_thresh)
+		ip6_evictor(net, ip6_dst_idev(skb->dst));
 
 	if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
 			  ip6_dst_idev(skb->dst))) != NULL) {
-- 
cgit v1.2.3


From e4a2d5c2bccd5bd29de5ae4f14ff4448fac9cfc8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:08:36 -0800
Subject: [NETNS][FRAGS]: Duplicate sysctl tables for new namespaces.

Each namespace has to have own tables to tune their
different parameters, so duplicate the tables and
register them.

All the tables in sub-namespaces are temporarily made
read-only.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 42 +++++++++++++++++++++++++++++++++++++++---
 net/ipv6/reassembly.c  | 41 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 77 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4f013343cef..c51e1a11dc6 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -661,17 +661,53 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 
 static int ip4_frags_ctl_register(struct net *net)
 {
+	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
-	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path,
-			ip4_frags_ctl_table);
-	return hdr == NULL ? -ENOMEM : 0;
+	table = ip4_frags_ctl_table;
+	if (net != &init_net) {
+		table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].mode &= ~0222;
+		table[1].mode &= ~0222;
+		table[2].mode &= ~0222;
+		table[3].mode &= ~0222;
+		table[4].mode &= ~0222;
+	}
+
+	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->ipv4.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (net != &init_net)
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void ip4_frags_ctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ipv4.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.frags_hdr);
+	kfree(table);
 }
 #else
 static inline int ip4_frags_ctl_register(struct net *net)
 {
 	return 0;
 }
+
+static inline void ip4_frags_ctl_unregister(struct net *net)
+{
+}
 #endif
 
 static int ipv4_frags_init_net(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 241b2cc49bf..0300dcbf1a7 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -670,17 +670,52 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 
 static int ip6_frags_sysctl_register(struct net *net)
 {
+	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
-	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path,
-			ip6_frags_ctl_table);
-	return hdr == NULL ? -ENOMEM : 0;
+	table = ip6_frags_ctl_table;
+	if (net != &init_net) {
+		table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].mode &= ~0222;
+		table[1].mode &= ~0222;
+		table[2].mode &= ~0222;
+		table[3].mode &= ~0222;
+	}
+
+	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->ipv6.sysctl.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (net != &init_net)
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void ip6_frags_sysctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
+	kfree(table);
 }
 #else
 static inline int ip6_frags_sysctl_register(struct net *net)
 {
 	return 0;
 }
+
+static inline void ip6_frags_sysctl_unregister(struct net *net)
+{
+}
 #endif
 
 static int ipv6_frags_init_net(struct net *net)
-- 
cgit v1.2.3


From b2fd5321dd160ef309dfb6cfc78ed8de4a830659 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:09:37 -0800
Subject: [NETNS][FRAGS]: Make the net.ipv4.ipfrag_timeout work in namespaces.

Move it to the netns_frags, adjust the usage and
make the appropriate ctl table writable.

Now fragment, that live in different namespaces can
live for different times.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                |  2 +-
 net/ipv4/ip_fragment.c                  | 20 ++++++++++----------
 net/ipv6/netfilter/nf_conntrack_reasm.c |  4 ++--
 net/ipv6/reassembly.c                   |  6 +++---
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index ad79ae0c026..9da96792fff 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -206,7 +206,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 	}
 #endif
 	qp = qp_in;
-	if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
+	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
 		atomic_inc(&qp->refcnt);
 
 	atomic_inc(&qp->refcnt);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c51e1a11dc6..70d241c8d2a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -83,13 +83,6 @@ static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
 	 */
 	.high_thresh	 = 256 * 1024,
 	.low_thresh	 = 192 * 1024,
-
-	/*
-	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
-	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
-	 * by TTL.
-	 */
-	.timeout	 = IP_FRAG_TIME,
 	.secret_interval = 10 * 60 * HZ,
 };
 
@@ -287,7 +280,7 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
 		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
@@ -633,7 +626,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_TIME,
 		.procname	= "ipfrag_time",
-		.data		= &ip4_frags_ctl.timeout,
+		.data		= &init_net.ipv4.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -672,7 +665,7 @@ static int ip4_frags_ctl_register(struct net *net)
 
 		table[0].mode &= ~0222;
 		table[1].mode &= ~0222;
-		table[2].mode &= ~0222;
+		table[2].data = &net->ipv4.frags.timeout;
 		table[3].mode &= ~0222;
 		table[4].mode &= ~0222;
 	}
@@ -712,6 +705,13 @@ static inline void ip4_frags_ctl_unregister(struct net *net)
 
 static int ipv4_frags_init_net(struct net *net)
 {
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
+	 * by TTL.
+	 */
+	net->ipv4.frags.timeout = IP_FRAG_TIME;
+
 	inet_frags_init_net(&net->ipv4.frags);
 
 	return ip4_frags_ctl_register(net);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index cb826bea4b1..92a311ff79c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -73,7 +73,6 @@ struct nf_ct_frag6_queue
 static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
 	.high_thresh	 = 256 * 1024,
 	.low_thresh	 = 192 * 1024,
-	.timeout	 = IPV6_FRAG_TIMEOUT,
 	.secret_interval = 10 * 60 * HZ,
 };
 
@@ -84,7 +83,7 @@ static struct netns_frags nf_init_frags;
 struct ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_frags_ctl.timeout,
+		.data		= &nf_init_frags.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -712,6 +711,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
+	nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
 	inet_frags_init_net(&nf_init_frags);
 	inet_frags_init(&nf_frags);
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0300dcbf1a7..91761365b18 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
 		.procname	= "ip6frag_time",
-		.data		= &init_net.ipv6.sysctl.frags.timeout,
+		.data		= &init_net.ipv6.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -681,7 +681,7 @@ static int ip6_frags_sysctl_register(struct net *net)
 
 		table[0].mode &= ~0222;
 		table[1].mode &= ~0222;
-		table[2].mode &= ~0222;
+		table[2].data = &net->ipv6.frags.timeout;
 		table[3].mode &= ~0222;
 	}
 
@@ -724,7 +724,7 @@ static int ipv6_frags_init_net(struct net *net)
 
 	net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
 	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
-	net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
 	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
 
 	inet_frags_init_net(&net->ipv6.frags);
-- 
cgit v1.2.3


From e31e0bdc7e7fb9a4b09d2f3266c035a18fdcee9d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:10:13 -0800
Subject: [NETNS][FRAGS]: Make thresholds work in namespaces.

This is the same as with the timeout variable.

Currently, after exceeding the high threshold _all_
the fragments are evicted, but it will be fixed in
later patch.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                |  2 +-
 net/ipv4/ip_fragment.c                  | 26 +++++++++++++-------------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------
 net/ipv6/reassembly.c                   | 15 +++++++--------
 4 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 9da96792fff..5ab399c1528 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -153,7 +153,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
 	struct inet_frag_queue *q;
 	int work, evicted = 0;
 
-	work = atomic_read(&nf->mem) - f->ctl->low_thresh;
+	work = atomic_read(&nf->mem) - nf->low_thresh;
 	while (work > 0) {
 		read_lock(&f->lock);
 		if (list_empty(&f->lru_list)) {
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 70d241c8d2a..80c2c19196c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -75,14 +75,6 @@ struct ipq {
 };
 
 static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
-	/*
-	 * Fragment cache limits. We will commit 256K at one time. Should we
-	 * cross that limit we will prune down to 192K. This should cope with
-	 * even the most extreme cases without allowing an attacker to
-	 * measurably harm machine performance.
-	 */
-	.high_thresh	 = 256 * 1024,
-	.low_thresh	 = 192 * 1024,
 	.secret_interval = 10 * 60 * HZ,
 };
 
@@ -582,7 +574,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 
 	net = skb->dev->nd_net;
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&net->ipv4.frags.mem) > ip4_frags_ctl.high_thresh)
+	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
 		ip_evictor(net);
 
 	/* Lookup (or create) queue header */
@@ -610,7 +602,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
-		.data		= &ip4_frags_ctl.high_thresh,
+		.data		= &init_net.ipv4.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -618,7 +610,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
 		.procname	= "ipfrag_low_thresh",
-		.data		= &ip4_frags_ctl.low_thresh,
+		.data		= &init_net.ipv4.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -663,8 +655,8 @@ static int ip4_frags_ctl_register(struct net *net)
 		if (table == NULL)
 			goto err_alloc;
 
-		table[0].mode &= ~0222;
-		table[1].mode &= ~0222;
+		table[0].data = &net->ipv4.frags.high_thresh;
+		table[1].data = &net->ipv4.frags.low_thresh;
 		table[2].data = &net->ipv4.frags.timeout;
 		table[3].mode &= ~0222;
 		table[4].mode &= ~0222;
@@ -705,6 +697,14 @@ static inline void ip4_frags_ctl_unregister(struct net *net)
 
 static int ipv4_frags_init_net(struct net *net)
 {
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	net->ipv4.frags.high_thresh = 256 * 1024;
+	net->ipv4.frags.low_thresh = 192 * 1024;
 	/*
 	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
 	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 92a311ff79c..c75ac17e394 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -71,8 +71,6 @@ struct nf_ct_frag6_queue
 };
 
 static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
-	.high_thresh	 = 256 * 1024,
-	.low_thresh	 = 192 * 1024,
 	.secret_interval = 10 * 60 * HZ,
 };
 
@@ -91,7 +89,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_frags_ctl.low_thresh,
+		.data		= &nf_init_frags.low_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -99,7 +97,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_frags_ctl.high_thresh,
+		.data		= &nf_init_frags.high_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -632,7 +630,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	if (atomic_read(&nf_init_frags.mem) > nf_frags_ctl.high_thresh)
+	if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
 		nf_ct_frag6_evictor();
 
 	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -712,6 +710,8 @@ int nf_ct_frag6_init(void)
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
+	nf_init_frags.high_thresh = 256 * 1024;
+	nf_init_frags.low_thresh = 192 * 1024;
 	inet_frags_init_net(&nf_init_frags);
 	inet_frags_init(&nf_frags);
 
@@ -722,6 +722,6 @@ void nf_ct_frag6_cleanup(void)
 {
 	inet_frags_fini(&nf_frags);
 
-	nf_frags_ctl.low_thresh = 0;
+	nf_init_frags.low_thresh = 0;
 	nf_ct_frag6_evictor();
 }
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 91761365b18..85f3fa38223 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -601,8 +601,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	}
 
 	net = skb->dev->nd_net;
-	if (atomic_read(&net->ipv6.frags.mem) >
-			init_net.ipv6.sysctl.frags.high_thresh)
+	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
 		ip6_evictor(net, ip6_dst_idev(skb->dst));
 
 	if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
@@ -634,7 +633,7 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
 		.procname	= "ip6frag_high_thresh",
-		.data		= &init_net.ipv6.sysctl.frags.high_thresh,
+		.data		= &init_net.ipv6.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -642,7 +641,7 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
 		.procname	= "ip6frag_low_thresh",
-		.data		= &init_net.ipv6.sysctl.frags.low_thresh,
+		.data		= &init_net.ipv6.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -679,8 +678,8 @@ static int ip6_frags_sysctl_register(struct net *net)
 		if (table == NULL)
 			goto err_alloc;
 
-		table[0].mode &= ~0222;
-		table[1].mode &= ~0222;
+		table[0].data = &net->ipv6.frags.high_thresh;
+		table[1].data = &net->ipv6.frags.low_thresh;
 		table[2].data = &net->ipv6.frags.timeout;
 		table[3].mode &= ~0222;
 	}
@@ -722,8 +721,8 @@ static int ipv6_frags_init_net(struct net *net)
 {
 	ip6_frags.ctl = &net->ipv6.sysctl.frags;
 
-	net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
-	net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
+	net->ipv6.frags.high_thresh = 256 * 1024;
+	net->ipv6.frags.low_thresh = 192 * 1024;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
 	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
 
-- 
cgit v1.2.3


From 3b4bc4a2bfe80d01ebd4f2b6dcc58986c970ed16 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:11:04 -0800
Subject: [NETNS][FRAGS]: Isolate the secret interval from namespaces.

Since we have one hashtable to lookup the fragment, having
different secret_interval-s for hash rebuild doesn't make
sense, so move this one to inet_frags.

The inet_frags_ctl becomes empty after this, so remove it.
The appropriate ctl table is kept read-only in namespaces.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 4 ++--
 net/ipv4/ip_fragment.c                  | 8 ++------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +-----
 net/ipv6/reassembly.c                   | 6 ++----
 4 files changed, 7 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ab399c1528..fcf5252166f 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -47,7 +47,7 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 	}
 	write_unlock(&f->lock);
 
-	mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+	mod_timer(&f->secret_timer, now + f->secret_interval);
 }
 
 void inet_frags_init(struct inet_frags *f)
@@ -65,7 +65,7 @@ void inet_frags_init(struct inet_frags *f)
 
 	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
 			(unsigned long)f);
-	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+	f->secret_timer.expires = jiffies + f->secret_interval;
 	add_timer(&f->secret_timer);
 }
 EXPORT_SYMBOL(inet_frags_init);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 80c2c19196c..00646ed451f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -74,10 +74,6 @@ struct ipq {
 	struct inet_peer *peer;
 };
 
-static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags ip4_frags;
 
 int ip_frag_nqueues(struct net *net)
@@ -627,7 +623,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
 		.procname	= "ipfrag_secret_interval",
-		.data		= &ip4_frags_ctl.secret_interval,
+		.data		= &ip4_frags.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -720,7 +716,6 @@ static int ipv4_frags_init_net(struct net *net)
 void __init ipfrag_init(void)
 {
 	ipv4_frags_init_net(&init_net);
-	ip4_frags.ctl = &ip4_frags_ctl;
 	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
@@ -728,6 +723,7 @@ void __init ipfrag_init(void)
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
+	ip4_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c75ac17e394..6eed991a4a3 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -70,10 +70,6 @@ struct nf_ct_frag6_queue
 	__u16			nhoffset;
 };
 
-static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
@@ -701,7 +697,6 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
 
 int nf_ct_frag6_init(void)
 {
-	nf_frags.ctl = &nf_frags_ctl;
 	nf_frags.hashfn = nf_hashfn;
 	nf_frags.constructor = ip6_frag_init;
 	nf_frags.destructor = NULL;
@@ -709,6 +704,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
+	nf_frags.secret_interval = 10 * 60 * HZ;
 	nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
 	nf_init_frags.high_thresh = 256 * 1024;
 	nf_init_frags.low_thresh = 192 * 1024;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 85f3fa38223..85207008730 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -658,7 +658,7 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
 		.procname	= "ip6frag_secret_interval",
-		.data		= &init_net.ipv6.sysctl.frags.secret_interval,
+		.data		= &ip6_frags.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -719,12 +719,9 @@ static inline void ip6_frags_sysctl_unregister(struct net *net)
 
 static int ipv6_frags_init_net(struct net *net)
 {
-	ip6_frags.ctl = &net->ipv6.sysctl.frags;
-
 	net->ipv6.frags.high_thresh = 256 * 1024;
 	net->ipv6.frags.low_thresh = 192 * 1024;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
 
 	inet_frags_init_net(&net->ipv6.frags);
 
@@ -748,6 +745,7 @@ int __init ipv6_frag_init(void)
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.match = ip6_frag_match;
 	ip6_frags.frag_expire = ip6_frag_expire;
+	ip6_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip6_frags);
 out:
 	return ret;
-- 
cgit v1.2.3


From 3140c25c82106645a6b1fc469dab7006a1d09fd0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:11:48 -0800
Subject: [NETNS][FRAGS]: Make the LRU list per namespace.

The inet_frags.lru_list is used for evicting only, so we have
to make it per-namespace, to evict only those fragments, who's
namespace exceeded its high threshold, but not the whole hash.
Besides, this helps to avoid long loops  in evictor.

The spinlock is not per-namespace because it protects the
hash table as well, which is global.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 8 ++++----
 net/ipv4/ip_fragment.c                  | 2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
 net/ipv6/reassembly.c                   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index fcf5252166f..f1b95e12877 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -57,7 +57,6 @@ void inet_frags_init(struct inet_frags *f)
 	for (i = 0; i < INETFRAGS_HASHSZ; i++)
 		INIT_HLIST_HEAD(&f->hash[i]);
 
-	INIT_LIST_HEAD(&f->lru_list);
 	rwlock_init(&f->lock);
 
 	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -74,6 +73,7 @@ void inet_frags_init_net(struct netns_frags *nf)
 {
 	nf->nqueues = 0;
 	atomic_set(&nf->mem, 0);
+	INIT_LIST_HEAD(&nf->lru_list);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
 
@@ -156,12 +156,12 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
 	work = atomic_read(&nf->mem) - nf->low_thresh;
 	while (work > 0) {
 		read_lock(&f->lock);
-		if (list_empty(&f->lru_list)) {
+		if (list_empty(&nf->lru_list)) {
 			read_unlock(&f->lock);
 			break;
 		}
 
-		q = list_first_entry(&f->lru_list,
+		q = list_first_entry(&nf->lru_list,
 				struct inet_frag_queue, lru_list);
 		atomic_inc(&q->refcnt);
 		read_unlock(&f->lock);
@@ -211,7 +211,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &f->hash[hash]);
-	list_add_tail(&qp->lru_list, &f->lru_list);
+	list_add_tail(&qp->lru_list, &nf->lru_list);
 	nf->nqueues++;
 	write_unlock(&f->lock);
 	return qp;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 00646ed451f..29b4b0972e4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -441,7 +441,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		return ip_frag_reasm(qp, prev, dev);
 
 	write_lock(&ip4_frags.lock);
-	list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
 	write_unlock(&ip4_frags.lock);
 	return -EINPROGRESS;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6eed991a4a3..022da6ce4c0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -385,7 +385,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		fq->q.last_in |= FIRST_IN;
 	}
 	write_lock(&nf_frags.lock);
-	list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+	list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
 	write_unlock(&nf_frags.lock);
 	return 0;
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 85207008730..0c4bc46dee0 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -424,7 +424,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		return ip6_frag_reasm(fq, prev, dev);
 
 	write_lock(&ip6_frags.lock);
-	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
 	write_unlock(&ip6_frags.lock);
 	return -1;
 
-- 
cgit v1.2.3


From 81566e8322c3f6c6f9a2277fe0e440fee8d917bd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 06:12:39 -0800
Subject: [NETNS][FRAGS]: Make the pernet subsystem for fragments.

On namespace start we mainly prepare the ctl variables.

When the namespace is stopped we have to kill all the fragments that
point to this namespace.  The inet_frags_exit_net() handles it.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c |  7 +++++++
 net/ipv4/ip_fragment.c   | 13 ++++++++++++-
 net/ipv6/reassembly.c    | 14 +++++++++++++-
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f1b95e12877..724d69aed03 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -83,6 +83,13 @@ void inet_frags_fini(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
+void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+{
+	nf->low_thresh = 0;
+	inet_frag_evictor(nf, f);
+}
+EXPORT_SYMBOL(inet_frags_exit_net);
+
 static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 {
 	write_lock(&f->lock);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 29b4b0972e4..a2e92f9709d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -713,9 +713,20 @@ static int ipv4_frags_init_net(struct net *net)
 	return ip4_frags_ctl_register(net);
 }
 
+static void ipv4_frags_exit_net(struct net *net)
+{
+	ip4_frags_ctl_unregister(net);
+	inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+}
+
+static struct pernet_operations ip4_frags_ops = {
+	.init = ipv4_frags_init_net,
+	.exit = ipv4_frags_exit_net,
+};
+
 void __init ipfrag_init(void)
 {
-	ipv4_frags_init_net(&init_net);
+	register_pernet_subsys(&ip4_frags_ops);
 	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0c4bc46dee0..f936d045a39 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -728,6 +728,17 @@ static int ipv6_frags_init_net(struct net *net)
 	return ip6_frags_sysctl_register(net);
 }
 
+static void ipv6_frags_exit_net(struct net *net)
+{
+	ip6_frags_sysctl_unregister(net);
+	inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+}
+
+static struct pernet_operations ip6_frags_ops = {
+	.init = ipv6_frags_init_net,
+	.exit = ipv6_frags_exit_net,
+};
+
 int __init ipv6_frag_init(void)
 {
 	int ret;
@@ -736,7 +747,7 @@ int __init ipv6_frag_init(void)
 	if (ret)
 		goto out;
 
-	ipv6_frags_init_net(&init_net);
+	register_pernet_subsys(&ip6_frags_ops);
 
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
@@ -754,5 +765,6 @@ out:
 void ipv6_frag_exit(void)
 {
 	inet_frags_fini(&ip6_frags);
+	unregister_pernet_subsys(&ip6_frags_ops);
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 }
-- 
cgit v1.2.3


From 69a73829dbb10e7c8554e66a80cb4fde57347fff Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 22 Jan 2008 06:18:34 -0800
Subject: [DST]: shrinks sizeof(struct rtable) by 64 bytes on x86_64

On x86_64, sizeof(struct rtable) is 0x148, which is rounded up to
0x180 bytes by SLAB allocator.

We can reduce this to exactly 0x140 bytes, without alignment overhead,
and store 12 struct rtable per PAGE instead of 10.

rate_tokens is currently defined as an "unsigned long", while its
content should not exceed 6*HZ. It can safely be converted to an
unsigned int.

Moving tclassid right after rate_tokens to fill the 4 bytes hole
permits to save 8 bytes on 'struct dst_entry', which finally permits
to save 8 bytes on 'struct rtable'

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e57f1673bf6..37cdea0c26b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -275,18 +275,19 @@ static inline void icmp_xmit_unlock(void)
 #define XRLIM_BURST_FACTOR 6
 int xrlim_allow(struct dst_entry *dst, int timeout)
 {
-	unsigned long now;
+	unsigned long now, token = dst->rate_tokens;
 	int rc = 0;
 
 	now = jiffies;
-	dst->rate_tokens += now - dst->rate_last;
+	token += now - dst->rate_last;
 	dst->rate_last = now;
-	if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout)
-		dst->rate_tokens = XRLIM_BURST_FACTOR * timeout;
-	if (dst->rate_tokens >= timeout) {
-		dst->rate_tokens -= timeout;
+	if (token > XRLIM_BURST_FACTOR * timeout)
+		token = XRLIM_BURST_FACTOR * timeout;
+	if (token >= timeout) {
+		token -= timeout;
 		rc = 1;
 	}
+	dst->rate_tokens = token;
 	return rc;
 }
 
-- 
cgit v1.2.3


From a8bdf29c6cd117644d27677962fe832b33036c77 Mon Sep 17 00:00:00 2001
From: Guy Cohen <guy.cohen@intel.com>
Date: Wed, 9 Jan 2008 19:12:48 +0200
Subject: mac80211: Assign correct TID for local bridged packets

This patch assigns correct TID to frames transmitted between
two stations in the same BSS in AP mode.
The problem is that skb->protocol is not set to ETH_P_IP and it is wrong
to use that field at this stage.
The fix compares the LLC/Protocol headers explicitly to check if the
encapsulated frame is IP frame

Signed-off-by: Guy Cohen <guy.cohen@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 455fadc3d84..024519522d3 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -28,6 +28,7 @@ struct ieee80211_sched_data
 	struct sk_buff_head requeued[TC_80211_MAX_QUEUES];
 };
 
+static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0};
 
 /* given a data frame determine the 802.1p/1d tag to use */
 static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
@@ -54,12 +55,12 @@ static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
 		return skb->priority - 256;
 
 	/* check there is a valid IP header present */
-	offset = ieee80211_get_hdrlen_from_skb(skb) + 8 /* LLC + proto */;
-	if (skb->protocol != htons(ETH_P_IP) ||
-	    skb->len < offset + sizeof(*ip))
+	offset = ieee80211_get_hdrlen_from_skb(skb);
+	if (skb->len < offset + sizeof(llc_ip_hdr) + sizeof(*ip) ||
+	    memcmp(skb->data + offset, llc_ip_hdr, sizeof(llc_ip_hdr)))
 		return 0;
 
-	ip = (struct iphdr *) (skb->data + offset);
+	ip = (struct iphdr *) (skb->data + offset + sizeof(llc_ip_hdr));
 
 	dscp = ip->tos & 0xfc;
 	if (dscp & 0x1c)
-- 
cgit v1.2.3


From f99b751fca5b16bea53c0d6724935e1949350052 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 16 Jan 2008 21:47:40 +0100
Subject: mac80211: fix RCU locking in __ieee80211_rx_handle_packet

Commit c7a51bda ("mac80211: restructure __ieee80211_rx") extracted
__ieee80211_rx_handle_packet out of __ieee80211_rx and hence changed
the locking rules for __ieee80211_rx_handle_packet(), it is now
invoked under RCU lock. There is, however, one instance left where
it contains an rcu_read_unlock() in an error path, which is a bug.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 465fce0f62a..96b0faf40b1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1730,7 +1730,6 @@ void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
 		ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx,
 					     rx.sta);
 		sta_info_put(sta);
-		rcu_read_unlock();
 		return;
 	}
 
-- 
cgit v1.2.3


From c09c7237eadc65916305835ca1e3ee8a03f01159 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <stefano.brivio@polimi.it>
Date: Thu, 17 Jan 2008 00:38:29 +0100
Subject: rc80211-pid: fix last_sample initialization

Fix last_sample initialization. kzalloc'ing the per-STA data wasn't enough,
as jiffies can assume negative values as well. This fixes a bug which
prevented correct PID operation for a while after booting.

Thanks to Michael Buesch for reporting this.

Reported-and-tested-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Stefano Brivio <stefano.brivio@polimi.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_pid_algo.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 962286d893d..554c4baed6f 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -496,6 +496,8 @@ static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
 	if (spinfo == NULL)
 		return NULL;
 
+	spinfo->last_sample = jiffies;
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 	spin_lock_init(&spinfo->events.lock);
 	init_waitqueue_head(&spinfo->events.waitqueue);
-- 
cgit v1.2.3


From 71ebb4aac87e4a1504a155084d658d0a92ac63fb Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Mon, 21 Jan 2008 12:39:12 +0200
Subject: mac80211: fix rx flow sparse errors, make functions static

This patch adds static declarations to functions in the Rx flow in order to
eliminate sparse errors

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_sta.c |  4 ++--
 net/mac80211/rx.c            | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index e7da1cde3ab..2019b4f0528 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -1154,8 +1154,8 @@ end_no_lock:
 	sta_info_put(sta);
 }
 
-void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
-				u16 initiator, u16 reason_code)
+static void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
+				 u16 initiator, u16 reason_code)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 96b0faf40b1..89e1e3070ec 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -295,7 +295,7 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
 }
 
 
-u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
+static u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
 			      struct sk_buff *skb,
 			      struct ieee80211_rx_status *status)
 {
@@ -1664,8 +1664,10 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
  * This is the actual Rx frames handler. as it blongs to Rx path it must
  * be called with rcu_read_lock protection.
  */
-void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb,
-			    struct ieee80211_rx_status *status, u32 load)
+static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
+					 struct sk_buff *skb,
+					 struct ieee80211_rx_status *status,
+					 u32 load)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
@@ -1919,8 +1921,8 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 	return 1;
 }
 
-u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
-			      struct sk_buff *skb)
+static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
+				     struct sk_buff *skb)
 {
 	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-- 
cgit v1.2.3


From 4248d2f81159f62b7c7b83398fede653d449fd56 Mon Sep 17 00:00:00 2001
From: Masakazu Mokuno <mokuno@sm.sony.co.jp>
Date: Tue, 22 Jan 2008 15:22:29 +0900
Subject: WEXT: remove unused variable

As event_type_pk_size[] is not used,  Remove it.

Signed-off-by: Masakazu Mokuno <mokuno@sm.sony.co.jp>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/wext.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 1e4cf615f87..2c569b63e7d 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -417,20 +417,6 @@ static const int event_type_size[] = {
 	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
 };
 
-/* Size (in bytes) of various events, as packed */
-static const int event_type_pk_size[] = {
-	IW_EV_LCP_PK_LEN,		/* IW_HEADER_TYPE_NULL */
-	0,
-	IW_EV_CHAR_PK_LEN,		/* IW_HEADER_TYPE_CHAR */
-	0,
-	IW_EV_UINT_PK_LEN,		/* IW_HEADER_TYPE_UINT */
-	IW_EV_FREQ_PK_LEN,		/* IW_HEADER_TYPE_FREQ */
-	IW_EV_ADDR_PK_LEN,		/* IW_HEADER_TYPE_ADDR */
-	0,
-	IW_EV_POINT_PK_LEN,		/* Without variable payload */
-	IW_EV_PARAM_PK_LEN,		/* IW_HEADER_TYPE_PARAM */
-	IW_EV_QUAL_PK_LEN,		/* IW_HEADER_TYPE_QUAL */
-};
 
 /************************ COMMON SUBROUTINES ************************/
 /*
-- 
cgit v1.2.3


From 91cde6f7d295a9eafea51d821f6e609288736c79 Mon Sep 17 00:00:00 2001
From: Ross Burton <ross@openedhand.com>
Date: Tue, 22 Jan 2008 18:27:53 -0800
Subject: [IrDA]: LMP discovery timer not started by default

By default, LMP sets up a 3 seconds timer for discovery.
We don't need it until discovery is set to 1.

Signed-off-by: Ross Burton <ross@openedhand.com>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irlmp.c       |  7 +++++--
 net/irda/irlmp_event.c |  4 +---
 net/irda/irsysctl.c    | 28 +++++++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index f24cb755908..135ac6907bb 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -103,9 +103,12 @@ int __init irlmp_init(void)
 	irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */
 	strcpy(sysctl_devname, "Linux");
 
-	/* Do discovery every 3 seconds */
 	init_timer(&irlmp->discovery_timer);
-	irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
+
+	/* Do discovery every 3 seconds, conditionaly */
+	if (sysctl_discovery)
+		irlmp_start_discovery_timer(irlmp,
+					    sysctl_discovery_timeout*HZ);
 
 	return 0;
 }
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
index 1bba87e7860..150cd3f1129 100644
--- a/net/irda/irlmp_event.c
+++ b/net/irda/irlmp_event.c
@@ -174,9 +174,7 @@ void irlmp_discovery_timer_expired(void *data)
 	/* We always cleanup the log (active & passive discovery) */
 	irlmp_do_expiry();
 
-	/* Active discovery is conditional */
-	if (sysctl_discovery)
-		irlmp_do_discovery(sysctl_discovery_slots);
+	irlmp_do_discovery(sysctl_discovery_slots);
 
 	/* Restart timer */
 	irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ);
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index d8aba869ff1..9ab3df15425 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -29,6 +29,8 @@
 #include <linux/init.h>
 
 #include <net/irda/irda.h>		/* irda_debug */
+#include <net/irda/irlmp.h>
+#include <net/irda/timer.h>
 #include <net/irda/irias_object.h>
 
 extern int  sysctl_discovery;
@@ -45,6 +47,8 @@ extern int  sysctl_max_noreply_time;
 extern int  sysctl_warn_noreply_time;
 extern int  sysctl_lap_keepalive_time;
 
+extern struct irlmp_cb *irlmp;
+
 /* this is needed for the proc_dointvec_minmax - Jean II */
 static int max_discovery_slots = 16;		/* ??? */
 static int min_discovery_slots = 1;
@@ -85,6 +89,27 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
 	return ret;
 }
 
+
+static int do_discovery(ctl_table *table, int write, struct file *filp,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       if (ret)
+	       return ret;
+
+       if (irlmp == NULL)
+	       return -ENODEV;
+
+       if (sysctl_discovery)
+	       irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
+       else
+	       del_timer_sync(&irlmp->discovery_timer);
+
+       return ret;
+}
+
 /* One file */
 static ctl_table irda_table[] = {
 	{
@@ -93,7 +118,8 @@ static ctl_table irda_table[] = {
 		.data		= &sysctl_discovery,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &do_discovery,
+		.strategy       = &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_IRDA_DEVNAME,
-- 
cgit v1.2.3


From a1f6f5a7689098f01d9b7b7181795ee78563ce37 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 22 Jan 2008 18:30:06 -0800
Subject: [AF_X25]: constify function pointer tables

Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 78b05342606..3f52b09bed0 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -293,7 +293,7 @@ static const struct file_operations x25_seq_route_fops = {
 	.release	= seq_release,
 };
 
-static struct file_operations x25_seq_forward_fops = {
+static const struct file_operations x25_seq_forward_fops = {
 	.owner		= THIS_MODULE,
 	.open		= x25_seq_forward_open,
 	.read		= seq_read,
-- 
cgit v1.2.3


From bc3c8c1e02ae89668239742fd592f21e1998fa46 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:51:50 -0800
Subject: [IPV4] fib_trie: put leaf nodes in a slab cache

This improves locality for operations that touch all the leaves.  Save
space since these entries don't need to be hardware cache aligned.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index a52334d30cf..9291e7c41fc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -162,6 +162,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn);
 static void tnode_free(struct tnode *tn);
 
 static struct kmem_cache *fn_alias_kmem __read_mostly;
+static struct kmem_cache *trie_leaf_kmem __read_mostly;
 
 static inline struct tnode *node_parent(struct node *node)
 {
@@ -325,7 +326,8 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa)
 
 static void __leaf_free_rcu(struct rcu_head *head)
 {
-	kfree(container_of(head, struct leaf, rcu));
+	struct leaf *l = container_of(head, struct leaf, rcu);
+	kmem_cache_free(trie_leaf_kmem, l);
 }
 
 static void __leaf_info_free_rcu(struct rcu_head *head)
@@ -375,7 +377,7 @@ static inline void tnode_free(struct tnode *tn)
 
 static struct leaf *leaf_new(void)
 {
-	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
+	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
 	if (l) {
 		l->parent = T_LEAF;
 		INIT_HLIST_HEAD(&l->list);
@@ -1938,7 +1940,12 @@ out:
 void __init fib_hash_init(void)
 {
 	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
-					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+					  0, SLAB_PANIC, NULL);
+
+	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
+					   max(sizeof(struct leaf),
+					       sizeof(struct leaf_info)),
+					   0, SLAB_PANIC, NULL);
 }
 
 
-- 
cgit v1.2.3


From a07f5f508a4d9728c8e57d7f66294bf5b254ff7f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:53:36 -0800
Subject: [IPV4] fib_trie: style cleanup

Style cleanups:
      * make check_leaf return -1 or plen, rather than by reference
      * Get rid of #ifdef that is always set
      * split out embedded function calls in if statements.
      * checkpatch warnings

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 237 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 142 insertions(+), 95 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 9291e7c41fc..899210b3c6b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -155,7 +155,8 @@ struct trie {
 };
 
 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
-static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
+static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
+				  int wasfull);
 static struct node *resize(struct trie *t, struct tnode *tn);
 static struct tnode *inflate(struct trie *t, struct tnode *tn);
 static struct tnode *halve(struct trie *t, struct tnode *tn);
@@ -395,7 +396,7 @@ static struct leaf_info *leaf_info_new(int plen)
 	return li;
 }
 
-static struct tnode* tnode_new(t_key key, int pos, int bits)
+static struct tnode *tnode_new(t_key key, int pos, int bits)
 {
 	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
 	struct tnode *tn = tnode_alloc(sz);
@@ -427,7 +428,8 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
 	return ((struct tnode *) n)->pos == tn->pos + tn->bits;
 }
 
-static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
+static inline void put_child(struct trie *t, struct tnode *tn, int i,
+			     struct node *n)
 {
 	tnode_put_child_reorg(tn, i, n, -1);
 }
@@ -437,7 +439,8 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod
   * Update the value of full_children and empty_children.
   */
 
-static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
+static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
+				  int wasfull)
 {
 	struct node *chi = tn->child[i];
 	int isfull;
@@ -577,11 +580,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 	err = 0;
 	max_resize = 10;
 	while ((tn->full_children > 0 &&  max_resize-- &&
-	       50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
-				inflate_threshold_use * tnode_child_length(tn))) {
+		50 * (tn->full_children + tnode_child_length(tn)
+		      - tn->empty_children)
+		>= inflate_threshold_use * tnode_child_length(tn))) {
 
 		old_tn = tn;
 		tn = inflate(t, tn);
+
 		if (IS_ERR(tn)) {
 			tn = old_tn;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -593,11 +598,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	if (max_resize < 0) {
 		if (!tn->parent)
-			printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
-			       inflate_threshold_root, tn->bits);
+			pr_warning("Fix inflate_threshold_root."
+				   " Now=%d size=%d bits\n",
+				   inflate_threshold_root, tn->bits);
 		else
-			printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
-			       inflate_threshold, tn->bits);
+			pr_warning("Fix inflate_threshold."
+				   " Now=%d size=%d bits\n",
+				   inflate_threshold, tn->bits);
 	}
 
 	check_tnode(tn);
@@ -634,11 +641,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	if (max_resize < 0) {
 		if (!tn->parent)
-			printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
-			       halve_threshold_root, tn->bits);
+			pr_warning("Fix halve_threshold_root."
+				   " Now=%d size=%d bits\n",
+				   halve_threshold_root, tn->bits);
 		else
-			printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
-			       halve_threshold, tn->bits);
+			pr_warning("Fix halve_threshold."
+				   " Now=%d size=%d bits\n",
+				   halve_threshold, tn->bits);
 	}
 
 	/* Only one child remains */
@@ -681,8 +690,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
 	 */
 
 	for (i = 0; i < olen; i++) {
-		struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
+		struct tnode *inode;
 
+		inode = (struct tnode *) tnode_get_child(oldtnode, i);
 		if (inode &&
 		    IS_TNODE(inode) &&
 		    inode->pos == oldtnode->pos + oldtnode->bits &&
@@ -722,8 +732,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
 
 		if (IS_LEAF(node) || ((struct tnode *) node)->pos >
 		   tn->pos + tn->bits - 1) {
-			if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
-					     1) == 0)
+			if (tkey_extract_bits(node->key,
+					      oldtnode->pos + oldtnode->bits,
+					      1) == 0)
 				put_child(t, tn, 2*i, node);
 			else
 				put_child(t, tn, 2*i+1, node);
@@ -899,7 +910,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
 	return NULL;
 }
 
-static inline struct list_head * get_fa_head(struct leaf *l, int plen)
+static inline struct list_head *get_fa_head(struct leaf *l, int plen)
 {
 	struct leaf_info *li = find_leaf_info(l, plen);
 
@@ -949,7 +960,10 @@ fib_find_node(struct trie *t, u32 key)
 
 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
 			pos = tn->pos + tn->bits;
-			n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
+			n = tnode_get_child_rcu(tn,
+						tkey_extract_bits(key,
+								  tn->pos,
+								  tn->bits));
 		} else
 			break;
 	}
@@ -970,8 +984,10 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
 	while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
-		tn = (struct tnode *) resize (t, (struct tnode *)tn);
-		tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
+		tn = (struct tnode *) resize(t, (struct tnode *)tn);
+
+		tnode_put_child_reorg((struct tnode *)tp, cindex,
+				      (struct node *)tn, wasfull);
 
 		tp = node_parent((struct node *) tn);
 		if (!tp)
@@ -981,9 +997,9 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
 
 	/* Handle last (top) tnode */
 	if (IS_TNODE(tn))
-		tn = (struct tnode*) resize(t, (struct tnode *)tn);
+		tn = (struct tnode *)resize(t, (struct tnode *)tn);
 
-	return (struct node*) tn;
+	return (struct node *)tn;
 }
 
 /* only used from updater-side */
@@ -1028,7 +1044,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
 			tp = tn;
 			pos = tn->pos + tn->bits;
-			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
+			n = tnode_get_child(tn,
+					    tkey_extract_bits(key,
+							      tn->pos,
+							      tn->bits));
 
 			BUG_ON(n && node_parent(n) != tn);
 		} else
@@ -1113,16 +1132,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 
 		if (tp) {
 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-			put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
+			put_child(t, (struct tnode *)tp, cindex,
+				  (struct node *)tn);
 		} else {
-			rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
+			rcu_assign_pointer(t->trie, (struct node *)tn);
 			tp = tn;
 		}
 	}
 
 	if (tp && tp->pos + tp->bits > 32)
-		printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
-		       tp, tp->pos, tp->bits, key, plen);
+		pr_warning("fib_trie"
+			   " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
+			   tp, tp->pos, tp->bits, key, plen);
 
 	/* Rebalance the trie */
 
@@ -1235,10 +1256,10 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 				break;
 			if (fa->fa_type == cfg->fc_type &&
 			    fa->fa_scope == cfg->fc_scope &&
-			    fa->fa_info == fi) {
+			    fa->fa_info == fi)
 				goto out;
-			}
 		}
+
 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
@@ -1289,38 +1310,40 @@ err:
 
 
 /* should be called with rcu_read_lock */
-static inline int check_leaf(struct trie *t, struct leaf *l,
-			     t_key key, int *plen, const struct flowi *flp,
-			     struct fib_result *res)
+static int check_leaf(struct trie *t, struct leaf *l,
+		      t_key key,  const struct flowi *flp,
+		      struct fib_result *res)
 {
-	int err, i;
-	__be32 mask;
 	struct leaf_info *li;
 	struct hlist_head *hhead = &l->list;
 	struct hlist_node *node;
 
 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
-		i = li->plen;
-		mask = inet_make_mask(i);
+		int err;
+		int plen = li->plen;
+		__be32 mask = inet_make_mask(plen);
+
 		if (l->key != (key & ntohl(mask)))
 			continue;
 
-		if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
-			*plen = i;
+		err = fib_semantic_match(&li->falh, flp, res,
+					 htonl(l->key), mask, plen);
+
 #ifdef CONFIG_IP_FIB_TRIE_STATS
+		if (err <= 0)
 			t->stats.semantic_match_passed++;
+		else
+			t->stats.semantic_match_miss++;
 #endif
-			return err;
-		}
-#ifdef CONFIG_IP_FIB_TRIE_STATS
-		t->stats.semantic_match_miss++;
-#endif
+		if (err <= 0)
+			return plen;
 	}
-	return 1;
+
+	return -1;
 }
 
-static int
-fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
+			  struct fib_result *res)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	int plen, ret = 0;
@@ -1347,10 +1370,13 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 
 	/* Just a leaf? */
 	if (IS_LEAF(n)) {
-		if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
-			goto found;
-		goto failed;
+		plen = check_leaf(t, (struct leaf *)n, key, flp, res);
+		if (plen < 0)
+			goto failed;
+		ret = 0;
+		goto found;
 	}
+
 	pn = (struct tnode *) n;
 	chopped_off = 0;
 
@@ -1372,14 +1398,14 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 		}
 
 		if (IS_LEAF(n)) {
-			if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
-				goto found;
-			else
+			plen = check_leaf(t, (struct leaf *)n, key, flp, res);
+			if (plen < 0)
 				goto backtrace;
+
+			ret = 0;
+			goto found;
 		}
 
-#define HL_OPTIMIZE
-#ifdef HL_OPTIMIZE
 		cn = (struct tnode *)n;
 
 		/*
@@ -1408,12 +1434,13 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 		 * *are* zero.
 		 */
 
-		/* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
+		/* NOTA BENE: Checking only skipped bits
+		   for the new node here */
 
 		if (current_prefix_length < pos+bits) {
 			if (tkey_extract_bits(cn->key, current_prefix_length,
-						cn->pos - current_prefix_length) != 0 ||
-			    !(cn->child[0]))
+						cn->pos - current_prefix_length)
+			    || !(cn->child[0]))
 				goto backtrace;
 		}
 
@@ -1436,14 +1463,17 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 		 * new tnode's key.
 		 */
 
-		/* Note: We aren't very concerned about the piece of the key
-		 * that precede pn->pos+pn->bits, since these have already been
-		 * checked. The bits after cn->pos aren't checked since these are
-		 * by definition "unknown" at this point. Thus, what we want to
-		 * see is if we are about to enter the "prefix matching" state,
-		 * and in that case verify that the skipped bits that will prevail
-		 * throughout this subtree are zero, as they have to be if we are
-		 * to find a matching prefix.
+		/*
+		 * Note: We aren't very concerned about the piece of
+		 * the key that precede pn->pos+pn->bits, since these
+		 * have already been checked. The bits after cn->pos
+		 * aren't checked since these are by definition
+		 * "unknown" at this point. Thus, what we want to see
+		 * is if we are about to enter the "prefix matching"
+		 * state, and in that case verify that the skipped
+		 * bits that will prevail throughout this subtree are
+		 * zero, as they have to be if we are to find a
+		 * matching prefix.
 		 */
 
 		node_prefix = mask_pfx(cn->key, cn->pos);
@@ -1451,13 +1481,15 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 		pref_mismatch = key_prefix^node_prefix;
 		mp = 0;
 
-		/* In short: If skipped bits in this node do not match the search
-		 * key, enter the "prefix matching" state.directly.
+		/*
+		 * In short: If skipped bits in this node do not match
+		 * the search key, enter the "prefix matching"
+		 * state.directly.
 		 */
 		if (pref_mismatch) {
 			while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
 				mp++;
-				pref_mismatch = pref_mismatch <<1;
+				pref_mismatch = pref_mismatch << 1;
 			}
 			key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
 
@@ -1467,7 +1499,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 			if (current_prefix_length >= cn->pos)
 				current_prefix_length = mp;
 		}
-#endif
+
 		pn = (struct tnode *)n; /* Descend */
 		chopped_off = 0;
 		continue;
@@ -1476,12 +1508,14 @@ backtrace:
 		chopped_off++;
 
 		/* As zero don't change the child key (cindex) */
-		while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
+		while ((chopped_off <= pn->bits)
+		       && !(cindex & (1<<(chopped_off-1))))
 			chopped_off++;
 
 		/* Decrease current_... with bits chopped off */
 		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
-			current_prefix_length = pn->pos + pn->bits - chopped_off;
+			current_prefix_length = pn->pos + pn->bits
+				- chopped_off;
 
 		/*
 		 * Either we do the actual chop off according or if we have
@@ -1531,7 +1565,8 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	while (n != NULL && IS_TNODE(n)) {
 		struct tnode *tn = (struct tnode *) n;
 		check_tnode(tn);
-		n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
+		n = tnode_get_child(tn, tkey_extract_bits(key,
+							  tn->pos, tn->bits));
 
 		BUG_ON(n && node_parent(n) != tn);
 	}
@@ -1697,7 +1732,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 		if (IS_LEAF(trie))          /* trie w. just a leaf */
 			return (struct leaf *) trie;
 
-		p = (struct tnode*) trie;  /* Start */
+		p = (struct tnode *)trie;  /* Start */
 	} else
 		p = node_parent_rcu(c);
 
@@ -1765,8 +1800,9 @@ static int fn_trie_flush(struct fib_table *tb)
 	return found;
 }
 
-static void
-fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+static void fn_trie_select_default(struct fib_table *tb,
+				   const struct flowi *flp,
+				   struct fib_result *res)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	int order, last_idx;
@@ -1837,7 +1873,8 @@ out:
 	rcu_read_unlock();
 }
 
-static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
+static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
+			   struct fib_table *tb,
 			   struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int i, s_i;
@@ -1876,8 +1913,8 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 	return skb->len;
 }
 
-static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
-			     struct netlink_callback *cb)
+static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb,
+			     struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int h, s_h;
 	struct list_head *fa_head;
@@ -1900,7 +1937,7 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
 		if (list_empty(fa_head))
 			continue;
 
-		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
+		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb) < 0) {
 			cb->args[3] = h;
 			return -1;
 		}
@@ -1909,7 +1946,8 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
 	return skb->len;
 }
 
-static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
+static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
+			struct netlink_callback *cb)
 {
 	int m, s_m;
 	struct trie *t = (struct trie *) tb->tb_data;
@@ -1924,7 +1962,7 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 			memset(&cb->args[3], 0,
 				sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
-		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
+		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb) < 0) {
 			cb->args[2] = m;
 			goto out;
 		}
@@ -1939,7 +1977,8 @@ out:
 
 void __init fib_hash_init(void)
 {
-	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
+	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
+					  sizeof(struct fib_alias),
 					  0, SLAB_PANIC, NULL);
 
 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
@@ -1973,7 +2012,7 @@ struct fib_table *fib_hash_table(u32 id)
 	memset(t, 0, sizeof(*t));
 
 	if (id == RT_TABLE_LOCAL)
-		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
+		pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
 
 	return tb;
 }
@@ -2107,7 +2146,8 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	else
 		avdepth = 0;
 
-	seq_printf(seq, "\tAver depth:     %u.%02d\n", avdepth / 100, avdepth % 100 );
+	seq_printf(seq, "\tAver depth:     %u.%02d\n",
+		   avdepth / 100, avdepth % 100);
 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
 
 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
@@ -2139,16 +2179,20 @@ static void trie_show_usage(struct seq_file *seq,
 			    const struct trie_use_stats *stats)
 {
 	seq_printf(seq, "\nCounters:\n---------\n");
-	seq_printf(seq,"gets = %u\n", stats->gets);
-	seq_printf(seq,"backtracks = %u\n", stats->backtrack);
-	seq_printf(seq,"semantic match passed = %u\n", stats->semantic_match_passed);
-	seq_printf(seq,"semantic match miss = %u\n", stats->semantic_match_miss);
-	seq_printf(seq,"null node hit= %u\n", stats->null_node_hit);
-	seq_printf(seq,"skipped node resize = %u\n\n", stats->resize_node_skipped);
+	seq_printf(seq, "gets = %u\n", stats->gets);
+	seq_printf(seq, "backtracks = %u\n", stats->backtrack);
+	seq_printf(seq, "semantic match passed = %u\n",
+		   stats->semantic_match_passed);
+	seq_printf(seq, "semantic match miss = %u\n",
+		   stats->semantic_match_miss);
+	seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
+	seq_printf(seq, "skipped node resize = %u\n\n",
+		   stats->resize_node_skipped);
 }
 #endif /*  CONFIG_IP_FIB_TRIE_STATS */
 
-static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
+static void fib_trie_show(struct seq_file *seq, const char *name,
+			  struct trie *trie)
 {
 	struct trie_stat stat;
 
@@ -2166,7 +2210,8 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 	struct fib_table *tb;
 
 	seq_printf(seq,
-		   "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
+		   "Basic info: size of leaf:"
+		   " %Zd bytes, size of tnode: %Zd bytes.\n",
 		   sizeof(struct leaf), sizeof(struct tnode));
 
 	tb = fib_get_table(net, RT_TABLE_LOCAL);
@@ -2439,10 +2484,11 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 
 	if (iter->trie == iter->trie_local)
 		return 0;
+
 	if (IS_TNODE(l))
 		return 0;
 
-	for (i=32; i>=0; i--) {
+	for (i = 32; i >= 0; i--) {
 		struct leaf_info *li = find_leaf_info(l, i);
 		struct fib_alias *fa;
 		__be32 mask, prefix;
@@ -2469,7 +2515,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 					 fi->fib_nh->nh_gw, flags, 0, 0,
 					 fi->fib_priority,
 					 mask,
-					 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
+					 (fi->fib_advmss ?
+					  fi->fib_advmss + 40 : 0),
 					 fi->fib_window,
 					 fi->fib_rtt >> 3);
 			else
-- 
cgit v1.2.3


From 936722922f6d2366378de606a40c14f96915474d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:54:05 -0800
Subject: [IPV4] fib_trie: compute size when needed

Compute the number of prefixes when needed, rather than doing bookeeping.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 899210b3c6b..1a9231fcebb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -143,12 +143,12 @@ struct trie_stat {
 	unsigned int tnodes;
 	unsigned int leaves;
 	unsigned int nullpointers;
+	unsigned int prefixes;
 	unsigned int nodesizes[MAX_STAT_DEPTH];
 };
 
 struct trie {
 	struct node *trie;
-	unsigned int size;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	struct trie_use_stats stats;
 #endif
@@ -1292,8 +1292,6 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
-	t->size++;
-
 	rt_cache_flush(-1);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
@@ -1579,9 +1577,6 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	 * Key found.
 	 * Remove the leaf and rebalance the tree
 	 */
-
-	t->size--;
-
 	tp = node_parent(n);
 	tnode_free((struct tnode *) n);
 
@@ -2114,10 +2109,17 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 	for (n = fib_trie_get_first(&iter, t); n;
 	     n = fib_trie_get_next(&iter)) {
 		if (IS_LEAF(n)) {
+			struct leaf *l = (struct leaf *)n;
+			struct leaf_info *li;
+			struct hlist_node *tmp;
+
 			s->leaves++;
 			s->totdepth += iter.depth;
 			if (iter.depth > s->maxdepth)
 				s->maxdepth = iter.depth;
+
+			hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
+				++s->prefixes;
 		} else {
 			const struct tnode *tn = (const struct tnode *) n;
 			int i;
@@ -2151,8 +2153,11 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
 
 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
-
 	bytes = sizeof(struct leaf) * stat->leaves;
+
+	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
+	bytes += sizeof(struct leaf_info) * stat->prefixes;
+
 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
 	bytes += sizeof(struct tnode) * stat->tnodes;
 
@@ -2196,8 +2201,8 @@ static void fib_trie_show(struct seq_file *seq, const char *name,
 {
 	struct trie_stat stat;
 
-	seq_printf(seq, "%s: %d\n", name, trie->size);
 	trie_collect_stats(trie, &stat);
+	seq_printf(seq, "%s:\n", name);
 	trie_show_stats(seq, &stat);
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	trie_show_usage(seq, &trie->stats);
-- 
cgit v1.2.3


From 1328042e268c936189f15eba5bd9a5a4605a8581 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:54:37 -0800
Subject: [IPV4] fib_trie: use hash list

The code to dump can use the existing hash chain rather than doing
repeated lookup.

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 49 ++++++++++++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1a9231fcebb..c19d68551eb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2399,31 +2399,30 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 
 	} else {
 		struct leaf *l = (struct leaf *) n;
-		int i;
+		struct leaf_info *li;
+		struct hlist_node *node;
+
 		__be32 val = htonl(l->key);
 
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
-		for (i = 32; i >= 0; i--) {
-			struct leaf_info *li = find_leaf_info(l, i);
-
-			if (li) {
-				struct fib_alias *fa;
-
-				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
-					char buf1[32], buf2[32];
-
-					seq_indent(seq, iter->depth+1);
-					seq_printf(seq, "  /%d %s %s", i,
-						   rtn_scope(buf1, sizeof(buf1),
-							     fa->fa_scope),
-						   rtn_type(buf2, sizeof(buf2),
-							     fa->fa_type));
-					if (fa->fa_tos)
-						seq_printf(seq, "tos =%d\n",
-							   fa->fa_tos);
-					seq_putc(seq, '\n');
-				}
+
+		hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
+			struct fib_alias *fa;
+
+			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+				char buf1[32], buf2[32];
+
+				seq_indent(seq, iter->depth+1);
+				seq_printf(seq, "  /%d %s %s", li->plen,
+					   rtn_scope(buf1, sizeof(buf1),
+						     fa->fa_scope),
+					   rtn_type(buf2, sizeof(buf2),
+						    fa->fa_type));
+				if (fa->fa_tos)
+					seq_printf(seq, "tos =%d\n",
+						   fa->fa_tos);
+				seq_putc(seq, '\n');
 			}
 		}
 	}
@@ -2477,8 +2476,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 {
 	const struct fib_trie_iter *iter = seq->private;
 	struct leaf *l = v;
-	int i;
-	char bf[128];
+	struct leaf_info *li;
+	struct hlist_node *node;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2493,8 +2492,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 	if (IS_TNODE(l))
 		return 0;
 
-	for (i = 32; i >= 0; i--) {
-		struct leaf_info *li = find_leaf_info(l, i);
+	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
 		struct fib_alias *fa;
 		__be32 mask, prefix;
 
@@ -2507,6 +2505,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			const struct fib_info *fi = fa->fa_info;
 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
+			char bf[128];
 
 			if (fa->fa_type == RTN_BROADCAST
 			    || fa->fa_type == RTN_MULTICAST)
-- 
cgit v1.2.3


From 64347f786d13349d6a6f812f3a83c269e26c0136 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:55:01 -0800
Subject: [IPV4] fib_trie: dump message multiple part flag

Match fib_hash, and set NLM_F_MULTI to handle multiple part messages.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c19d68551eb..41264695039 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1898,7 +1898,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 				  xkey,
 				  plen,
 				  fa->fa_tos,
-				  fa->fa_info, 0) < 0) {
+				  fa->fa_info, NLM_F_MULTI) < 0) {
 			cb->args[4] = i;
 			return -1;
 		}
-- 
cgit v1.2.3


From 82cfbb008572b1a953091ef78f767aa3ca213092 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:55:32 -0800
Subject: [IPV4] fib_trie: iterator recode

Remove the complex loop structure of nextleaf() and replace it with a
simpler tree walker. This improves the performance and is much
cleaner.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 103 ++++++++++++++++++++++++++--------------------------
 1 file changed, 52 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 41264695039..dab439b5267 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1711,64 +1711,65 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
 	return found;
 }
 
-/* rcu_read_lock needs to be hold by caller from readside */
-
-static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
+/*
+ * Scan for the next right leaf starting at node p->child[idx]
+ * Since we have back pointer, no recursion necessary.
+ */
+static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
 {
-	struct node *c = (struct node *) thisleaf;
-	struct tnode *p;
-	int idx;
-	struct node *trie = rcu_dereference(t->trie);
+	do {
+		t_key idx;
 
-	if (c == NULL) {
-		if (trie == NULL)
-			return NULL;
-
-		if (IS_LEAF(trie))          /* trie w. just a leaf */
-			return (struct leaf *) trie;
-
-		p = (struct tnode *)trie;  /* Start */
-	} else
-		p = node_parent_rcu(c);
-
-	while (p) {
-		int pos, last;
-
-		/*  Find the next child of the parent */
 		if (c)
-			pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
+			idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
 		else
-			pos = 0;
-
-		last = 1 << p->bits;
-		for (idx = pos; idx < last ; idx++) {
-			c = rcu_dereference(p->child[idx]);
+			idx = 0;
 
+		while (idx < 1u << p->bits) {
+			c = tnode_get_child_rcu(p, idx++);
 			if (!c)
 				continue;
 
-			/* Decend if tnode */
-			while (IS_TNODE(c)) {
-				p = (struct tnode *) c;
-				idx = 0;
-
-				/* Rightmost non-NULL branch */
-				if (p && IS_TNODE(p))
-					while (!(c = rcu_dereference(p->child[idx]))
-					       && idx < (1<<p->bits)) idx++;
-
-				/* Done with this tnode? */
-				if (idx >= (1 << p->bits) || !c)
-					goto up;
+			if (IS_LEAF(c)) {
+				prefetch(p->child[idx]);
+				return (struct leaf *) c;
 			}
-			return (struct leaf *) c;
+
+			/* Rescan start scanning in new node */
+			p = (struct tnode *) c;
+			idx = 0;
 		}
-up:
-		/* No more children go up one step  */
+
+		/* Node empty, walk back up to parent */
 		c = (struct node *) p;
-		p = node_parent_rcu(c);
-	}
-	return NULL; /* Ready. Root of trie */
+	} while ( (p = node_parent_rcu(c)) != NULL);
+
+	return NULL; /* Root of trie */
+}
+
+
+static struct leaf *trie_firstleaf(struct trie *t)
+{
+	struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
+
+	if (!n)
+		return NULL;
+
+	if (IS_LEAF(n))          /* trie is just a leaf */
+		return (struct leaf *) n;
+
+	return leaf_walk_rcu(n, NULL);
+}
+
+static struct leaf *trie_nextleaf(struct leaf *l)
+{
+	struct node *c = (struct node *) l;
+	struct tnode *p = node_parent(c);
+
+	if (!p)
+		return NULL;	/* trie with just one leaf */
+
+	return leaf_walk_rcu(p, c);
 }
 
 /*
@@ -1778,9 +1779,9 @@ static int fn_trie_flush(struct fib_table *tb)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct leaf *ll = NULL, *l = NULL;
-	int found = 0, h;
+	int found = 0;
 
-	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
+	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
 		found += trie_flush_leaf(t, l);
 
 		if (ll && hlist_empty(&ll->list))
@@ -1887,7 +1888,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 			i++;
 			continue;
 		}
-		BUG_ON(!fa->fa_info);
 
 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
 				  cb->nlh->nlmsg_seq,
@@ -1916,8 +1916,9 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb,
 	struct leaf *l = NULL;
 
 	s_h = cb->args[3];
+	h = 0;
 
-	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
+	for (l = trie_firstleaf(t); l != NULL; h++, l = trie_nextleaf(l)) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-- 
cgit v1.2.3


From a88ee229253b31e3a844b30525ff77fbfe3111d3 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:56:11 -0800
Subject: [IPV4] fib_trie: dump table in sorted order

It is easier with TRIE to dump the data traversal rather than
interating over every possible prefix. This saves some time and makes
the dump come out in sorted order.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 74 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index dab439b5267..2ea94ebe19f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1908,67 +1908,71 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 	return skb->len;
 }
 
-static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb,
-			     struct sk_buff *skb, struct netlink_callback *cb)
+
+static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
+			struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int h, s_h;
-	struct list_head *fa_head;
-	struct leaf *l = NULL;
+	struct leaf_info *li;
+	struct hlist_node *node;
+	int i, s_i;
 
-	s_h = cb->args[3];
-	h = 0;
+	s_i = cb->args[3];
+	i = 0;
 
-	for (l = trie_firstleaf(t); l != NULL; h++, l = trie_nextleaf(l)) {
-		if (h < s_h)
+	/* rcu_read_lock is hold by caller */
+	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
+		if (i < s_i) {
+			i++;
 			continue;
-		if (h > s_h)
-			memset(&cb->args[4], 0,
-			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
-
-		fa_head = get_fa_head(l, plen);
+		}
 
-		if (!fa_head)
-			continue;
+		if (i > s_i)
+			cb->args[4] = 0;
 
-		if (list_empty(fa_head))
+		if (list_empty(&li->falh))
 			continue;
 
-		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb) < 0) {
-			cb->args[3] = h;
+		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
+			cb->args[3] = i;
 			return -1;
 		}
+		i++;
 	}
-	cb->args[3] = h;
+
+	cb->args[3] = i;
 	return skb->len;
 }
 
+
+
 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
 			struct netlink_callback *cb)
 {
-	int m, s_m;
+	struct leaf *l;
 	struct trie *t = (struct trie *) tb->tb_data;
-
-	s_m = cb->args[2];
+	int h = 0;
+	int s_h = cb->args[2];
 
 	rcu_read_lock();
-	for (m = 0; m <= 32; m++) {
-		if (m < s_m)
+	for (h = 0, l = trie_firstleaf(t); l != NULL; h++, l = trie_nextleaf(l)) {
+		if (h < s_h)
 			continue;
-		if (m > s_m)
-			memset(&cb->args[3], 0,
-				sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
-		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb) < 0) {
-			cb->args[2] = m;
-			goto out;
+		if (h > s_h) {
+			cb->args[3] = 0;
+			cb->args[4] = 0;
+		}
+
+		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+			rcu_read_unlock();
+			cb->args[2] = h;
+			return -1;
 		}
 	}
 	rcu_read_unlock();
-	cb->args[2] = m;
+
+	cb->args[2] = h;
 	return skb->len;
-out:
-	rcu_read_unlock();
-	return -1;
 }
 
 void __init fib_hash_init(void)
-- 
cgit v1.2.3


From 9195bef7fb0ba0a91d5ffa566bcf8e007e3c7172 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:56:34 -0800
Subject: [IPV4] fib_trie: avoid extra search on delete

Get rid of extra search that made route deletion O(n).

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 50 ++++++++++++--------------------------------------
 1 file changed, 12 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2ea94ebe19f..441c4eafb9e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1545,49 +1545,23 @@ found:
 	return ret;
 }
 
-/* only called from updater side */
-static int trie_leaf_remove(struct trie *t, t_key key)
+/*
+ * Remove the leaf and return parent.
+ */
+static void trie_leaf_remove(struct trie *t, struct leaf *l)
 {
-	t_key cindex;
-	struct tnode *tp = NULL;
-	struct node *n = t->trie;
-	struct leaf *l;
-
-	pr_debug("entering trie_leaf_remove(%p)\n", n);
+	struct tnode *tp = node_parent((struct node *) l);
 
-	/* Note that in the case skipped bits, those bits are *not* checked!
-	 * When we finish this, we will have NULL or a T_LEAF, and the
-	 * T_LEAF may or may not match our key.
-	 */
-
-	while (n != NULL && IS_TNODE(n)) {
-		struct tnode *tn = (struct tnode *) n;
-		check_tnode(tn);
-		n = tnode_get_child(tn, tkey_extract_bits(key,
-							  tn->pos, tn->bits));
-
-		BUG_ON(n && node_parent(n) != tn);
-	}
-	l = (struct leaf *) n;
-
-	if (!n || !tkey_equals(l->key, key))
-		return 0;
-
-	/*
-	 * Key found.
-	 * Remove the leaf and rebalance the tree
-	 */
-	tp = node_parent(n);
-	tnode_free((struct tnode *) n);
+	pr_debug("entering trie_leaf_remove(%p)\n", l);
 
 	if (tp) {
-		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
+		t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
 		put_child(t, (struct tnode *)tp, cindex, NULL);
 		rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
 	} else
 		rcu_assign_pointer(t->trie, NULL);
 
-	return 1;
+	tnode_free((struct tnode *) l);
 }
 
 /*
@@ -1665,7 +1639,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 	}
 
 	if (hlist_empty(&l->list))
-		trie_leaf_remove(t, key);
+		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
 		rt_cache_flush(-1);
@@ -1778,19 +1752,19 @@ static struct leaf *trie_nextleaf(struct leaf *l)
 static int fn_trie_flush(struct fib_table *tb)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
-	struct leaf *ll = NULL, *l = NULL;
+	struct leaf *l, *ll = NULL;
 	int found = 0;
 
 	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
 		found += trie_flush_leaf(t, l);
 
 		if (ll && hlist_empty(&ll->list))
-			trie_leaf_remove(t, ll->key);
+			trie_leaf_remove(t, ll);
 		ll = l;
 	}
 
 	if (ll && hlist_empty(&ll->list))
-		trie_leaf_remove(t, ll->key);
+		trie_leaf_remove(t, ll);
 
 	pr_debug("trie_flush found=%d\n", found);
 	return found;
-- 
cgit v1.2.3


From d5ce8a0e97073169b5fe0b7c52bd020cdb017dfa Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Tue, 22 Jan 2008 21:57:22 -0800
Subject: [IPV4] fib_trie: avoid rescan on dump

This converts dumping (and flushing) of large route tables form O(N^2)
to O(N). If the route dump took multiple pages then the dump routine
gets called again. The old code kept track of location by counter, the
new code instead uses the last key.

This is a really big win ( 0.3 sec vs 12 sec) for big route tables.

One side effect is that if the table changes during the dump, then the
last key will not be found, and we will return -EBUSY.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 441c4eafb9e..f1005fe1789 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1917,35 +1917,43 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
 	return skb->len;
 }
 
-
-
 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
 			struct netlink_callback *cb)
 {
 	struct leaf *l;
 	struct trie *t = (struct trie *) tb->tb_data;
-	int h = 0;
-	int s_h = cb->args[2];
+	t_key key = cb->args[2];
 
 	rcu_read_lock();
-	for (h = 0, l = trie_firstleaf(t); l != NULL; h++, l = trie_nextleaf(l)) {
-		if (h < s_h)
-			continue;
-
-		if (h > s_h) {
-			cb->args[3] = 0;
-			cb->args[4] = 0;
+	/* Dump starting at last key.
+	 * Note: 0.0.0.0/0 (ie default) is first key.
+	 */
+	if (!key)
+		l = trie_firstleaf(t);
+	else {
+		l = fib_find_node(t, key);
+		if (!l) {
+			/* The table changed during the dump, rather than
+			 * giving partial data, just make application retry.
+			 */
+			rcu_read_unlock();
+			return -EBUSY;
 		}
+	}
 
+	while (l) {
+		cb->args[2] = l->key;
 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
 			rcu_read_unlock();
-			cb->args[2] = h;
 			return -1;
 		}
+
+		l = trie_nextleaf(l);
+		memset(&cb->args[3], 0,
+		       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 	}
 	rcu_read_unlock();
 
-	cb->args[2] = h;
 	return skb->len;
 }
 
-- 
cgit v1.2.3


From 64c2d5382954ccf6054424653f4c7f4f04c1ff21 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:03:33 -0800
Subject: [IPV4]: Consolidate fib_select_default.

The difference in the implementation of the fib_select_default when
CONFIG_IP_MULTIPLE_TABLES is (not) defined looks
negligible. Consolidate it and place into fib_frontend.c.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 14 ++++++++++++++
 net/ipv4/fib_rules.c    | 10 ----------
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6761639dd0e..13bf01de0ed 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -116,6 +116,20 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
+{
+	struct fib_table *tb;
+	int table = RT_TABLE_MAIN;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
+		return;
+	table = res->r->table;
+#endif
+	tb = fib_get_table(&init_net, table);
+	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+		tb->tb_select_default(tb, flp, res);
+}
+
 static void fib_flush(struct net *net)
 {
 	int flushed = 0;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1effb4ab688..19274d01afa 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -102,16 +102,6 @@ errout:
 }
 
 
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
-{
-	if (res->r && res->r->action == FR_ACT_TO_TBL &&
-	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
-		struct fib_table *tb;
-		if ((tb = fib_get_table(&init_net, res->r->table)) != NULL)
-			tb->tb_select_default(tb, flp, res);
-	}
-}
-
 static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
 	struct fib4_rule *r = (struct fib4_rule *) rule;
-- 
cgit v1.2.3


From 010278ec4cdf404aefc0bbd5e7406674fec95286 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:04:04 -0800
Subject: [NETNS]: Add netns parameter to fib_select_default.

Currently fib_select_default calls fib_get_table() with the
init_net. Prepare it to provide a correct namespace to lookup default
route.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 5 +++--
 net/ipv4/route.c        | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 13bf01de0ed..7e3e7329dac 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -116,7 +116,8 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
+void fib_select_default(struct net *net,
+			const struct flowi *flp, struct fib_result *res)
 {
 	struct fib_table *tb;
 	int table = RT_TABLE_MAIN;
@@ -125,7 +126,7 @@ void fib_select_default(const struct flowi *flp, struct fib_result *res)
 		return;
 	table = res->r->table;
 #endif
-	tb = fib_get_table(&init_net, table);
+	tb = fib_get_table(net, table);
 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 		tb->tb_select_default(tb, flp, res);
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 27e0f81060a..4313255e5a1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2419,7 +2419,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 	else
 #endif
 	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
-		fib_select_default(&fl, &res);
+		fib_select_default(&init_net, &fl, &res);
 
 	if (!fl.fl4_src)
 		fl.fl4_src = FIB_RES_PREFSRC(res);
-- 
cgit v1.2.3


From 1ab352768fc73838b062776ca5d1add3876a019f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:04:30 -0800
Subject: [NETNS]: Add namespace parameter to ip_dev_find.

in_dev_find() need a namespace to pass it to fib_get_table(), so add
an argument.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 4 ++--
 net/ipv4/igmp.c         | 2 +-
 net/ipv4/ip_sockglue.c  | 2 +-
 net/ipv4/ipmr.c         | 2 +-
 net/ipv4/route.c        | 6 +++---
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7e3e7329dac..d28261826bc 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -153,7 +153,7 @@ static void fib_flush(struct net *net)
  *	Find the first device with a given source address.
  */
 
-struct net_device * ip_dev_find(__be32 addr)
+struct net_device * ip_dev_find(struct net *net, __be32 addr)
 {
 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 	struct fib_result res;
@@ -164,7 +164,7 @@ struct net_device * ip_dev_find(__be32 addr)
 	res.r = NULL;
 #endif
 
-	local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
+	local_table = fib_get_table(net, RT_TABLE_LOCAL);
 	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
 		return NULL;
 	if (res.type != RTN_LOCAL)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 928bc328455..1f5314ca109 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1395,7 +1395,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 		return idev;
 	}
 	if (imr->imr_address.s_addr) {
-		dev = ip_dev_find(imr->imr_address.s_addr);
+		dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
 		if (!dev)
 			return NULL;
 		dev_put(dev);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 82817e55436..754b0a5bbfe 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -594,7 +594,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 				err = 0;
 				break;
 			}
-			dev = ip_dev_find(mreq.imr_address.s_addr);
+			dev = ip_dev_find(&init_net, mreq.imr_address.s_addr);
 			if (dev) {
 				mreq.imr_ifindex = dev->ifindex;
 				dev_put(dev);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 4198615ca67..221271758b9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -423,7 +423,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 			return -ENOBUFS;
 		break;
 	case 0:
-		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
+		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
 		if (!dev)
 			return -EADDRNOTAVAIL;
 		dev_put(dev);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4313255e5a1..674575b622a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2282,14 +2282,14 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 			goto out;
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldflp->fl4_src);
+		dev_out = ip_dev_find(&init_net, oldflp->fl4_src);
 		if (dev_out == NULL)
 			goto out;
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
-		   1. ip_dev_find(saddr) can return wrong iface, if saddr is
-		      assigned to multiple interfaces.
+		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
+		      is assigned to multiple interfaces.
 		   2. Moreover, we are allowed to send packets with saddr
 		      of another iface. --ANK
 		 */
-- 
cgit v1.2.3


From b40afd0e5c568aba79a4ae2a963cd2af1f09490d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:06:19 -0800
Subject: [NETNS]: Add namespace parameter to ip_route_output_slow.

This function needs a net namespace to lookup devices, fib tables,
etc. in, so pass it there.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 674575b622a..0576ff79c55 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2248,7 +2248,8 @@ static inline int ip_mkroute_output(struct rtable **rp,
  * Major route resolver routine.
  */
 
-static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+static int ip_route_output_slow(struct net *net, struct rtable **rp,
+				const struct flowi *oldflp)
 {
 	u32 tos	= RT_FL_TOS(oldflp);
 	struct flowi fl = { .nl_u = { .ip4_u =
@@ -2260,7 +2261,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 						  RT_SCOPE_UNIVERSE),
 				      } },
 			    .mark = oldflp->mark,
-			    .iif = init_net.loopback_dev->ifindex,
+			    .iif = net->loopback_dev->ifindex,
 			    .oif = oldflp->oif };
 	struct fib_result res;
 	unsigned flags = 0;
@@ -2282,7 +2283,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 			goto out;
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(&init_net, oldflp->fl4_src);
+		dev_out = ip_dev_find(net, oldflp->fl4_src);
 		if (dev_out == NULL)
 			goto out;
 
@@ -2322,7 +2323,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 
 	if (oldflp->oif) {
-		dev_out = dev_get_by_index(&init_net, oldflp->oif);
+		dev_out = dev_get_by_index(net, oldflp->oif);
 		err = -ENODEV;
 		if (dev_out == NULL)
 			goto out;
@@ -2356,15 +2357,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
 		if (dev_out)
 			dev_put(dev_out);
-		dev_out = init_net.loopback_dev;
+		dev_out = net->loopback_dev;
 		dev_hold(dev_out);
-		fl.oif = init_net.loopback_dev->ifindex;
+		fl.oif = net->loopback_dev->ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
-	if (fib_lookup(&init_net, &fl, &res)) {
+	if (fib_lookup(net, &fl, &res)) {
 		res.fi = NULL;
 		if (oldflp->oif) {
 			/* Apparently, routing tables are wrong. Assume,
@@ -2403,7 +2404,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 			fl.fl4_src = fl.fl4_dst;
 		if (dev_out)
 			dev_put(dev_out);
-		dev_out = init_net.loopback_dev;
+		dev_out = net->loopback_dev;
 		dev_hold(dev_out);
 		fl.oif = dev_out->ifindex;
 		if (res.fi)
@@ -2419,7 +2420,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 	else
 #endif
 	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
-		fib_select_default(&init_net, &fl, &res);
+		fib_select_default(net, &fl, &res);
 
 	if (!fl.fl4_src)
 		fl.fl4_src = FIB_RES_PREFSRC(res);
@@ -2469,7 +2470,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 	}
 	rcu_read_unlock_bh();
 
-	return ip_route_output_slow(rp, flp);
+	return ip_route_output_slow(&init_net, rp, flp);
 }
 
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
-- 
cgit v1.2.3


From 611c183ebcb5af384df3a4ddb391034a1b6ac255 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:06:48 -0800
Subject: [NETNS]: Add namespace parameter to __ip_route_output_key.

This is only required to propagate it down to the
ip_route_output_slow.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c         | 4 ++--
 net/ipv4/route.c        | 7 ++++---
 net/ipv4/xfrm4_policy.c | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 37cdea0c26b..11760310f91 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,7 +569,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		struct rtable *rt2;
 
 		security_skb_classify_flow(skb_in, &fl);
-		if (__ip_route_output_key(&rt, &fl))
+		if (__ip_route_output_key(&init_net, &rt, &fl))
 			goto out_unlock;
 
 		/* No need to clone since we're just using its address. */
@@ -592,7 +592,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			goto out_unlock;
 
 		if (inet_addr_type(&init_net, fl.fl4_src) == RTN_LOCAL)
-			err = __ip_route_output_key(&rt2, &fl);
+			err = __ip_route_output_key(&init_net, &rt2, &fl);
 		else {
 			struct flowi fl2 = {};
 			struct dst_entry *odst;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0576ff79c55..971ab0253a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2443,7 +2443,8 @@ make_route:
 out:	return err;
 }
 
-int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
+int __ip_route_output_key(struct net *net, struct rtable **rp,
+			  const struct flowi *flp)
 {
 	unsigned hash;
 	struct rtable *rth;
@@ -2470,7 +2471,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 	}
 	rcu_read_unlock_bh();
 
-	return ip_route_output_slow(&init_net, rp, flp);
+	return ip_route_output_slow(net, rp, flp);
 }
 
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
@@ -2536,7 +2537,7 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
 {
 	int err;
 
-	if ((err = __ip_route_output_key(rp, flp)) != 0)
+	if ((err = __ip_route_output_key(&init_net, rp, flp)) != 0)
 		return err;
 
 	if (flp->proto) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f04516c880f..3783e3ee56a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -36,7 +36,7 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr,
 	if (saddr)
 		fl.fl4_src = saddr->a4;
 
-	err = __ip_route_output_key(&rt, &fl);
+	err = __ip_route_output_key(&init_net, &rt, &fl);
 	dst = &rt->u.dst;
 	if (err)
 		dst = ERR_PTR(err);
-- 
cgit v1.2.3


From f1b050bf7a88910f9f00c9c8989c1bf5a67dd140 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:07:10 -0800
Subject: [NETNS]: Add namespace parameter to ip_route_output_flow.

Needed to propagate it down to the __ip_route_output_key.

Signed_off_by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c                 | 2 +-
 net/ipv4/af_inet.c              | 2 +-
 net/ipv4/inet_connection_sock.c | 2 +-
 net/ipv4/ip_output.c            | 2 +-
 net/ipv4/raw.c                  | 2 +-
 net/ipv4/route.c                | 7 ++++---
 net/ipv4/udp.c                  | 2 +-
 7 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f450df2fc86..9e38b0d6195 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -469,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
 			  };
 
 	security_skb_classify_flow(skb, &fl);
-	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
+	if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bcf8c8a4a3a..09ca5293d08 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1113,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	};
 
 	security_sk_classify_flow(sk, &fl);
-	err = ip_route_output_flow(&rt, &fl, sk, 0);
+	err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0);
 }
 	if (!err)
 		sk_setup_caps(sk, &rt->u.dst);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1c2a32f6bfc..7801cceb2d1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -333,7 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 					 .dport = ireq->rmt_port } } };
 
 	security_req_classify_flow(req, &fl);
-	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
+	if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8950d18001f..6d78e1d6b78 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -350,7 +350,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 			 * itself out.
 			 */
 			security_sk_classify_flow(sk, &fl);
-			if (ip_route_output_flow(&rt, &fl, sk, 0))
+			if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0))
 				goto no_route;
 		}
 		sk_setup_caps(sk, &rt->u.dst);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 91a52184351..85c08696abb 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -558,7 +558,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		}
 
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(&rt, &fl, sk, 1);
+		err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
 	}
 	if (err)
 		goto done;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 971ab0253a0..c75fc20b07e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2533,11 +2533,12 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
 	return (rt ? 0 : -ENOMEM);
 }
 
-int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
+int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
+			 struct sock *sk, int flags)
 {
 	int err;
 
-	if ((err = __ip_route_output_key(&init_net, rp, flp)) != 0)
+	if ((err = __ip_route_output_key(net, rp, flp)) != 0)
 		return err;
 
 	if (flp->proto) {
@@ -2560,7 +2561,7 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
 int ip_route_output_key(struct rtable **rp, struct flowi *flp)
 {
-	return ip_route_output_flow(rp, flp, NULL, 0);
+	return ip_route_output_flow(&init_net, rp, flp, NULL, 0);
 }
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ecd9d91b30c..2fb8d731026 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -660,7 +660,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 					       { .sport = inet->sport,
 						 .dport = dport } } };
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(&rt, &fl, sk, 1);
+		err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
 		if (err) {
 			if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-- 
cgit v1.2.3


From f206351a50ea86250fabea96b9af8d8f8fc02603 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 22:07:34 -0800
Subject: [NETNS]: Add namespace parameter to ip_route_output_key.

Needed to propagate it down to the ip_route_output_flow.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/clip.c                   |  2 +-
 net/bridge/br_netfilter.c        |  2 +-
 net/ipv4/arp.c                   |  6 +++---
 net/ipv4/icmp.c                  |  4 ++--
 net/ipv4/igmp.c                  |  6 +++---
 net/ipv4/ip_gre.c                | 10 +++++-----
 net/ipv4/ip_output.c             |  2 +-
 net/ipv4/ipip.c                  |  8 ++++----
 net/ipv4/ipmr.c                  |  4 ++--
 net/ipv4/ipvs/ip_vs_xmit.c       |  6 +++---
 net/ipv4/netfilter.c             |  6 +++---
 net/ipv4/netfilter/nf_nat_rule.c |  2 +-
 net/ipv4/route.c                 |  6 +++---
 net/ipv4/syncookies.c            |  2 +-
 net/ipv6/ip6_tunnel.c            |  4 ++--
 net/ipv6/sit.c                   |  4 ++--
 net/rxrpc/ar-peer.c              |  2 +-
 net/sctp/protocol.c              |  4 ++--
 18 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/atm/clip.c b/net/atm/clip.c
index 45e08620c8c..86b885ec1cb 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -534,7 +534,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 		unlink_clip_vcc(clip_vcc);
 		return 0;
 	}
-	error = ip_route_output_key(&rt, &fl);
+	error = ip_route_output_key(&init_net, &rt, &fl);
 	if (error)
 		return error;
 	neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 141f069e77a..80014bab81b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -353,7 +353,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
 				goto free_skb;
 
-			if (!ip_route_output_key(&rt, &fl)) {
+			if (!ip_route_output_key(&init_net, &rt, &fl)) {
 				/* - Bridged-and-DNAT'ed traffic doesn't
 				 *   require ip_forwarding. */
 				if (((struct dst_entry *)rt)->dev == dev) {
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b2c19cb1206..5976c598cc4 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	int flag = 0;
 	/*unsigned long now; */
 
-	if (ip_route_output_key(&rt, &fl) < 0)
+	if (ip_route_output_key(&init_net, &rt, &fl) < 0)
 		return 1;
 	if (rt->u.dst.dev != dev) {
 		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
@@ -1002,7 +1002,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
 							 .tos = RTO_ONLINK } } };
 		struct rtable * rt;
-		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
@@ -1109,7 +1109,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
 							 .tos = RTO_ONLINK } } };
 		struct rtable * rt;
-		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 11760310f91..a142f19fec4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -405,7 +405,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			goto out_unlock;
 	}
 	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
@@ -598,7 +598,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			struct dst_entry *odst;
 
 			fl2.fl4_dst = fl.fl4_src;
-			if (ip_route_output_key(&rt2, &fl2))
+			if (ip_route_output_key(&init_net, &rt2, &fl2))
 				goto out_unlock;
 
 			/* Ugh! */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1f5314ca109..994648be80a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -301,7 +301,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 				    .nl_u = { .ip4_u = {
 				    .daddr = IGMPV3_ALL_MCR } },
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			kfree_skb(skb);
 			return NULL;
 		}
@@ -645,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		struct flowi fl = { .oif = dev->ifindex,
 				    .nl_u = { .ip4_u = { .daddr = dst } },
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			return -1;
 	}
 	if (rt->rt_src == 0) {
@@ -1401,7 +1401,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 		dev_put(dev);
 	}
 
-	if (!dev && !ip_route_output_key(&rt, &fl)) {
+	if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) {
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
 	}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a74983d8c89..63f69171935 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -480,7 +480,7 @@ out:
 	fl.fl4_dst = eiph->saddr;
 	fl.fl4_tos = RT_TOS(eiph->tos);
 	fl.proto = IPPROTO_GRE;
-	if (ip_route_output_key(&rt, &fl)) {
+	if (ip_route_output_key(&init_net, &rt, &fl)) {
 		kfree_skb(skb2);
 		return;
 	}
@@ -493,7 +493,7 @@ out:
 		fl.fl4_dst = eiph->daddr;
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(&rt, &fl) ||
+		if (ip_route_output_key(&init_net, &rt, &fl) ||
 		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
 			ip_rt_put(rt);
 			kfree_skb(skb2);
@@ -748,7 +748,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 						.saddr = tiph->saddr,
 						.tos = RT_TOS(tos) } },
 				    .proto = IPPROTO_GRE };
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			tunnel->stat.tx_carrier_errors++;
 			goto tx_error;
 		}
@@ -921,7 +921,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 						.tos = RT_TOS(iph->tos) } },
 				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
+		if (!ip_route_output_key(&init_net, &rt, &fl)) {
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
@@ -1177,7 +1177,7 @@ static int ipgre_open(struct net_device *dev)
 						.tos = RT_TOS(t->parms.iph.tos) } },
 				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			return -EADDRNOTAVAIL;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6d78e1d6b78..1725e061398 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1379,7 +1379,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 						 .dport = tcp_hdr(skb)->source } },
 				    .proto = sk->sk_protocol };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			return;
 	}
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 160535b5170..da281581692 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -405,7 +405,7 @@ out:
 	fl.fl4_daddr = eiph->saddr;
 	fl.fl4_tos = RT_TOS(eiph->tos);
 	fl.proto = IPPROTO_IPIP;
-	if (ip_route_output_key(&rt, &key)) {
+	if (ip_route_output_key(&init_net, &rt, &key)) {
 		kfree_skb(skb2);
 		return 0;
 	}
@@ -418,7 +418,7 @@ out:
 		fl.fl4_daddr = eiph->daddr;
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(&rt, &fl) ||
+		if (ip_route_output_key(&init_net, &rt, &fl) ||
 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 			ip_rt_put(rt);
 			kfree_skb(skb2);
@@ -547,7 +547,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 						.saddr = tiph->saddr,
 						.tos = RT_TOS(tos) } },
 				    .proto = IPPROTO_IPIP };
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			tunnel->stat.tx_carrier_errors++;
 			goto tx_error_icmp;
 		}
@@ -668,7 +668,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 						.tos = RT_TOS(iph->tos) } },
 				    .proto = IPPROTO_IPIP };
 		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
+		if (!ip_route_output_key(&init_net, &rt, &fl)) {
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 221271758b9..a94f52c207a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1185,7 +1185,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 						.saddr = vif->local,
 						.tos = RT_TOS(iph->tos) } },
 				    .proto = IPPROTO_IPIP };
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			goto out_free;
 		encap = sizeof(struct iphdr);
 	} else {
@@ -1194,7 +1194,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 					      { .daddr = iph->daddr,
 						.tos = RT_TOS(iph->tos) } },
 				    .proto = IPPROTO_IPIP };
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			goto out_free;
 	}
 
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 8436bf81859..f63006caea0 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -78,7 +78,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 						.tos = rtos, } },
 			};
 
-			if (ip_route_output_key(&rt, &fl)) {
+			if (ip_route_output_key(&init_net, &rt, &fl)) {
 				spin_unlock(&dest->dst_lock);
 				IP_VS_DBG_RL("ip_route_output error, "
 					     "dest: %u.%u.%u.%u\n",
@@ -101,7 +101,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 					.tos = rtos, } },
 		};
 
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			IP_VS_DBG_RL("ip_route_output error, dest: "
 				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
 			return NULL;
@@ -170,7 +170,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	if (ip_route_output_key(&rt, &fl)) {
+	if (ip_route_output_key(&init_net, &rt, &fl)) {
 		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
 			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
 		goto tx_error_icmp;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 63221553d26..9a904c6c0dc 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -33,7 +33,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
 		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 		fl.mark = skb->mark;
-		if (ip_route_output_key(&rt, &fl) != 0)
+		if (ip_route_output_key(&init_net, &rt, &fl) != 0)
 			return -1;
 
 		/* Drop old route. */
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
 		fl.nl_u.ip4_u.daddr = iph->saddr;
-		if (ip_route_output_key(&rt, &fl) != 0)
+		if (ip_route_output_key(&init_net, &rt, &fl) != 0)
 			return -1;
 
 		odst = skb->dst;
@@ -187,7 +187,7 @@ EXPORT_SYMBOL(nf_ip_checksum);
 
 static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
 {
-	return ip_route_output_key((struct rtable **)dst, fl);
+	return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
 }
 
 static const struct nf_afinfo nf_ip_afinfo = {
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 4391aec56ab..519182269e7 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -97,7 +97,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
 	struct rtable *rt;
 
-	if (ip_route_output_key(&rt, &fl) != 0)
+	if (ip_route_output_key(&init_net, &rt, &fl) != 0)
 		return;
 
 	if (rt->rt_src != srcip && !warned) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c75fc20b07e..39a40342142 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2559,9 +2559,9 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
 
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
-int ip_route_output_key(struct rtable **rp, struct flowi *flp)
+int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
 {
-	return ip_route_output_flow(&init_net, rp, flp, NULL, 0);
+	return ip_route_output_flow(net, rp, flp, NULL, 0);
 }
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2728,7 +2728,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 			},
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 		};
-		err = ip_route_output_key(&rt, &fl);
+		err = ip_route_output_key(&init_net, &rt, &fl);
 	}
 
 	if (err)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2da1be0589a..f470fe4511d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -264,7 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 					       { .sport = th->dest,
 						 .dport = th->source } } };
 		security_req_classify_flow(req, &fl);
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			reqsk_free(req);
 			goto out;
 		}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 425c9ae8b31..9031e521c1d 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -533,7 +533,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	fl.fl4_dst = eiph->saddr;
 	fl.fl4_tos = RT_TOS(eiph->tos);
 	fl.proto = IPPROTO_IPIP;
-	if (ip_route_output_key(&rt, &fl))
+	if (ip_route_output_key(&init_net, &rt, &fl))
 		goto out;
 
 	skb2->dev = rt->u.dst.dev;
@@ -545,7 +545,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		fl.fl4_dst = eiph->daddr;
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(&rt, &fl) ||
+		if (ip_route_output_key(&init_net, &rt, &fl) ||
 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 			ip_rt_put(rt);
 			goto out;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1c6fddb80b3..e77239d02bf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -557,7 +557,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 						.tos = RT_TOS(tos) } },
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			tunnel->stat.tx_carrier_errors++;
 			goto tx_error_icmp;
 		}
@@ -686,7 +686,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
+		if (!ip_route_output_key(&init_net, &rt, &fl)) {
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 90fa107a8af..2abe2081a5e 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -57,7 +57,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		BUG();
 	}
 
-	ret = ip_route_output_key(&rt, &fl);
+	ret = ip_route_output_key(&init_net, &rt, &fl);
 	if (ret < 0) {
 		_leave(" [route err %d]", ret);
 		return;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3f7def2936b..1339742e49f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -454,7 +454,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			  __FUNCTION__, NIPQUAD(fl.fl4_dst),
 			  NIPQUAD(fl.fl4_src));
 
-	if (!ip_route_output_key(&rt, &fl)) {
+	if (!ip_route_output_key(&init_net, &rt, &fl)) {
 		dst = &rt->u.dst;
 	}
 
@@ -497,7 +497,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
-			if (!ip_route_output_key(&rt, &fl)) {
+			if (!ip_route_output_key(&init_net, &rt, &fl)) {
 				dst = &rt->u.dst;
 				goto out_unlock;
 			}
-- 
cgit v1.2.3


From 62e3ba1b558e5f393ef746880613fb8222e64d03 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 22:10:23 -0800
Subject: [NET_SCHED]: Move EXPORT_SYMBOL next to exported symbol

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c     |  9 ++++-----
 net/sched/ematch.c      | 11 +++++------
 net/sched/sch_api.c     |  9 ++++-----
 net/sched/sch_fifo.c    |  3 +--
 net/sched/sch_generic.c | 17 ++++++++---------
 5 files changed, 22 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 81506474a4f..3825508fdcd 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -263,6 +263,7 @@ int tcf_register_action(struct tc_action_ops *act)
 	write_unlock(&act_mod_lock);
 	return 0;
 }
+EXPORT_SYMBOL(tcf_register_action);
 
 int tcf_unregister_action(struct tc_action_ops *act)
 {
@@ -281,6 +282,7 @@ int tcf_unregister_action(struct tc_action_ops *act)
 	write_unlock(&act_mod_lock);
 	return err;
 }
+EXPORT_SYMBOL(tcf_unregister_action);
 
 /* lookup by name */
 static struct tc_action_ops *tc_lookup_action_n(char *kind)
@@ -377,6 +379,7 @@ repeat:
 exec_done:
 	return ret;
 }
+EXPORT_SYMBOL(tcf_action_exec);
 
 void tcf_action_destroy(struct tc_action *act, int bind)
 {
@@ -430,6 +433,7 @@ rtattr_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
+EXPORT_SYMBOL(tcf_action_dump_1);
 
 int
 tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
@@ -1077,8 +1081,3 @@ static int __init tc_action_init(void)
 }
 
 subsys_initcall(tc_action_init);
-
-EXPORT_SYMBOL(tcf_register_action);
-EXPORT_SYMBOL(tcf_unregister_action);
-EXPORT_SYMBOL(tcf_action_exec);
-EXPORT_SYMBOL(tcf_action_dump_1);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index f3a104e323b..27941cfc0ab 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -141,6 +141,7 @@ errout:
 	write_unlock(&ematch_mod_lock);
 	return err;
 }
+EXPORT_SYMBOL(tcf_em_register);
 
 /**
  * tcf_em_unregister - unregster and extended match
@@ -171,6 +172,7 @@ out:
 	write_unlock(&ematch_mod_lock);
 	return err;
 }
+EXPORT_SYMBOL(tcf_em_unregister);
 
 static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
 						   int index)
@@ -380,6 +382,7 @@ errout_abort:
 	tcf_em_tree_destroy(tp, tree);
 	return err;
 }
+EXPORT_SYMBOL(tcf_em_tree_validate);
 
 /**
  * tcf_em_tree_destroy - destroy an ematch tree
@@ -413,6 +416,7 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
 	tree->hdr.nmatches = 0;
 	kfree(tree->matches);
 }
+EXPORT_SYMBOL(tcf_em_tree_destroy);
 
 /**
  * tcf_em_tree_dump - dump ematch tree into a rtnl message
@@ -472,6 +476,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 rtattr_failure:
 	return -1;
 }
+EXPORT_SYMBOL(tcf_em_tree_dump);
 
 static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
 			       struct tcf_pkt_info *info)
@@ -529,10 +534,4 @@ stack_overflow:
 		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
 	return -1;
 }
-
-EXPORT_SYMBOL(tcf_em_register);
-EXPORT_SYMBOL(tcf_em_unregister);
-EXPORT_SYMBOL(tcf_em_tree_validate);
-EXPORT_SYMBOL(tcf_em_tree_destroy);
-EXPORT_SYMBOL(tcf_em_tree_dump);
 EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 273c628be05..dc89a9343f3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -158,6 +158,7 @@ out:
 	write_unlock(&qdisc_mod_lock);
 	return rc;
 }
+EXPORT_SYMBOL(register_qdisc);
 
 int unregister_qdisc(struct Qdisc_ops *qops)
 {
@@ -176,6 +177,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
 	write_unlock(&qdisc_mod_lock);
 	return err;
 }
+EXPORT_SYMBOL(unregister_qdisc);
 
 /* We know handle. Find qdisc among all qdisc's attached to device
    (root qdisc, all its children, children of children etc.)
@@ -255,6 +257,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *ta
 	}
 	return rtab;
 }
+EXPORT_SYMBOL(qdisc_get_rtab);
 
 void qdisc_put_rtab(struct qdisc_rate_table *tab)
 {
@@ -271,6 +274,7 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 		}
 	}
 }
+EXPORT_SYMBOL(qdisc_put_rtab);
 
 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
@@ -1289,8 +1293,3 @@ static int __init pktsched_init(void)
 }
 
 subsys_initcall(pktsched_init);
-
-EXPORT_SYMBOL(qdisc_get_rtab);
-EXPORT_SYMBOL(qdisc_put_rtab);
-EXPORT_SYMBOL(register_qdisc);
-EXPORT_SYMBOL(unregister_qdisc);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index d71dbfc790c..fd0591903c8 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -91,6 +91,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.dump		=	fifo_dump,
 	.owner		=	THIS_MODULE,
 };
+EXPORT_SYMBOL(pfifo_qdisc_ops);
 
 struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.id		=	"bfifo",
@@ -105,6 +106,4 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.dump		=	fifo_dump,
 	.owner		=	THIS_MODULE,
 };
-
 EXPORT_SYMBOL(bfifo_qdisc_ops);
-EXPORT_SYMBOL(pfifo_qdisc_ops);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ea5a05b172c..51e64acd509 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -46,6 +46,7 @@ void qdisc_lock_tree(struct net_device *dev)
 	spin_lock_bh(&dev->queue_lock);
 	spin_lock(&dev->ingress_lock);
 }
+EXPORT_SYMBOL(qdisc_lock_tree);
 
 void qdisc_unlock_tree(struct net_device *dev)
 	__releases(dev->ingress_lock)
@@ -54,6 +55,7 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock(&dev->ingress_lock);
 	spin_unlock_bh(&dev->queue_lock);
 }
+EXPORT_SYMBOL(qdisc_unlock_tree);
 
 static inline int qdisc_qlen(struct Qdisc *q)
 {
@@ -253,6 +255,7 @@ void netif_carrier_on(struct net_device *dev)
 			__netdev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(netif_carrier_on);
 
 /**
  *	netif_carrier_off - clear carrier
@@ -265,6 +268,7 @@ void netif_carrier_off(struct net_device *dev)
 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
 		linkwatch_fire_event(dev);
 }
+EXPORT_SYMBOL(netif_carrier_off);
 
 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
    under all circumstances. It is difficult to invent anything faster or
@@ -307,6 +311,7 @@ struct Qdisc noop_qdisc = {
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
 };
+EXPORT_SYMBOL(noop_qdisc);
 
 static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.id		=	"noqueue",
@@ -471,6 +476,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
 errout:
 	return NULL;
 }
+EXPORT_SYMBOL(qdisc_create_dflt);
 
 /* Under dev->queue_lock and BH! */
 
@@ -481,6 +487,7 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 }
+EXPORT_SYMBOL(qdisc_reset);
 
 /* this is the rcu callback function to clean up a qdisc when there
  * are no further references to it */
@@ -512,6 +519,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	dev_put(qdisc->dev);
 	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
+EXPORT_SYMBOL(qdisc_destroy);
 
 void dev_activate(struct net_device *dev)
 {
@@ -626,12 +634,3 @@ void dev_shutdown(struct net_device *dev)
 	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
 	qdisc_unlock_tree(dev);
 }
-
-EXPORT_SYMBOL(netif_carrier_on);
-EXPORT_SYMBOL(netif_carrier_off);
-EXPORT_SYMBOL(noop_qdisc);
-EXPORT_SYMBOL(qdisc_create_dflt);
-EXPORT_SYMBOL(qdisc_destroy);
-EXPORT_SYMBOL(qdisc_reset);
-EXPORT_SYMBOL(qdisc_lock_tree);
-EXPORT_SYMBOL(qdisc_unlock_tree);
-- 
cgit v1.2.3


From 2eb9d75c723252c1fa8f0206e6a0df220e3c64c0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 22:10:42 -0800
Subject: [NET_SCHED]: mark classifier ops __read_mostly

Additionally remove unnecessary NULL initilizations of the next pointer.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c     | 2 +-
 net/sched/cls_basic.c   | 2 +-
 net/sched/cls_fw.c      | 3 +--
 net/sched/cls_route.c   | 3 +--
 net/sched/cls_tcindex.c | 3 +--
 net/sched/cls_u32.c     | 3 +--
 6 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e53773612bc..9eeb3c6c82f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,7 +31,7 @@
 
 /* The list of all installed classifier types */
 
-static struct tcf_proto_ops *tcf_proto_base;
+static struct tcf_proto_ops *tcf_proto_base __read_mostly;
 
 /* Protects list of registered TC modules. It is pure SMP lock. */
 static DEFINE_RWLOCK(cls_mod_lock);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 8dbcf2771a4..b31f9f97198 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -271,7 +271,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct tcf_proto_ops cls_basic_ops = {
+static struct tcf_proto_ops cls_basic_ops __read_mostly = {
 	.kind		=	"basic",
 	.classify	=	basic_classify,
 	.init		=	basic_init,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8adbd6a37d1..b45038770e7 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -370,8 +370,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct tcf_proto_ops cls_fw_ops = {
-	.next		=	NULL,
+static struct tcf_proto_ops cls_fw_ops __read_mostly = {
 	.kind		=	"fw",
 	.classify	=	fw_classify,
 	.init		=	fw_init,
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0a8409c1d28..e70edd0f7bc 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -590,8 +590,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct tcf_proto_ops cls_route4_ops = {
-	.next		=	NULL,
+static struct tcf_proto_ops cls_route4_ops __read_mostly = {
 	.kind		=	"route",
 	.classify	=	route4_classify,
 	.init		=	route4_init,
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 471909e5480..e36977b17fa 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -485,8 +485,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct tcf_proto_ops cls_tcindex_ops = {
-	.next		=	NULL,
+static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
 	.kind		=	"tcindex",
 	.classify	=	tcindex_classify,
 	.init		=	tcindex_init,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c3900820916..7bf3cd4e731 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -751,8 +751,7 @@ rtattr_failure:
 	return -1;
 }
 
-static struct tcf_proto_ops cls_u32_ops = {
-	.next		=	NULL,
+static struct tcf_proto_ops cls_u32_ops __read_mostly = {
 	.kind		=	"u32",
 	.classify	=	u32_classify,
 	.init		=	u32_init,
-- 
cgit v1.2.3


From 01480e1cf5e2118eba8a8968239f3242072f9563 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 22:10:59 -0800
Subject: [NETLINK]: Add nla_append()

Used to append data to a message without a header or padding.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/attr.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'net')

diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index ec39d12c242..feb326f4a75 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -430,6 +430,24 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
 	return 0;
 }
 
+/**
+ * nla_append - Add a netlink attribute without header or padding
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -1 if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_append(struct sk_buff *skb, int attrlen, const void *data)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return -1;
+
+	memcpy(skb_put(skb, attrlen), data, attrlen);
+	return 0;
+}
+
 EXPORT_SYMBOL(nla_validate);
 EXPORT_SYMBOL(nla_parse);
 EXPORT_SYMBOL(nla_find);
@@ -445,3 +463,4 @@ EXPORT_SYMBOL(nla_put_nohdr);
 EXPORT_SYMBOL(nla_memcpy);
 EXPORT_SYMBOL(nla_memcmp);
 EXPORT_SYMBOL(nla_strcmp);
+EXPORT_SYMBOL(nla_append);
-- 
cgit v1.2.3


From 1e90474c377e92db7262a8968a45c1dd980ca9e5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 22:11:17 -0800
Subject: [NET_SCHED]: Convert packet schedulers from rtnetlink to new netlink
 API

Convert packet schedulers to use the netlink API. Unfortunately a gradual
conversion is not possible without breaking compilation in the middle or
adding lots of casts, so this patch converts them all in one step. The
patch has been mostly generated automatically with some minor edits to
at least allow seperate conversion of classifiers and actions.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c   |   8 +--
 net/core/gen_stats.c       |   9 +--
 net/mac80211/wme.c         |  14 ++--
 net/netfilter/xt_RATEEST.c |   7 +-
 net/sched/act_api.c        |   2 +-
 net/sched/act_police.c     |   6 +-
 net/sched/sch_api.c        |  87 ++++++++++++++-----------
 net/sched/sch_atm.c        |  48 +++++++-------
 net/sched/sch_cbq.c        | 156 ++++++++++++++++++++++-----------------------
 net/sched/sch_dsmark.c     |  85 +++++++++++++-----------
 net/sched/sch_fifo.c       |  10 +--
 net/sched/sch_generic.c    |   6 +-
 net/sched/sch_gred.c       |  59 +++++++++--------
 net/sched/sch_hfsc.c       |  72 ++++++++++-----------
 net/sched/sch_htb.c        |  68 ++++++++++----------
 net/sched/sch_ingress.c    |  14 ++--
 net/sched/sch_netem.c      | 100 ++++++++++++++---------------
 net/sched/sch_prio.c       |  30 +++++----
 net/sched/sch_red.c        |  52 +++++++--------
 net/sched/sch_sfq.c        |  12 ++--
 net/sched/sch_tbf.c        |  49 +++++++-------
 net/sched/sch_teql.c       |   2 +-
 22 files changed, 467 insertions(+), 429 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7ab9060bccd..57abe8266be 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -159,13 +159,13 @@ skip:
 int gen_new_estimator(struct gnet_stats_basic *bstats,
 		      struct gnet_stats_rate_est *rate_est,
 		      spinlock_t *stats_lock,
-		      struct rtattr *opt)
+		      struct nlattr *opt)
 {
 	struct gen_estimator *est;
-	struct gnet_estimator *parm = RTA_DATA(opt);
+	struct gnet_estimator *parm = nla_data(opt);
 	int idx;
 
-	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+	if (nla_len(opt) < sizeof(*parm))
 		return -EINVAL;
 
 	if (parm->interval < -2 || parm->interval > 3)
@@ -254,7 +254,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
  */
 int gen_replace_estimator(struct gnet_stats_basic *bstats,
 			  struct gnet_stats_rate_est *rate_est,
-			  spinlock_t *stats_lock, struct rtattr *opt)
+			  spinlock_t *stats_lock, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
 	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 8073561f7c6..c3d0ffeac24 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -20,16 +20,17 @@
 #include <linux/socket.h>
 #include <linux/rtnetlink.h>
 #include <linux/gen_stats.h>
+#include <net/netlink.h>
 #include <net/gen_stats.h>
 
 
 static inline int
 gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
 {
-	RTA_PUT(d->skb, type, size, buf);
+	NLA_PUT(d->skb, type, size, buf);
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	spin_unlock_bh(d->lock);
 	return -1;
 }
@@ -62,7 +63,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 	spin_lock_bh(lock);
 	d->lock = lock;
 	if (type)
-		d->tail = (struct rtattr *)skb_tail_pointer(skb);
+		d->tail = (struct nlattr *)skb_tail_pointer(skb);
 	d->skb = skb;
 	d->compat_tc_stats = tc_stats_type;
 	d->compat_xstats = xstats_type;
@@ -213,7 +214,7 @@ int
 gnet_stats_finish_copy(struct gnet_dump *d)
 {
 	if (d->tail)
-		d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
+		d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
 
 	if (d->compat_tc_stats)
 		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 024519522d3..4e236599dd3 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -297,16 +297,16 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
 
 
 /* called whenever parameters are updated on existing qdisc */
-static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt)
+static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt)
 {
 /*	struct ieee80211_sched_data *q = qdisc_priv(qd);
 */
 	/* check our options block is the right size */
 	/* copy any options to our local structure */
 /*	Ignore options block for now - always use static mapping
-	struct tc_ieee80211_qopt *qopt = RTA_DATA(opt);
+	struct tc_ieee80211_qopt *qopt = nla_data(opt);
 
-	if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+	if (opt->nla_len < nla_attr_size(sizeof(*qopt)))
 		return -EINVAL;
 	memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue));
 */
@@ -315,7 +315,7 @@ static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt)
 
 
 /* called during initial creation of qdisc on device */
-static int wme_qdiscop_init(struct Qdisc *qd, struct rtattr *opt)
+static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
 	struct net_device *dev = qd->dev;
@@ -370,10 +370,10 @@ static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb)
 	struct tc_ieee80211_qopt opt;
 
 	memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1);
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 */	return skb->len;
 /*
-rtattr_failure:
+nla_put_failure:
 	skb_trim(skb, p - skb->data);*/
 	return -1;
 }
@@ -444,7 +444,7 @@ static void wme_classop_put(struct Qdisc *q, unsigned long cl)
 
 
 static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
-			      struct rtattr **tca, unsigned long *arg)
+			      struct nlattr **tca, unsigned long *arg)
 {
 	unsigned long cl = *arg;
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index c5ba525dc32..24c73ba31ea 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -12,6 +12,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <net/gen_stats.h>
+#include <net/netlink.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_RATEEST.h>
@@ -98,7 +99,7 @@ xt_rateest_tg_checkentry(const char *tablename,
 	struct xt_rateest_target_info *info = (void *)targinfo;
 	struct xt_rateest *est;
 	struct {
-		struct rtattr		opt;
+		struct nlattr		opt;
 		struct gnet_estimator	est;
 	} cfg;
 
@@ -128,8 +129,8 @@ xt_rateest_tg_checkentry(const char *tablename,
 	est->params.interval	= info->interval;
 	est->params.ewma_log	= info->ewma_log;
 
-	cfg.opt.rta_len		= RTA_LENGTH(sizeof(cfg.est));
-	cfg.opt.rta_type	= TCA_STATS_RATE_EST;
+	cfg.opt.nla_len		= nla_attr_size(sizeof(cfg.est));
+	cfg.opt.nla_type	= TCA_STATS_RATE_EST;
 	cfg.est.interval	= info->interval;
 	cfg.est.ewma_log	= info->ewma_log;
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3825508fdcd..11f3097a691 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -227,7 +227,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti
 	p->tcfc_tm.lastuse = jiffies;
 	if (est)
 		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
-				  &p->tcfc_lock, est);
+				  &p->tcfc_lock, (struct nlattr *)est);
 	a->priv = (void *) p;
 	return p;
 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a73e3e6d87e..07ffdf9c5e5 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -174,12 +174,12 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 override:
 	if (parm->rate.rate) {
 		err = -ENOMEM;
-		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		R_tab = qdisc_get_rtab(&parm->rate, (struct nlattr *)tb[TCA_POLICE_RATE-1]);
 		if (R_tab == NULL)
 			goto failure;
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
-					       tb[TCA_POLICE_PEAKRATE-1]);
+					       (struct nlattr *)tb[TCA_POLICE_PEAKRATE-1]);
 			if (P_tab == NULL) {
 				qdisc_put_rtab(R_tab);
 				goto failure;
@@ -216,7 +216,7 @@ override:
 	if (est)
 		gen_replace_estimator(&police->tcf_bstats,
 				      &police->tcf_rate_est,
-				      &police->tcf_lock, est);
+				      &police->tcf_lock, (struct nlattr *)est);
 
 	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index dc89a9343f3..7abb028dd96 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -213,14 +213,14 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 
 /* Find queueing discipline by name */
 
-static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
+static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
 {
 	struct Qdisc_ops *q = NULL;
 
 	if (kind) {
 		read_lock(&qdisc_mod_lock);
 		for (q = qdisc_base; q; q = q->next) {
-			if (rtattr_strcmp(kind, q->id) == 0) {
+			if (nla_strcmp(kind, q->id) == 0) {
 				if (!try_module_get(q->owner))
 					q = NULL;
 				break;
@@ -233,7 +233,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
 
 static struct qdisc_rate_table *qdisc_rtab_list;
 
-struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
 {
 	struct qdisc_rate_table *rtab;
 
@@ -244,14 +244,14 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *ta
 		}
 	}
 
-	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
+	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || nla_len(tab) != 1024)
 		return NULL;
 
 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
 	if (rtab) {
 		rtab->rate = *r;
 		rtab->refcnt = 1;
-		memcpy(rtab->data, RTA_DATA(tab), 1024);
+		memcpy(rtab->data, nla_data(tab), 1024);
 		rtab->next = qdisc_rtab_list;
 		qdisc_rtab_list = rtab;
 	}
@@ -445,10 +445,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 
 static struct Qdisc *
 qdisc_create(struct net_device *dev, u32 parent, u32 handle,
-	   struct rtattr **tca, int *errp)
+	   struct nlattr **tca, int *errp)
 {
 	int err;
-	struct rtattr *kind = tca[TCA_KIND-1];
+	struct nlattr *kind = tca[TCA_KIND];
 	struct Qdisc *sch;
 	struct Qdisc_ops *ops;
 
@@ -456,7 +456,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 #ifdef CONFIG_KMOD
 	if (ops == NULL && kind != NULL) {
 		char name[IFNAMSIZ];
-		if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
 			/* We dropped the RTNL semaphore in order to
 			 * perform the module load.  So, even if we
 			 * succeeded in loading the module we have to
@@ -509,11 +509,11 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 
 	sch->handle = handle;
 
-	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
-		if (tca[TCA_RATE-1]) {
+	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+		if (tca[TCA_RATE]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
 						sch->stats_lock,
-						tca[TCA_RATE-1]);
+						tca[TCA_RATE]);
 			if (err) {
 				/*
 				 * Any broken qdiscs that would require
@@ -541,20 +541,20 @@ err_out:
 	return NULL;
 }
 
-static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
+static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	if (tca[TCA_OPTIONS-1]) {
+	if (tca[TCA_OPTIONS]) {
 		int err;
 
 		if (sch->ops->change == NULL)
 			return -EINVAL;
-		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
+		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
 		if (err)
 			return err;
 	}
-	if (tca[TCA_RATE-1])
+	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
-			sch->stats_lock, tca[TCA_RATE-1]);
+			sch->stats_lock, tca[TCA_RATE]);
 	return 0;
 }
 
@@ -606,7 +606,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = skb->sk->sk_net;
 	struct tcmsg *tcm = NLMSG_DATA(n);
-	struct rtattr **tca = arg;
+	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
 	u32 clid = tcm->tcm_parent;
 	struct Qdisc *q = NULL;
@@ -619,6 +619,10 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+	if (err < 0)
+		return err;
+
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
@@ -641,7 +645,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return -ENOENT;
 	}
 
-	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 		return -EINVAL;
 
 	if (n->nlmsg_type == RTM_DELQDISC) {
@@ -671,7 +675,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = skb->sk->sk_net;
 	struct tcmsg *tcm;
-	struct rtattr **tca;
+	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
 	u32 clid;
 	struct Qdisc *q, *p;
@@ -683,13 +687,16 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 replay:
 	/* Reinit, just in case something touches this. */
 	tcm = NLMSG_DATA(n);
-	tca = arg;
 	clid = tcm->tcm_parent;
 	q = p = NULL;
 
 	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+	if (err < 0)
+		return err;
+
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (clid != TC_H_INGRESS) {
@@ -717,7 +724,7 @@ replay:
 					goto create_n_graft;
 				if (n->nlmsg_flags&NLM_F_EXCL)
 					return -EEXIST;
-				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 					return -EINVAL;
 				if (q == p ||
 				    (p && check_loop(q, p, 0)))
@@ -750,8 +757,8 @@ replay:
 				if ((n->nlmsg_flags&NLM_F_CREATE) &&
 				    (n->nlmsg_flags&NLM_F_REPLACE) &&
 				    ((n->nlmsg_flags&NLM_F_EXCL) ||
-				     (tca[TCA_KIND-1] &&
-				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
+				     (tca[TCA_KIND] &&
+				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
 					goto create_n_graft;
 			}
 		}
@@ -766,7 +773,7 @@ replay:
 		return -ENOENT;
 	if (n->nlmsg_flags&NLM_F_EXCL)
 		return -EEXIST;
-	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 		return -EINVAL;
 	err = qdisc_change(q, tca);
 	if (err == 0)
@@ -827,31 +834,31 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	tcm->tcm_parent = clid;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = atomic_read(&q->refcnt);
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 			TCA_XSTATS, q->stats_lock, &d) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (gnet_stats_finish_copy(&d) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -939,7 +946,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = skb->sk->sk_net;
 	struct tcmsg *tcm = NLMSG_DATA(n);
-	struct rtattr **tca = arg;
+	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
 	struct Qdisc *q = NULL;
 	const struct Qdisc_class_ops *cops;
@@ -956,6 +963,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+	if (err < 0)
+		return err;
+
 	/*
 	   parent == TC_H_UNSPEC - unspecified parent.
 	   parent == TC_H_ROOT   - class is root, which has no parent.
@@ -1069,25 +1080,25 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	tcm->tcm_parent = q->handle;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = 0;
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 			TCA_XSTATS, q->stats_lock, &d) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (gnet_stats_finish_copy(&d) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 734be9d37d4..eb01aae117d 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -196,13 +196,13 @@ static const u8 llc_oui_ip[] = {
 };				/* Ethertype IP (0800) */
 
 static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
-			 struct rtattr **tca, unsigned long *arg)
+			 struct nlattr **tca, unsigned long *arg)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
 	struct atm_flow_data *excess = NULL;
-	struct rtattr *opt = tca[TCA_OPTIONS - 1];
-	struct rtattr *tb[TCA_ATM_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_ATM_MAX + 1];
 	struct socket *sock;
 	int fd, error, hdr_len;
 	void *hdr;
@@ -223,31 +223,31 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	 */
 	if (flow)
 		return -EBUSY;
-	if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt))
+	if (opt == NULL || nla_parse_nested(tb, TCA_ATM_MAX, opt, NULL))
 		return -EINVAL;
-	if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd))
+	if (!tb[TCA_ATM_FD] || nla_len(tb[TCA_ATM_FD]) < sizeof(fd))
 		return -EINVAL;
-	fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]);
+	fd = *(int *)nla_data(tb[TCA_ATM_FD]);
 	pr_debug("atm_tc_change: fd %d\n", fd);
-	if (tb[TCA_ATM_HDR - 1]) {
-		hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]);
-		hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]);
+	if (tb[TCA_ATM_HDR]) {
+		hdr_len = nla_len(tb[TCA_ATM_HDR]);
+		hdr = nla_data(tb[TCA_ATM_HDR]);
 	} else {
 		hdr_len = RFC1483LLC_LEN;
 		hdr = NULL;	/* default LLC/SNAP for IP */
 	}
-	if (!tb[TCA_ATM_EXCESS - 1])
+	if (!tb[TCA_ATM_EXCESS])
 		excess = NULL;
 	else {
-		if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS - 1]) != sizeof(u32))
+		if (nla_len(tb[TCA_ATM_EXCESS]) != sizeof(u32))
 			return -EINVAL;
 		excess = (struct atm_flow_data *)
-			atm_tc_get(sch, *(u32 *)RTA_DATA(tb[TCA_ATM_EXCESS - 1]));
+			atm_tc_get(sch, *(u32 *)nla_data(tb[TCA_ATM_EXCESS]));
 		if (!excess)
 			return -ENOENT;
 	}
 	pr_debug("atm_tc_change: type %d, payload %lu, hdr_len %d\n",
-		 opt->rta_type, RTA_PAYLOAD(opt), hdr_len);
+		 opt->nla_type, nla_len(opt), hdr_len);
 	sock = sockfd_lookup(fd, &error);
 	if (!sock)
 		return error;	/* f_count++ */
@@ -541,7 +541,7 @@ static unsigned int atm_tc_drop(struct Qdisc *sch)
 	return 0;
 }
 
-static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt)
+static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 
@@ -602,7 +602,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
 	pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
 		sch, p, flow, skb, tcm);
@@ -610,9 +610,9 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 		return -EINVAL;
 	tcm->tcm_handle = flow->classid;
 	tcm->tcm_info = flow->q->handle;
-	rta = (struct rtattr *)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	RTA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
+	nla = (struct nlattr *)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	NLA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
 	if (flow->vcc) {
 		struct sockaddr_atmpvc pvc;
 		int state;
@@ -621,21 +621,21 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 		pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
 		pvc.sap_addr.vpi = flow->vcc->vpi;
 		pvc.sap_addr.vci = flow->vcc->vci;
-		RTA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
+		NLA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
 		state = ATM_VF2VS(flow->vcc->flags);
-		RTA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state);
+		NLA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state);
 	}
 	if (flow->excess)
-		RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid);
+		NLA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid);
 	else {
 		static u32 zero;
 
-		RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero);
+		NLA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero);
 	}
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index bea123fc24a..5c8667ef4ba 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1377,24 +1377,24 @@ static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
 	return 0;
 }
 
-static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
+static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct rtattr *tb[TCA_CBQ_MAX];
+	struct nlattr *tb[TCA_CBQ_MAX + 1];
 	struct tc_ratespec *r;
 
-	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
-	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
+	if (nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL) < 0 ||
+	    tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL ||
+	    nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
 		return -EINVAL;
 
-	if (tb[TCA_CBQ_LSSOPT-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
+	if (tb[TCA_CBQ_LSSOPT] &&
+	    nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
 		return -EINVAL;
 
-	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
+	r = nla_data(tb[TCA_CBQ_RATE]);
 
-	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
+	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
 		return -EINVAL;
 
 	q->link.refcnt = 1;
@@ -1427,8 +1427,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 
 	cbq_link_class(&q->link);
 
-	if (tb[TCA_CBQ_LSSOPT-1])
-		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
+	if (tb[TCA_CBQ_LSSOPT])
+		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
 
 	cbq_addprio(q, &q->link);
 	return 0;
@@ -1438,10 +1438,10 @@ static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 
-	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
+	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1463,10 +1463,10 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 	opt.minidle = (u32)(-cl->minidle);
 	opt.offtime = cl->offtime;
 	opt.change = ~0;
-	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1481,10 +1481,10 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 	opt.priority = cl->priority+1;
 	opt.cpriority = cl->cpriority+1;
 	opt.weight = cl->weight;
-	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1498,10 +1498,10 @@ static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 	opt.priority2 = cl->priority2+1;
 	opt.pad = 0;
 	opt.penalty = cl->penalty;
-	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1515,11 +1515,11 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 		opt.split = cl->split ? cl->split->classid : 0;
 		opt.defmap = cl->defmap;
 		opt.defchange = ~0;
-		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
+		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
 	}
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1534,11 +1534,11 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 		opt.police = cl->police;
 		opt.__res1 = 0;
 		opt.__res2 = 0;
-		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
+		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
 	}
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1562,16 +1562,16 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, &q->link) < 0)
-		goto rtattr_failure;
-	rta->rta_len = skb_tail_pointer(skb) - b;
+		goto nla_put_failure;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1591,7 +1591,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 {
 	struct cbq_class *cl = (struct cbq_class*)arg;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
 	if (cl->tparent)
 		tcm->tcm_parent = cl->tparent->classid;
@@ -1600,14 +1600,14 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	tcm->tcm_handle = cl->classid;
 	tcm->tcm_info = cl->q->handle;
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, cl) < 0)
-		goto rtattr_failure;
-	rta->rta_len = skb_tail_pointer(skb) - b;
+		goto nla_put_failure;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1753,43 +1753,43 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
 }
 
 static int
-cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
+cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
 		 unsigned long *arg)
 {
 	int err;
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class*)*arg;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_CBQ_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_CBQ_MAX + 1];
 	struct cbq_class *parent;
 	struct qdisc_rate_table *rtab = NULL;
 
-	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
+	if (opt==NULL || nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL))
 		return -EINVAL;
 
-	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
+	if (tb[TCA_CBQ_OVL_STRATEGY] &&
+	    nla_len(tb[TCA_CBQ_OVL_STRATEGY]) < sizeof(struct tc_cbq_ovl))
 		return -EINVAL;
 
-	if (tb[TCA_CBQ_FOPT-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
+	if (tb[TCA_CBQ_FOPT] &&
+	    nla_len(tb[TCA_CBQ_FOPT]) < sizeof(struct tc_cbq_fopt))
 		return -EINVAL;
 
-	if (tb[TCA_CBQ_RATE-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
+	if (tb[TCA_CBQ_RATE] &&
+	    nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
 			return -EINVAL;
 
-	if (tb[TCA_CBQ_LSSOPT-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
+	if (tb[TCA_CBQ_LSSOPT] &&
+	    nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
 			return -EINVAL;
 
-	if (tb[TCA_CBQ_WRROPT-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
+	if (tb[TCA_CBQ_WRROPT] &&
+	    nla_len(tb[TCA_CBQ_WRROPT]) < sizeof(struct tc_cbq_wrropt))
 			return -EINVAL;
 
 #ifdef CONFIG_NET_CLS_ACT
-	if (tb[TCA_CBQ_POLICE-1] &&
-	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
+	if (tb[TCA_CBQ_POLICE] &&
+	    nla_len(tb[TCA_CBQ_POLICE]) < sizeof(struct tc_cbq_police))
 			return -EINVAL;
 #endif
 
@@ -1802,8 +1802,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 				return -EINVAL;
 		}
 
-		if (tb[TCA_CBQ_RATE-1]) {
-			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
+		if (tb[TCA_CBQ_RATE]) {
+			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
 			if (rtab == NULL)
 				return -EINVAL;
 		}
@@ -1819,45 +1819,45 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 			qdisc_put_rtab(rtab);
 		}
 
-		if (tb[TCA_CBQ_LSSOPT-1])
-			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
+		if (tb[TCA_CBQ_LSSOPT])
+			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
 
-		if (tb[TCA_CBQ_WRROPT-1]) {
+		if (tb[TCA_CBQ_WRROPT]) {
 			cbq_rmprio(q, cl);
-			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
+			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
 		}
 
-		if (tb[TCA_CBQ_OVL_STRATEGY-1])
-			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
+		if (tb[TCA_CBQ_OVL_STRATEGY])
+			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
 
 #ifdef CONFIG_NET_CLS_ACT
-		if (tb[TCA_CBQ_POLICE-1])
-			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
+		if (tb[TCA_CBQ_POLICE])
+			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
 #endif
 
-		if (tb[TCA_CBQ_FOPT-1])
-			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
+		if (tb[TCA_CBQ_FOPT])
+			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
 
 		if (cl->q->q.qlen)
 			cbq_activate_class(cl);
 
 		sch_tree_unlock(sch);
 
-		if (tca[TCA_RATE-1])
+		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
 					      &sch->dev->queue_lock,
-					      tca[TCA_RATE-1]);
+					      tca[TCA_RATE]);
 		return 0;
 	}
 
 	if (parentid == TC_H_ROOT)
 		return -EINVAL;
 
-	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
-	    tb[TCA_CBQ_LSSOPT-1] == NULL)
+	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
+	    tb[TCA_CBQ_LSSOPT] == NULL)
 		return -EINVAL;
 
-	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
+	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
 	if (rtab == NULL)
 		return -EINVAL;
 
@@ -1912,8 +1912,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 		cl->share = cl->tparent;
 	cbq_adjust_levels(parent);
 	cl->minidle = -0x7FFFFFFF;
-	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
-	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
+	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
+	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
 	if (cl->ewma_log==0)
 		cl->ewma_log = q->link.ewma_log;
 	if (cl->maxidle==0)
@@ -1921,19 +1921,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 	if (cl->avpkt==0)
 		cl->avpkt = q->link.avpkt;
 	cl->overlimit = cbq_ovl_classic;
-	if (tb[TCA_CBQ_OVL_STRATEGY-1])
-		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
+	if (tb[TCA_CBQ_OVL_STRATEGY])
+		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
 #ifdef CONFIG_NET_CLS_ACT
-	if (tb[TCA_CBQ_POLICE-1])
-		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
+	if (tb[TCA_CBQ_POLICE])
+		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
 #endif
-	if (tb[TCA_CBQ_FOPT-1])
-		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
+	if (tb[TCA_CBQ_FOPT])
+		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
 	sch_tree_unlock(sch);
 
-	if (tca[TCA_RATE-1])
+	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev->queue_lock, tca[TCA_RATE-1]);
+				  &sch->dev->queue_lock, tca[TCA_RATE]);
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 40e06a6890d..f183ab76887 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -100,11 +100,11 @@ static void dsmark_put(struct Qdisc *sch, unsigned long cl)
 }
 
 static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
-			 struct rtattr **tca, unsigned long *arg)
+			 struct nlattr **tca, unsigned long *arg)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_DSMARK_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_DSMARK_MAX + 1];
 	int err = -EINVAL;
 	u8 mask = 0;
 
@@ -113,24 +113,29 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 
 	if (!dsmark_valid_index(p, *arg)) {
 		err = -ENOENT;
-		goto rtattr_failure;
+		goto errout;
 	}
 
-	if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
-		goto rtattr_failure;
-
-	if (tb[TCA_DSMARK_MASK-1])
-		mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
+	if (!opt || nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL))
+		goto errout;
 
-	if (tb[TCA_DSMARK_VALUE-1])
-		p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
+	if (tb[TCA_DSMARK_MASK]) {
+		if (nla_len(tb[TCA_DSMARK_MASK]) < sizeof(u8))
+			goto errout;
+		mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
+	}
+	if (tb[TCA_DSMARK_VALUE]) {
+		if (nla_len(tb[TCA_DSMARK_VALUE]) < sizeof(u8))
+			goto errout;
+		p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+	}
 
-	if (tb[TCA_DSMARK_MASK-1])
+	if (tb[TCA_DSMARK_MASK])
 		p->mask[*arg-1] = mask;
 
 	err = 0;
 
-rtattr_failure:
+errout:
 	return err;
 }
 
@@ -335,10 +340,10 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
 	return len;
 }
 
-static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
+static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct rtattr *tb[TCA_DSMARK_MAX];
+	struct nlattr *tb[TCA_DSMARK_MAX + 1];
 	int err = -EINVAL;
 	u32 default_index = NO_DEFAULT_INDEX;
 	u16 indices;
@@ -346,16 +351,21 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
 
 	pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 
-	if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
+	if (!opt || nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL) < 0)
 		goto errout;
 
-	indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
+	if (nla_len(tb[TCA_DSMARK_INDICES]) < sizeof(u16))
+		goto errout;
+	indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
 
 	if (hweight32(indices) != 1)
 		goto errout;
 
-	if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
-		default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
+	if (tb[TCA_DSMARK_DEFAULT_INDEX]) {
+		if (nla_len(tb[TCA_DSMARK_DEFAULT_INDEX]) < sizeof(u16))
+			goto errout;
+		default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
+	}
 
 	mask = kmalloc(indices * 2, GFP_KERNEL);
 	if (mask == NULL) {
@@ -371,7 +381,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
 
 	p->indices = indices;
 	p->default_index = default_index;
-	p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
+	p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
 
 	p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
 	if (p->q == NULL)
@@ -381,7 +391,6 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
 
 	err = 0;
 errout:
-rtattr_failure:
 	return err;
 }
 
@@ -409,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 			     struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct rtattr *opts = NULL;
+	struct nlattr *opts = NULL;
 
 	pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
 
@@ -419,34 +428,38 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
 	tcm->tcm_info = p->q->handle;
 
-	opts = RTA_NEST(skb, TCA_OPTIONS);
-	RTA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
-	RTA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
+	NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
 
-	return RTA_NEST_END(skb, opts);
+	return nla_nest_end(skb, opts);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, opts);
+nla_put_failure:
+	return nla_nest_cancel(skb, opts);
 }
 
 static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct rtattr *opts = NULL;
+	struct nlattr *opts = NULL;
 
-	opts = RTA_NEST(skb, TCA_OPTIONS);
-	RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
 
 	if (p->default_index != NO_DEFAULT_INDEX)
-		RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
+		NLA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
 
 	if (p->set_tc_index)
-		RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
+		NLA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
 
-	return RTA_NEST_END(skb, opts);
+	return nla_nest_end(skb, opts);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, opts);
+nla_put_failure:
+	return nla_nest_cancel(skb, opts);
 }
 
 static const struct Qdisc_class_ops dsmark_class_ops = {
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index fd0591903c8..95ed4822165 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -43,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return qdisc_reshape_fail(skb, sch);
 }
 
-static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
+static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
@@ -55,9 +55,9 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
 
 		q->limit = limit;
 	} else {
-		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
+		struct tc_fifo_qopt *ctl = nla_data(opt);
 
-		if (RTA_PAYLOAD(opt) < sizeof(*ctl))
+		if (nla_len(opt) < sizeof(*ctl))
 			return -EINVAL;
 
 		q->limit = ctl->limit;
@@ -71,10 +71,10 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct tc_fifo_qopt opt = { .limit = q->limit };
 
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 51e64acd509..10b5c0887ff 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -397,14 +397,14 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
 	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
 
 	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
-static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
+static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
 {
 	int prio;
 	struct sk_buff_head *list = qdisc_priv(qdisc);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index e2bcd6682c7..6b784838a53 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -350,16 +350,16 @@ static inline void gred_destroy_vq(struct gred_sched_data *q)
 	kfree(q);
 }
 
-static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps)
+static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct tc_gred_sopt *sopt;
 	int i;
 
-	if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt))
+	if (dps == NULL || nla_len(dps) < sizeof(*sopt))
 		return -EINVAL;
 
-	sopt = RTA_DATA(dps);
+	sopt = nla_data(dps);
 
 	if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
 		return -EINVAL;
@@ -425,28 +425,28 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 	return 0;
 }
 
-static int gred_change(struct Qdisc *sch, struct rtattr *opt)
+static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct tc_gred_qopt *ctl;
-	struct rtattr *tb[TCA_GRED_MAX];
+	struct nlattr *tb[TCA_GRED_MAX + 1];
 	int err = -EINVAL, prio = GRED_DEF_PRIO;
 	u8 *stab;
 
-	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
+	if (opt == NULL || nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL))
 		return -EINVAL;
 
-	if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL)
+	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
 		return gred_change_table_def(sch, opt);
 
-	if (tb[TCA_GRED_PARMS-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
-	    tb[TCA_GRED_STAB-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
+	if (tb[TCA_GRED_PARMS] == NULL ||
+	    nla_len(tb[TCA_GRED_PARMS]) < sizeof(*ctl) ||
+	    tb[TCA_GRED_STAB] == NULL ||
+	    nla_len(tb[TCA_GRED_STAB]) < 256)
 		return -EINVAL;
 
-	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
-	stab = RTA_DATA(tb[TCA_GRED_STAB-1]);
+	ctl = nla_data(tb[TCA_GRED_PARMS]);
+	stab = nla_data(tb[TCA_GRED_STAB]);
 
 	if (ctl->DP >= table->DPs)
 		goto errout;
@@ -486,23 +486,23 @@ errout:
 	return err;
 }
 
-static int gred_init(struct Qdisc *sch, struct rtattr *opt)
+static int gred_init(struct Qdisc *sch, struct nlattr *opt)
 {
-	struct rtattr *tb[TCA_GRED_MAX];
+	struct nlattr *tb[TCA_GRED_MAX + 1];
 
-	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
+	if (opt == NULL || nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL))
 		return -EINVAL;
 
-	if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1])
+	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
 		return -EINVAL;
 
-	return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]);
+	return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
 }
 
 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct gred_sched *table = qdisc_priv(sch);
-	struct rtattr *parms, *opts = NULL;
+	struct nlattr *parms, *opts = NULL;
 	int i;
 	struct tc_gred_sopt sopt = {
 		.DPs	= table->DPs,
@@ -511,9 +511,13 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 		.flags	= table->red_flags,
 	};
 
-	opts = RTA_NEST(skb, TCA_OPTIONS);
-	RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
-	parms = RTA_NEST(skb, TCA_GRED_PARMS);
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+	NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+	parms = nla_nest_start(skb, TCA_GRED_PARMS);
+	if (parms == NULL)
+		goto nla_put_failure;
 
 	for (i = 0; i < MAX_DPs; i++) {
 		struct gred_sched_data *q = table->tab[i];
@@ -555,15 +559,16 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 		opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
 
 append_opt:
-		RTA_APPEND(skb, sizeof(opt), &opt);
+		if (nla_append(skb, sizeof(opt), &opt) < 0)
+			goto nla_put_failure;
 	}
 
-	RTA_NEST_END(skb, parms);
+	nla_nest_end(skb, parms);
 
-	return RTA_NEST_END(skb, opts);
+	return nla_nest_end(skb, opts);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, opts);
+nla_put_failure:
+	return nla_nest_cancel(skb, opts);
 }
 
 static void gred_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 69dc3bccf02..4e6a164d305 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -988,39 +988,39 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
 
 static int
 hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
-		  struct rtattr **tca, unsigned long *arg)
+		  struct nlattr **tca, unsigned long *arg)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl = (struct hfsc_class *)*arg;
 	struct hfsc_class *parent = NULL;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_HFSC_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_HFSC_MAX + 1];
 	struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
 	u64 cur_time;
 
-	if (opt == NULL || rtattr_parse_nested(tb, TCA_HFSC_MAX, opt))
+	if (opt == NULL || nla_parse_nested(tb, TCA_HFSC_MAX, opt, NULL))
 		return -EINVAL;
 
-	if (tb[TCA_HFSC_RSC-1]) {
-		if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc))
+	if (tb[TCA_HFSC_RSC]) {
+		if (nla_len(tb[TCA_HFSC_RSC]) < sizeof(*rsc))
 			return -EINVAL;
-		rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]);
+		rsc = nla_data(tb[TCA_HFSC_RSC]);
 		if (rsc->m1 == 0 && rsc->m2 == 0)
 			rsc = NULL;
 	}
 
-	if (tb[TCA_HFSC_FSC-1]) {
-		if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc))
+	if (tb[TCA_HFSC_FSC]) {
+		if (nla_len(tb[TCA_HFSC_FSC]) < sizeof(*fsc))
 			return -EINVAL;
-		fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
+		fsc = nla_data(tb[TCA_HFSC_FSC]);
 		if (fsc->m1 == 0 && fsc->m2 == 0)
 			fsc = NULL;
 	}
 
-	if (tb[TCA_HFSC_USC-1]) {
-		if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc))
+	if (tb[TCA_HFSC_USC]) {
+		if (nla_len(tb[TCA_HFSC_USC]) < sizeof(*usc))
 			return -EINVAL;
-		usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
+		usc = nla_data(tb[TCA_HFSC_USC]);
 		if (usc->m1 == 0 && usc->m2 == 0)
 			usc = NULL;
 	}
@@ -1050,10 +1050,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 		sch_tree_unlock(sch);
 
-		if (tca[TCA_RATE-1])
+		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
 					      &sch->dev->queue_lock,
-					      tca[TCA_RATE-1]);
+					      tca[TCA_RATE]);
 		return 0;
 	}
 
@@ -1106,9 +1106,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->cl_pcvtoff = parent->cl_cvtoff;
 	sch_tree_unlock(sch);
 
-	if (tca[TCA_RATE-1])
+	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev->queue_lock, tca[TCA_RATE-1]);
+				  &sch->dev->queue_lock, tca[TCA_RATE]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
@@ -1304,11 +1304,11 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
 	tsc.m1 = sm2m(sc->sm1);
 	tsc.d  = dx2d(sc->dx);
 	tsc.m2 = sm2m(sc->sm2);
-	RTA_PUT(skb, attr, sizeof(tsc), &tsc);
+	NLA_PUT(skb, attr, sizeof(tsc), &tsc);
 
 	return skb->len;
 
- rtattr_failure:
+ nla_put_failure:
 	return -1;
 }
 
@@ -1317,19 +1317,19 @@ hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
 {
 	if ((cl->cl_flags & HFSC_RSC) &&
 	    (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if ((cl->cl_flags & HFSC_FSC) &&
 	    (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if ((cl->cl_flags & HFSC_USC) &&
 	    (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	return skb->len;
 
- rtattr_failure:
+ nla_put_failure:
 	return -1;
 }
 
@@ -1339,20 +1339,20 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta = (struct rtattr *)b;
+	struct nlattr *nla = (struct nlattr *)b;
 
 	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->classid;
 	if (cl->level == 0)
 		tcm->tcm_info = cl->qdisc->handle;
 
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (hfsc_dump_curves(skb, cl) < 0)
-		goto rtattr_failure;
-	rta->rta_len = skb_tail_pointer(skb) - b;
+		goto nla_put_failure;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
- rtattr_failure:
+ nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1423,15 +1423,15 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
 }
 
 static int
-hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
+hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct tc_hfsc_qopt *qopt;
 	unsigned int i;
 
-	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+	if (opt == NULL || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
-	qopt = RTA_DATA(opt);
+	qopt = nla_data(opt);
 
 	q->defcls = qopt->defcls;
 	for (i = 0; i < HFSC_HSIZE; i++)
@@ -1459,14 +1459,14 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
 }
 
 static int
-hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt)
+hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct tc_hfsc_qopt *qopt;
 
-	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+	if (opt == NULL || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
-	qopt = RTA_DATA(opt);
+	qopt = nla_data(opt);
 
 	sch_tree_lock(sch);
 	q->defcls = qopt->defcls;
@@ -1550,10 +1550,10 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_hfsc_qopt qopt;
 
 	qopt.defcls = q->defcls;
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
 	return skb->len;
 
- rtattr_failure:
+ nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6a2352cd9c2..3b3ff641b6d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -992,19 +992,19 @@ static void htb_reset(struct Qdisc *sch)
 		INIT_LIST_HEAD(q->drops + i);
 }
 
-static int htb_init(struct Qdisc *sch, struct rtattr *opt)
+static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct rtattr *tb[TCA_HTB_INIT];
+	struct nlattr *tb[TCA_HTB_INIT + 1];
 	struct tc_htb_glob *gopt;
 	int i;
-	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
-	    tb[TCA_HTB_INIT - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
+	if (!opt || nla_parse_nested(tb, TCA_HTB_INIT, opt, NULL) ||
+	    tb[TCA_HTB_INIT] == NULL ||
+	    nla_len(tb[TCA_HTB_INIT]) < sizeof(*gopt)) {
 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
-	gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
+	gopt = nla_data(tb[TCA_HTB_INIT]);
 	if (gopt->version != HTB_VER >> 16) {
 		printk(KERN_ERR
 		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
@@ -1036,7 +1036,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 	struct tc_htb_glob gopt;
 	spin_lock_bh(&sch->dev->queue_lock);
 	gopt.direct_pkts = q->direct_pkts;
@@ -1045,13 +1045,13 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	gopt.rate2quantum = q->rate2quantum;
 	gopt.defcls = q->defcls;
 	gopt.debug = 0;
-	rta = (struct rtattr *)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla = (struct nlattr *)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
-rtattr_failure:
+nla_put_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
 	nlmsg_trim(skb, skb_tail_pointer(skb));
 	return -1;
@@ -1062,7 +1062,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 {
 	struct htb_class *cl = (struct htb_class *)arg;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 	struct tc_htb_opt opt;
 
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -1071,8 +1071,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	if (!cl->level && cl->un.leaf.q)
 		tcm->tcm_info = cl->un.leaf.q->handle;
 
-	rta = (struct rtattr *)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr *)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
 	memset(&opt, 0, sizeof(opt));
 
@@ -1083,11 +1083,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.quantum = cl->un.leaf.quantum;
 	opt.prio = cl->un.leaf.prio;
 	opt.level = cl->level;
-	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
-rtattr_failure:
+nla_put_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
 	nlmsg_trim(skb, b);
 	return -1;
@@ -1290,29 +1290,29 @@ static void htb_put(struct Qdisc *sch, unsigned long arg)
 }
 
 static int htb_change_class(struct Qdisc *sch, u32 classid,
-			    u32 parentid, struct rtattr **tca,
+			    u32 parentid, struct nlattr **tca,
 			    unsigned long *arg)
 {
 	int err = -EINVAL;
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
-	struct rtattr *opt = tca[TCA_OPTIONS - 1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
-	struct rtattr *tb[TCA_HTB_RTAB];
+	struct nlattr *tb[TCA_HTB_RTAB + 1];
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
-	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
-	    tb[TCA_HTB_PARMS - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
+	if (!opt || nla_parse_nested(tb, TCA_HTB_RTAB, opt, NULL) ||
+	    tb[TCA_HTB_PARMS] == NULL ||
+	    nla_len(tb[TCA_HTB_PARMS]) < sizeof(*hopt))
 		goto failure;
 
 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
 
-	hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
+	hopt = nla_data(tb[TCA_HTB_PARMS]);
 
-	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
-	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
+	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
 	if (!rtab || !ctab)
 		goto failure;
 
@@ -1320,12 +1320,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		struct Qdisc *new_q;
 		int prio;
 		struct {
-			struct rtattr		rta;
+			struct nlattr		nla;
 			struct gnet_estimator	opt;
 		} est = {
-			.rta = {
-				.rta_len	= RTA_LENGTH(sizeof(est.opt)),
-				.rta_type	= TCA_RATE,
+			.nla = {
+				.nla_len	= nla_attr_size(sizeof(est.opt)),
+				.nla_type	= TCA_RATE,
 			},
 			.opt = {
 				/* 4s interval, 16s averaging constant */
@@ -1350,7 +1350,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
 				  &sch->dev->queue_lock,
-				  tca[TCA_RATE-1] ? : &est.rta);
+				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
 		INIT_HLIST_NODE(&cl->hlist);
@@ -1403,10 +1403,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
 	} else {
-		if (tca[TCA_RATE-1])
+		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
 					      &sch->dev->queue_lock,
-					      tca[TCA_RATE-1]);
+					      tca[TCA_RATE]);
 		sch_tree_lock(sch);
 	}
 
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 72525710b66..f6decbb5664 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -57,7 +57,7 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl)
 }
 
 static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
-			  struct rtattr **tca, unsigned long *arg)
+			  struct nlattr **tca, unsigned long *arg)
 {
 	return 0;
 }
@@ -156,7 +156,7 @@ static struct nf_hook_ops ing_ops[] __read_mostly = {
 };
 #endif
 
-static int ingress_init(struct Qdisc *sch, struct rtattr *opt)
+static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
 #if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
 	printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
@@ -184,14 +184,14 @@ static void ingress_destroy(struct Qdisc *sch)
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
-	rta = (struct rtattr *)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla = (struct nlattr *)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6c344ade33c..a7b58df4546 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -313,21 +313,21 @@ static void netem_reset(struct Qdisc *sch)
 /* Pass size change message down to embedded FIFO */
 static int set_fifo_limit(struct Qdisc *q, int limit)
 {
-	struct rtattr *rta;
+	struct nlattr *nla;
 	int ret = -ENOMEM;
 
 	/* Hack to avoid sending change message to non-FIFO */
 	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
 		return 0;
 
-	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
-	if (rta) {
-		rta->rta_type = RTM_NEWQDISC;
-		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
-		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+	nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+	if (nla) {
+		nla->nla_type = RTM_NEWQDISC;
+		nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
+		((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
 
-		ret = q->ops->change(q, rta);
-		kfree(rta);
+		ret = q->ops->change(q, nla);
+		kfree(nla);
 	}
 	return ret;
 }
@@ -336,11 +336,11 @@ static int set_fifo_limit(struct Qdisc *q, int limit)
  * Distribution data is a variable size payload containing
  * signed 16 bit values.
  */
-static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
+static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
-	const __s16 *data = RTA_DATA(attr);
+	unsigned long n = nla_len(attr)/sizeof(__s16);
+	const __s16 *data = nla_data(attr);
 	struct disttable *d;
 	int i;
 
@@ -363,12 +363,12 @@ static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
 	return 0;
 }
 
-static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
+static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	const struct tc_netem_corr *c = RTA_DATA(attr);
+	const struct tc_netem_corr *c = nla_data(attr);
 
-	if (RTA_PAYLOAD(attr) != sizeof(*c))
+	if (nla_len(attr) != sizeof(*c))
 		return -EINVAL;
 
 	init_crandom(&q->delay_cor, c->delay_corr);
@@ -377,12 +377,12 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
 	return 0;
 }
 
-static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
+static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	const struct tc_netem_reorder *r = RTA_DATA(attr);
+	const struct tc_netem_reorder *r = nla_data(attr);
 
-	if (RTA_PAYLOAD(attr) != sizeof(*r))
+	if (nla_len(attr) != sizeof(*r))
 		return -EINVAL;
 
 	q->reorder = r->probability;
@@ -390,12 +390,12 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
 	return 0;
 }
 
-static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
+static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	const struct tc_netem_corrupt *r = RTA_DATA(attr);
+	const struct tc_netem_corrupt *r = nla_data(attr);
 
-	if (RTA_PAYLOAD(attr) != sizeof(*r))
+	if (nla_len(attr) != sizeof(*r))
 		return -EINVAL;
 
 	q->corrupt = r->probability;
@@ -404,16 +404,16 @@ static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
 }
 
 /* Parse netlink message to set options */
-static int netem_change(struct Qdisc *sch, struct rtattr *opt)
+static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct tc_netem_qopt *qopt;
 	int ret;
 
-	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+	if (opt == NULL || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 
-	qopt = RTA_DATA(opt);
+	qopt = nla_data(opt);
 	ret = set_fifo_limit(q->qdisc, qopt->limit);
 	if (ret) {
 		pr_debug("netem: can't set fifo limit\n");
@@ -437,33 +437,33 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 	/* Handle nested options after initial queue options.
 	 * Should have put all options in nested format but too late now.
 	 */
-	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
-		struct rtattr *tb[TCA_NETEM_MAX];
-		if (rtattr_parse(tb, TCA_NETEM_MAX,
-				 RTA_DATA(opt) + sizeof(*qopt),
-				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
+	if (nla_len(opt) > sizeof(*qopt)) {
+		struct nlattr *tb[TCA_NETEM_MAX + 1];
+		if (nla_parse(tb, TCA_NETEM_MAX,
+			      nla_data(opt) + sizeof(*qopt),
+			      nla_len(opt) - sizeof(*qopt), NULL))
 			return -EINVAL;
 
-		if (tb[TCA_NETEM_CORR-1]) {
-			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
+		if (tb[TCA_NETEM_CORR]) {
+			ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
 			if (ret)
 				return ret;
 		}
 
-		if (tb[TCA_NETEM_DELAY_DIST-1]) {
-			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
+		if (tb[TCA_NETEM_DELAY_DIST]) {
+			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
 			if (ret)
 				return ret;
 		}
 
-		if (tb[TCA_NETEM_REORDER-1]) {
-			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
+		if (tb[TCA_NETEM_REORDER]) {
+			ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
 			if (ret)
 				return ret;
 		}
 
-		if (tb[TCA_NETEM_CORRUPT-1]) {
-			ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
+		if (tb[TCA_NETEM_CORRUPT]) {
+			ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 			if (ret)
 				return ret;
 		}
@@ -515,13 +515,13 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 	return qdisc_reshape_fail(nskb, sch);
 }
 
-static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
+static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
 	if (opt) {
-		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
-		if (RTA_PAYLOAD(opt) < sizeof(*ctl))
+		struct tc_fifo_qopt *ctl = nla_data(opt);
+		if (nla_len(opt) < sizeof(*ctl))
 			return -EINVAL;
 
 		q->limit = ctl->limit;
@@ -537,10 +537,10 @@ static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct tc_fifo_qopt opt = { .limit = q->limit };
 
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
@@ -557,7 +557,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
 	.dump		=	tfifo_dump,
 };
 
-static int netem_init(struct Qdisc *sch, struct rtattr *opt)
+static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	int ret;
@@ -595,7 +595,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	const struct netem_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta = (struct rtattr *) b;
+	struct nlattr *nla = (struct nlattr *) b;
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
@@ -607,26 +607,26 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	qopt.loss = q->loss;
 	qopt.gap = q->gap;
 	qopt.duplicate = q->duplicate;
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
 
 	cor.delay_corr = q->delay_cor.rho;
 	cor.loss_corr = q->loss_cor.rho;
 	cor.dup_corr = q->dup_cor.rho;
-	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
 
 	reorder.probability = q->reorder;
 	reorder.correlation = q->reorder_cor.rho;
-	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
+	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
 
 	corrupt.probability = q->corrupt;
 	corrupt.correlation = q->corrupt_cor.rho;
-	RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -678,7 +678,7 @@ static void netem_put(struct Qdisc *sch, unsigned long arg)
 }
 
 static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
-			    struct rtattr **tca, unsigned long *arg)
+			    struct nlattr **tca, unsigned long *arg)
 {
 	return -ENOSYS;
 }
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2243aaa8d85..a4f932df86e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -224,15 +224,15 @@ prio_destroy(struct Qdisc* sch)
 		qdisc_destroy(q->queues[prio]);
 }
 
-static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
+static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	struct tc_prio_qopt *qopt;
-	struct rtattr *tb[TCA_PRIO_MAX];
+	struct nlattr *tb[TCA_PRIO_MAX + 1];
 	int i;
 
-	if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt,
-				       sizeof(*qopt)))
+	if (nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
+				    sizeof(*qopt)))
 		return -EINVAL;
 	q->bands = qopt->bands;
 	/* If we're multiqueue, make sure the number of incoming bands
@@ -242,7 +242,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
 	 * only one that is enabled for multiqueue, since it's the only one
 	 * that interacts with the underlying device.
 	 */
-	q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]);
+	q->mq = nla_get_flag(tb[TCA_PRIO_MQ]);
 	if (q->mq) {
 		if (sch->parent != TC_H_ROOT)
 			return -EINVAL;
@@ -296,7 +296,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
-static int prio_init(struct Qdisc *sch, struct rtattr *opt)
+static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int i;
@@ -319,20 +319,24 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *nest;
+	struct nlattr *nest;
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
 	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
 
-	nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	if (q->mq)
-		RTA_PUT_FLAG(skb, TCA_PRIO_MQ);
-	RTA_NEST_COMPAT_END(skb, nest);
+	nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	if (nest == NULL)
+		goto nla_put_failure;
+	if (q->mq) {
+		if (nla_put_flag(skb, TCA_PRIO_MQ) < 0)
+			goto nla_put_failure;
+	}
+	nla_nest_compat_end(skb, nest);
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -392,7 +396,7 @@ static void prio_put(struct Qdisc *q, unsigned long cl)
 	return;
 }
 
-static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg)
+static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg)
 {
 	unsigned long cl = *arg;
 	struct prio_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index f1e9647f7db..6ce8da5aca0 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -177,21 +177,21 @@ static void red_destroy(struct Qdisc *sch)
 static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
 {
 	struct Qdisc *q;
-	struct rtattr *rta;
+	struct nlattr *nla;
 	int ret;
 
 	q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
 			      TC_H_MAKE(sch->handle, 1));
 	if (q) {
-		rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)),
+		nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
 			      GFP_KERNEL);
-		if (rta) {
-			rta->rta_type = RTM_NEWQDISC;
-			rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
-			((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+		if (nla) {
+			nla->nla_type = RTM_NEWQDISC;
+			nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
+			((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
 
-			ret = q->ops->change(q, rta);
-			kfree(rta);
+			ret = q->ops->change(q, nla);
+			kfree(nla);
 
 			if (ret == 0)
 				return q;
@@ -201,23 +201,23 @@ static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
 	return NULL;
 }
 
-static int red_change(struct Qdisc *sch, struct rtattr *opt)
+static int red_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
-	struct rtattr *tb[TCA_RED_MAX];
+	struct nlattr *tb[TCA_RED_MAX + 1];
 	struct tc_red_qopt *ctl;
 	struct Qdisc *child = NULL;
 
-	if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt))
+	if (opt == NULL || nla_parse_nested(tb, TCA_RED_MAX, opt, NULL))
 		return -EINVAL;
 
-	if (tb[TCA_RED_PARMS-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
-	    tb[TCA_RED_STAB-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE)
+	if (tb[TCA_RED_PARMS] == NULL ||
+	    nla_len(tb[TCA_RED_PARMS]) < sizeof(*ctl) ||
+	    tb[TCA_RED_STAB] == NULL ||
+	    nla_len(tb[TCA_RED_STAB]) < RED_STAB_SIZE)
 		return -EINVAL;
 
-	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
+	ctl = nla_data(tb[TCA_RED_PARMS]);
 
 	if (ctl->limit > 0) {
 		child = red_create_dflt(sch, ctl->limit);
@@ -235,7 +235,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
 				 ctl->Plog, ctl->Scell_log,
-				 RTA_DATA(tb[TCA_RED_STAB-1]));
+				 nla_data(tb[TCA_RED_STAB]));
 
 	if (skb_queue_empty(&sch->q))
 		red_end_of_idle_period(&q->parms);
@@ -244,7 +244,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
-static int red_init(struct Qdisc* sch, struct rtattr *opt)
+static int red_init(struct Qdisc* sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
@@ -255,7 +255,7 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
-	struct rtattr *opts = NULL;
+	struct nlattr *opts = NULL;
 	struct tc_red_qopt opt = {
 		.limit		= q->limit,
 		.flags		= q->flags,
@@ -266,12 +266,14 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 		.Scell_log	= q->parms.Scell_log,
 	};
 
-	opts = RTA_NEST(skb, TCA_OPTIONS);
-	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
-	return RTA_NEST_END(skb, opts);
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	return nla_nest_end(skb, opts);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, opts);
+nla_put_failure:
+	return nla_nest_cancel(skb, opts);
 }
 
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -332,7 +334,7 @@ static void red_put(struct Qdisc *sch, unsigned long arg)
 }
 
 static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
-			    struct rtattr **tca, unsigned long *arg)
+			    struct nlattr **tca, unsigned long *arg)
 {
 	return -ENOSYS;
 }
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 25afe0f1d83..91af539ab6e 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -397,13 +397,13 @@ static void sfq_perturbation(unsigned long arg)
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
 }
 
-static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+	struct tc_sfq_qopt *ctl = nla_data(opt);
 	unsigned int qlen;
 
-	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
 
 	sch_tree_lock(sch);
@@ -426,7 +426,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
 	return 0;
 }
 
-static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
+static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	int i;
@@ -481,11 +481,11 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.divisor = SFQ_HASH_DIVISOR;
 	opt.flows = q->limit;
 
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index d88fea9d6b6..6c4ad7e677b 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -245,20 +245,21 @@ static void tbf_reset(struct Qdisc* sch)
 static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
 {
 	struct Qdisc *q;
-	struct rtattr *rta;
+	struct nlattr *nla;
 	int ret;
 
 	q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
 			      TC_H_MAKE(sch->handle, 1));
 	if (q) {
-		rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
-		if (rta) {
-			rta->rta_type = RTM_NEWQDISC;
-			rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
-			((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+		nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
+			      GFP_KERNEL);
+		if (nla) {
+			nla->nla_type = RTM_NEWQDISC;
+			nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
+			((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
 
-			ret = q->ops->change(q, rta);
-			kfree(rta);
+			ret = q->ops->change(q, nla);
+			kfree(nla);
 
 			if (ret == 0)
 				return q;
@@ -269,30 +270,30 @@ static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
 	return NULL;
 }
 
-static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
+static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 {
 	int err = -EINVAL;
 	struct tbf_sched_data *q = qdisc_priv(sch);
-	struct rtattr *tb[TCA_TBF_PTAB];
+	struct nlattr *tb[TCA_TBF_PTAB + 1];
 	struct tc_tbf_qopt *qopt;
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
 	struct Qdisc *child = NULL;
 	int max_size,n;
 
-	if (rtattr_parse_nested(tb, TCA_TBF_PTAB, opt) ||
-	    tb[TCA_TBF_PARMS-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt))
+	if (nla_parse_nested(tb, TCA_TBF_PTAB, opt, NULL) ||
+	    tb[TCA_TBF_PARMS] == NULL ||
+	    nla_len(tb[TCA_TBF_PARMS]) < sizeof(*qopt))
 		goto done;
 
-	qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]);
-	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]);
+	qopt = nla_data(tb[TCA_TBF_PARMS]);
+	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
 	if (rtab == NULL)
 		goto done;
 
 	if (qopt->peakrate.rate) {
 		if (qopt->peakrate.rate > qopt->rate.rate)
-			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]);
+			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
 		if (ptab == NULL)
 			goto done;
 	}
@@ -339,7 +340,7 @@ done:
 	return err;
 }
 
-static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
+static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -371,11 +372,11 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 	struct tc_tbf_qopt opt;
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
 	opt.limit = q->limit;
 	opt.rate = q->R_tab->rate;
@@ -385,12 +386,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	opt.mtu = q->mtu;
 	opt.buffer = q->buffer;
-	RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
+	nla->nla_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -442,7 +443,7 @@ static void tbf_put(struct Qdisc *sch, unsigned long arg)
 }
 
 static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
-			    struct rtattr **tca, unsigned long *arg)
+			    struct nlattr **tca, unsigned long *arg)
 {
 	return -ENOSYS;
 }
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index c0ed06d4a50..1411c7b1fbd 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -168,7 +168,7 @@ teql_destroy(struct Qdisc* sch)
 	}
 }
 
-static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
+static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct net_device *dev = sch->dev;
 	struct teql_master *m = (struct teql_master*)sch->ops;
-- 
cgit v1.2.3


From add93b610a4e66d36d0cf0b2596c3d3bcfdaee39 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 22:11:33 -0800
Subject: [NET_SCHED]: Convert classifiers from rtnetlink to new netlink API

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c     | 65 ++++++++++++++++++++++-------------------
 net/sched/cls_basic.c   | 40 ++++++++++++-------------
 net/sched/cls_fw.c      | 54 +++++++++++++++++-----------------
 net/sched/cls_route.c   | 70 ++++++++++++++++++++++----------------------
 net/sched/cls_rsvp.h    | 48 +++++++++++++++---------------
 net/sched/cls_tcindex.c | 66 ++++++++++++++++++++---------------------
 net/sched/cls_u32.c     | 78 ++++++++++++++++++++++++-------------------------
 net/sched/em_meta.c     | 56 +++++++++++++++++------------------
 net/sched/em_text.c     |  9 ++++--
 net/sched/ematch.c      | 74 +++++++++++++++++++++++-----------------------
 10 files changed, 284 insertions(+), 276 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9eeb3c6c82f..87be2b2fc29 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -38,14 +38,14 @@ static DEFINE_RWLOCK(cls_mod_lock);
 
 /* Find classifier type by string name */
 
-static struct tcf_proto_ops *tcf_proto_lookup_ops(struct rtattr *kind)
+static struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
 {
 	struct tcf_proto_ops *t = NULL;
 
 	if (kind) {
 		read_lock(&cls_mod_lock);
 		for (t = tcf_proto_base; t; t = t->next) {
-			if (rtattr_strcmp(kind, t->kind) == 0) {
+			if (nla_strcmp(kind, t->kind) == 0) {
 				if (!try_module_get(t->owner))
 					t = NULL;
 				break;
@@ -118,7 +118,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = skb->sk->sk_net;
-	struct rtattr **tca;
+	struct nlattr *tca[TCA_MAX + 1];
 	struct tcmsg *t;
 	u32 protocol;
 	u32 prio;
@@ -138,7 +138,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		return -EINVAL;
 
 replay:
-	tca = arg;
 	t = NLMSG_DATA(n);
 	protocol = TC_H_MIN(t->tcm_info);
 	prio = TC_H_MAJ(t->tcm_info);
@@ -160,6 +159,10 @@ replay:
 	if (dev == NULL)
 		return -ENODEV;
 
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+	if (err < 0)
+		return err;
+
 	/* Find qdisc */
 	if (!parent) {
 		q = dev->qdisc_sleeping;
@@ -202,7 +205,7 @@ replay:
 	if (tp == NULL) {
 		/* Proto-tcf does not exist, create new one */
 
-		if (tca[TCA_KIND-1] == NULL || !protocol)
+		if (tca[TCA_KIND] == NULL || !protocol)
 			goto errout;
 
 		err = -ENOENT;
@@ -217,14 +220,14 @@ replay:
 		if (tp == NULL)
 			goto errout;
 		err = -EINVAL;
-		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]);
+		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
 		if (tp_ops == NULL) {
 #ifdef CONFIG_KMOD
-			struct rtattr *kind = tca[TCA_KIND-1];
+			struct nlattr *kind = tca[TCA_KIND];
 			char name[IFNAMSIZ];
 
 			if (kind != NULL &&
-			    rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+			    nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
 				rtnl_unlock();
 				request_module("cls_%s", name);
 				rtnl_lock();
@@ -263,7 +266,7 @@ replay:
 		*back = tp;
 		qdisc_unlock_tree(dev);
 
-	} else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind))
+	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
 		goto errout;
 
 	fh = tp->ops->get(tp, t->tcm_handle);
@@ -333,18 +336,18 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
 	tcm->tcm_ifindex = tp->q->dev->ifindex;
 	tcm->tcm_parent = tp->classid;
 	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind);
+	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind);
 	tcm->tcm_handle = fh;
 	if (RTM_DELTFILTER != event) {
 		tcm->tcm_handle = 0;
 		if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
-			goto rtattr_failure;
+			goto nla_put_failure;
 	}
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -476,8 +479,8 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
 
-int tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
-		  struct rtattr *rate_tlv, struct tcf_exts *exts,
+int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
+		  struct nlattr *rate_tlv, struct tcf_exts *exts,
 		  struct tcf_ext_map *map)
 {
 	memset(exts, 0, sizeof(*exts));
@@ -487,8 +490,9 @@ int tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
 		int err;
 		struct tc_action *act;
 
-		if (map->police && tb[map->police-1]) {
-			act = tcf_action_init_1(tb[map->police-1], rate_tlv,
+		if (map->police && tb[map->police]) {
+			act = tcf_action_init_1((struct rtattr *)tb[map->police],
+						(struct rtattr *)rate_tlv,
 						"police", TCA_ACT_NOREPLACE,
 						TCA_ACT_BIND, &err);
 			if (act == NULL)
@@ -496,8 +500,9 @@ int tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
 
 			act->type = TCA_OLD_COMPAT;
 			exts->action = act;
-		} else if (map->action && tb[map->action-1]) {
-			act = tcf_action_init(tb[map->action-1], rate_tlv, NULL,
+		} else if (map->action && tb[map->action]) {
+			act = tcf_action_init((struct rtattr *)tb[map->action],
+					      (struct rtattr *)rate_tlv, NULL,
 				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
 			if (act == NULL)
 				return err;
@@ -506,8 +511,8 @@ int tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
 		}
 	}
 #else
-	if ((map->action && tb[map->action-1]) ||
-	    (map->police && tb[map->police-1]))
+	if ((map->action && tb[map->action]) ||
+	    (map->police && tb[map->police]))
 		return -EOPNOTSUPP;
 #endif
 
@@ -541,23 +546,23 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
-		struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
+		struct nlattr *p_rta = (struct nlattr *)skb_tail_pointer(skb);
 
 		if (exts->action->type != TCA_OLD_COMPAT) {
-			RTA_PUT(skb, map->action, 0, NULL);
+			NLA_PUT(skb, map->action, 0, NULL);
 			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
-				goto rtattr_failure;
-			p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
+				goto nla_put_failure;
+			p_rta->nla_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 		} else if (map->police) {
-			RTA_PUT(skb, map->police, 0, NULL);
+			NLA_PUT(skb, map->police, 0, NULL);
 			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
-				goto rtattr_failure;
-			p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
+				goto nla_put_failure;
+			p_rta->nla_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 		}
 	}
 #endif
 	return 0;
-rtattr_failure: __attribute__ ((unused))
+nla_put_failure: __attribute__ ((unused))
 	return -1;
 }
 EXPORT_SYMBOL(tcf_exts_dump);
@@ -569,10 +574,10 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
 #ifdef CONFIG_NET_CLS_ACT
 	if (exts->action)
 		if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
-			goto rtattr_failure;
+			goto nla_put_failure;
 #endif
 	return 0;
-rtattr_failure: __attribute__ ((unused))
+nla_put_failure: __attribute__ ((unused))
 	return -1;
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index b31f9f97198..3953da33956 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -130,27 +130,27 @@ static int basic_delete(struct tcf_proto *tp, unsigned long arg)
 }
 
 static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
-				  unsigned long base, struct rtattr **tb,
-				  struct rtattr *est)
+				  unsigned long base, struct nlattr **tb,
+				  struct nlattr *est)
 {
 	int err = -EINVAL;
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
-	if (tb[TCA_BASIC_CLASSID-1])
-		if (RTA_PAYLOAD(tb[TCA_BASIC_CLASSID-1]) < sizeof(u32))
+	if (tb[TCA_BASIC_CLASSID])
+		if (nla_len(tb[TCA_BASIC_CLASSID]) < sizeof(u32))
 			return err;
 
 	err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
 	if (err < 0)
 		return err;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES-1], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
 	if (err < 0)
 		goto errout;
 
-	if (tb[TCA_BASIC_CLASSID-1]) {
-		f->res.classid = *(u32*)RTA_DATA(tb[TCA_BASIC_CLASSID-1]);
+	if (tb[TCA_BASIC_CLASSID]) {
+		f->res.classid = *(u32*)nla_data(tb[TCA_BASIC_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
@@ -164,23 +164,23 @@ errout:
 }
 
 static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
-			struct rtattr **tca, unsigned long *arg)
+			struct nlattr **tca, unsigned long *arg)
 {
 	int err = -EINVAL;
 	struct basic_head *head = (struct basic_head *) tp->root;
-	struct rtattr *tb[TCA_BASIC_MAX];
+	struct nlattr *tb[TCA_BASIC_MAX + 1];
 	struct basic_filter *f = (struct basic_filter *) *arg;
 
-	if (tca[TCA_OPTIONS-1] == NULL)
+	if (tca[TCA_OPTIONS] == NULL)
 		return -EINVAL;
 
-	if (rtattr_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS-1]) < 0)
+	if (nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], NULL) < 0)
 		return -EINVAL;
 
 	if (f != NULL) {
 		if (handle && f->handle != handle)
 			return -EINVAL;
-		return basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+		return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
 	}
 
 	err = -ENOBUFS;
@@ -206,7 +206,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 		f->handle = head->hgenerator;
 	}
 
-	err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
 	if (err < 0)
 		goto errout;
 
@@ -246,27 +246,27 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct basic_filter *f = (struct basic_filter *) fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
 	if (f == NULL)
 		return skb->len;
 
 	t->tcm_handle = f->handle;
 
-	rta = (struct rtattr *) b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr *) b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
 	if (f->res.classid)
-		RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
+		NLA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
 
 	if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index b45038770e7..db6e90a3784 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -188,37 +188,37 @@ out:
 
 static int
 fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
-	struct rtattr **tb, struct rtattr **tca, unsigned long base)
+	struct nlattr **tb, struct nlattr **tca, unsigned long base)
 {
 	struct fw_head *head = (struct fw_head *)tp->root;
 	struct tcf_exts e;
 	u32 mask;
 	int err;
 
-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
 	if (err < 0)
 		return err;
 
 	err = -EINVAL;
-	if (tb[TCA_FW_CLASSID-1]) {
-		if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != sizeof(u32))
+	if (tb[TCA_FW_CLASSID]) {
+		if (nla_len(tb[TCA_FW_CLASSID]) != sizeof(u32))
 			goto errout;
-		f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
+		f->res.classid = *(u32*)nla_data(tb[TCA_FW_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
 #ifdef CONFIG_NET_CLS_IND
-	if (tb[TCA_FW_INDEV-1]) {
-		err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV-1]);
+	if (tb[TCA_FW_INDEV]) {
+		err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]);
 		if (err < 0)
 			goto errout;
 	}
 #endif /* CONFIG_NET_CLS_IND */
 
-	if (tb[TCA_FW_MASK-1]) {
-		if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+	if (tb[TCA_FW_MASK]) {
+		if (nla_len(tb[TCA_FW_MASK]) != sizeof(u32))
 			goto errout;
-		mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+		mask = *(u32*)nla_data(tb[TCA_FW_MASK]);
 		if (mask != head->mask)
 			goto errout;
 	} else if (head->mask != 0xFFFFFFFF)
@@ -234,19 +234,19 @@ errout:
 
 static int fw_change(struct tcf_proto *tp, unsigned long base,
 		     u32 handle,
-		     struct rtattr **tca,
+		     struct nlattr **tca,
 		     unsigned long *arg)
 {
 	struct fw_head *head = (struct fw_head*)tp->root;
 	struct fw_filter *f = (struct fw_filter *) *arg;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_FW_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_FW_MAX + 1];
 	int err;
 
 	if (!opt)
 		return handle ? -EINVAL : 0;
 
-	if (rtattr_parse_nested(tb, TCA_FW_MAX, opt) < 0)
+	if (nla_parse_nested(tb, TCA_FW_MAX, opt, NULL) < 0)
 		return -EINVAL;
 
 	if (f != NULL) {
@@ -260,10 +260,10 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 
 	if (head == NULL) {
 		u32 mask = 0xFFFFFFFF;
-		if (tb[TCA_FW_MASK-1]) {
-			if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+		if (tb[TCA_FW_MASK]) {
+			if (nla_len(tb[TCA_FW_MASK]) != sizeof(u32))
 				return -EINVAL;
-			mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+			mask = *(u32*)nla_data(tb[TCA_FW_MASK]);
 		}
 
 		head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
@@ -333,7 +333,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter*)fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
 	if (f == NULL)
 		return skb->len;
@@ -343,29 +343,29 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
 		return skb->len;
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
 	if (f->res.classid)
-		RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
+		NLA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
 #ifdef CONFIG_NET_CLS_IND
 	if (strlen(f->indev))
-		RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
+		NLA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
 #endif /* CONFIG_NET_CLS_IND */
 	if (head->mask != 0xFFFFFFFF)
-		RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
+		NLA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
 
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e70edd0f7bc..b1aae84cbad 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -325,7 +325,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 
 static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	struct route4_filter *f, u32 handle, struct route4_head *head,
-	struct rtattr **tb, struct rtattr *est, int new)
+	struct nlattr **tb, struct nlattr *est, int new)
 {
 	int err;
 	u32 id = 0, to = 0, nhandle = 0x8000;
@@ -339,34 +339,34 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 		return err;
 
 	err = -EINVAL;
-	if (tb[TCA_ROUTE4_CLASSID-1])
-		if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < sizeof(u32))
+	if (tb[TCA_ROUTE4_CLASSID])
+		if (nla_len(tb[TCA_ROUTE4_CLASSID]) < sizeof(u32))
 			goto errout;
 
-	if (tb[TCA_ROUTE4_TO-1]) {
+	if (tb[TCA_ROUTE4_TO]) {
 		if (new && handle & 0x8000)
 			goto errout;
-		if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < sizeof(u32))
+		if (nla_len(tb[TCA_ROUTE4_TO]) < sizeof(u32))
 			goto errout;
-		to = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]);
+		to = *(u32*)nla_data(tb[TCA_ROUTE4_TO]);
 		if (to > 0xFF)
 			goto errout;
 		nhandle = to;
 	}
 
-	if (tb[TCA_ROUTE4_FROM-1]) {
-		if (tb[TCA_ROUTE4_IIF-1])
+	if (tb[TCA_ROUTE4_FROM]) {
+		if (tb[TCA_ROUTE4_IIF])
 			goto errout;
-		if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < sizeof(u32))
+		if (nla_len(tb[TCA_ROUTE4_FROM]) < sizeof(u32))
 			goto errout;
-		id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1]);
+		id = *(u32*)nla_data(tb[TCA_ROUTE4_FROM]);
 		if (id > 0xFF)
 			goto errout;
 		nhandle |= id << 16;
-	} else if (tb[TCA_ROUTE4_IIF-1]) {
-		if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < sizeof(u32))
+	} else if (tb[TCA_ROUTE4_IIF]) {
+		if (nla_len(tb[TCA_ROUTE4_IIF]) < sizeof(u32))
 			goto errout;
-		id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
+		id = *(u32*)nla_data(tb[TCA_ROUTE4_IIF]);
 		if (id > 0x7FFF)
 			goto errout;
 		nhandle |= (id | 0x8000) << 16;
@@ -398,20 +398,20 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	}
 
 	tcf_tree_lock(tp);
-	if (tb[TCA_ROUTE4_TO-1])
+	if (tb[TCA_ROUTE4_TO])
 		f->id = to;
 
-	if (tb[TCA_ROUTE4_FROM-1])
+	if (tb[TCA_ROUTE4_FROM])
 		f->id = to | id<<16;
-	else if (tb[TCA_ROUTE4_IIF-1])
+	else if (tb[TCA_ROUTE4_IIF])
 		f->iif = id;
 
 	f->handle = nhandle;
 	f->bkt = b;
 	tcf_tree_unlock(tp);
 
-	if (tb[TCA_ROUTE4_CLASSID-1]) {
-		f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]);
+	if (tb[TCA_ROUTE4_CLASSID]) {
+		f->res.classid = *(u32*)nla_data(tb[TCA_ROUTE4_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
@@ -425,14 +425,14 @@ errout:
 
 static int route4_change(struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
-		       struct rtattr **tca,
+		       struct nlattr **tca,
 		       unsigned long *arg)
 {
 	struct route4_head *head = tp->root;
 	struct route4_filter *f, *f1, **fp;
 	struct route4_bucket *b;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_ROUTE4_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_ROUTE4_MAX + 1];
 	unsigned int h, th;
 	u32 old_handle = 0;
 	int err;
@@ -440,7 +440,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	if (rtattr_parse_nested(tb, TCA_ROUTE4_MAX, opt) < 0)
+	if (nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, NULL) < 0)
 		return -EINVAL;
 
 	if ((f = (struct route4_filter*)*arg) != NULL) {
@@ -451,7 +451,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 			old_handle = f->handle;
 
 		err = route4_set_parms(tp, base, f, handle, head, tb,
-			tca[TCA_RATE-1], 0);
+			tca[TCA_RATE], 0);
 		if (err < 0)
 			return err;
 
@@ -474,7 +474,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 		goto errout;
 
 	err = route4_set_parms(tp, base, f, handle, head, tb,
-		tca[TCA_RATE-1], 1);
+		tca[TCA_RATE], 1);
 	if (err < 0)
 		goto errout;
 
@@ -550,7 +550,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct route4_filter *f = (struct route4_filter*)fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 	u32 id;
 
 	if (f == NULL)
@@ -558,34 +558,34 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 
 	t->tcm_handle = f->handle;
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
 	if (!(f->handle&0x8000)) {
 		id = f->id&0xFF;
-		RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id);
+		NLA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id);
 	}
 	if (f->handle&0x80000000) {
 		if ((f->handle>>16) != 0xFFFF)
-			RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif);
+			NLA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif);
 	} else {
 		id = f->id>>16;
-		RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id);
+		NLA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id);
 	}
 	if (f->res.classid)
-		RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid);
+		NLA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid);
 
 	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 22f9ede70e8..2364c79d083 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -399,15 +399,15 @@ static u32 gen_tunnel(struct rsvp_head *data)
 
 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
-		       struct rtattr **tca,
+		       struct nlattr **tca,
 		       unsigned long *arg)
 {
 	struct rsvp_head *data = tp->root;
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_RSVP_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
 	__be32 *dst;
@@ -416,7 +416,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
+	if (nla_parse_nested(tb, TCA_RSVP_MAX, opt, NULL) < 0)
 		return -EINVAL;
 
 	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
@@ -429,7 +429,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 		if (f->handle != handle && handle)
 			goto errout2;
 		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+			f->res.classid = *(u32*)nla_data(tb[TCA_RSVP_CLASSID-1]);
 			tcf_bind_filter(tp, &f->res, base);
 		}
 
@@ -452,30 +452,30 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	h2 = 16;
 	if (tb[TCA_RSVP_SRC-1]) {
 		err = -EINVAL;
-		if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
+		if (nla_len(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
 			goto errout;
-		memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
 	if (tb[TCA_RSVP_PINFO-1]) {
 		err = -EINVAL;
-		if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
+		if (nla_len(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
 			goto errout;
-		pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
+		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
 	if (tb[TCA_RSVP_CLASSID-1]) {
 		err = -EINVAL;
-		if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
+		if (nla_len(tb[TCA_RSVP_CLASSID-1]) != 4)
 			goto errout;
-		f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+		f->res.classid = *(u32*)nla_data(tb[TCA_RSVP_CLASSID-1]);
 	}
 
 	err = -EINVAL;
-	if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
+	if (nla_len(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
 		goto errout;
-	dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST-1]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 
 	err = -ENOMEM;
@@ -594,7 +594,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	struct rsvp_filter *f = (struct rsvp_filter*)fh;
 	struct rsvp_session *s;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 	struct tc_rsvp_pinfo pinfo;
 
 	if (f == NULL)
@@ -604,32 +604,32 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	t->tcm_handle = f->handle;
 
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
-	RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
+	NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
 	pinfo.dpi = s->dpi;
 	pinfo.spi = f->spi;
 	pinfo.protocol = s->protocol;
 	pinfo.tunnelid = s->tunnelid;
 	pinfo.tunnelhdr = f->tunnelhdr;
 	pinfo.pad = 0;
-	RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
+	NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 	if (f->res.classid)
-		RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
+		NLA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
 	if (((f->handle>>8)&0xFF) != 16)
-		RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
+		NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 
 	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index e36977b17fa..ed8023944fe 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -196,7 +196,7 @@ valid_perfect_hash(struct tcindex_data *p)
 static int
 tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		  struct tcindex_data *p, struct tcindex_filter_result *r,
-		  struct rtattr **tb, struct rtattr *est)
+		  struct nlattr **tb, struct nlattr *est)
 {
 	int err, balloc = 0;
 	struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -218,22 +218,22 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		memset(&cr, 0, sizeof(cr));
 
 	err = -EINVAL;
-	if (tb[TCA_TCINDEX_HASH-1]) {
-		if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(u32))
+	if (tb[TCA_TCINDEX_HASH]) {
+		if (nla_len(tb[TCA_TCINDEX_HASH]) < sizeof(u32))
 			goto errout;
-		cp.hash = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
+		cp.hash = *(u32 *) nla_data(tb[TCA_TCINDEX_HASH]);
 	}
 
-	if (tb[TCA_TCINDEX_MASK-1]) {
-		if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(u16))
+	if (tb[TCA_TCINDEX_MASK]) {
+		if (nla_len(tb[TCA_TCINDEX_MASK]) < sizeof(u16))
 			goto errout;
-		cp.mask = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
+		cp.mask = *(u16 *) nla_data(tb[TCA_TCINDEX_MASK]);
 	}
 
-	if (tb[TCA_TCINDEX_SHIFT-1]) {
-		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(int))
+	if (tb[TCA_TCINDEX_SHIFT]) {
+		if (nla_len(tb[TCA_TCINDEX_SHIFT]) < sizeof(int))
 			goto errout;
-		cp.shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
+		cp.shift = *(int *) nla_data(tb[TCA_TCINDEX_SHIFT]);
 	}
 
 	err = -EBUSY;
@@ -248,11 +248,11 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		goto errout;
 
 	err = -EINVAL;
-	if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
-		if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(u32))
+	if (tb[TCA_TCINDEX_FALL_THROUGH]) {
+		if (nla_len(tb[TCA_TCINDEX_FALL_THROUGH]) < sizeof(u32))
 			goto errout;
 		cp.fall_through =
-			*(u32 *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
+			*(u32 *) nla_data(tb[TCA_TCINDEX_FALL_THROUGH]);
 	}
 
 	if (!cp.hash) {
@@ -304,8 +304,8 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 			goto errout_alloc;
 	}
 
-	if (tb[TCA_TCINDEX_CLASSID-1]) {
-		cr.res.classid = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]);
+	if (tb[TCA_TCINDEX_CLASSID]) {
+		cr.res.classid = *(u32 *) nla_data(tb[TCA_TCINDEX_CLASSID]);
 		tcf_bind_filter(tp, &cr.res, base);
 	}
 
@@ -344,10 +344,10 @@ errout:
 
 static int
 tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct rtattr **tca, unsigned long *arg)
+	       struct nlattr **tca, unsigned long *arg)
 {
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_TCINDEX_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
 
@@ -358,10 +358,10 @@ tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (!opt)
 		return 0;
 
-	if (rtattr_parse_nested(tb, TCA_TCINDEX_MAX, opt) < 0)
+	if (nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, NULL) < 0)
 		return -EINVAL;
 
-	return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE-1]);
+	return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]);
 }
 
 
@@ -435,21 +435,21 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
 	pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
 		 tp, fh, skb, t, p, r, b);
 	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
-	rta = (struct rtattr *) b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr *) b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (!fh) {
 		t->tcm_handle = ~0; /* whatever ... */
-		RTA_PUT(skb, TCA_TCINDEX_HASH, sizeof(p->hash), &p->hash);
-		RTA_PUT(skb, TCA_TCINDEX_MASK, sizeof(p->mask), &p->mask);
-		RTA_PUT(skb, TCA_TCINDEX_SHIFT, sizeof(p->shift), &p->shift);
-		RTA_PUT(skb, TCA_TCINDEX_FALL_THROUGH, sizeof(p->fall_through),
+		NLA_PUT(skb, TCA_TCINDEX_HASH, sizeof(p->hash), &p->hash);
+		NLA_PUT(skb, TCA_TCINDEX_MASK, sizeof(p->mask), &p->mask);
+		NLA_PUT(skb, TCA_TCINDEX_SHIFT, sizeof(p->shift), &p->shift);
+		NLA_PUT(skb, TCA_TCINDEX_FALL_THROUGH, sizeof(p->fall_through),
 		    &p->fall_through);
-		rta->rta_len = skb_tail_pointer(skb) - b;
+		nla->nla_len = skb_tail_pointer(skb) - b;
 	} else {
 		if (p->perfect) {
 			t->tcm_handle = r-p->perfect;
@@ -468,19 +468,19 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 		}
 		pr_debug("handle = %d\n", t->tcm_handle);
 		if (r->res.class)
-			RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
+			NLA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
 
 		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
-			goto rtattr_failure;
-		rta->rta_len = skb_tail_pointer(skb) - b;
+			goto nla_put_failure;
+		nla->nla_len = skb_tail_pointer(skb) - b;
 
 		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
-			goto rtattr_failure;
+			goto nla_put_failure;
 	}
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 7bf3cd4e731..aaf5049f951 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -462,8 +462,8 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 
 static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 			 struct tc_u_hnode *ht,
-			 struct tc_u_knode *n, struct rtattr **tb,
-			 struct rtattr *est)
+			 struct tc_u_knode *n, struct nlattr **tb,
+			 struct nlattr *est)
 {
 	int err;
 	struct tcf_exts e;
@@ -473,8 +473,8 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 		return err;
 
 	err = -EINVAL;
-	if (tb[TCA_U32_LINK-1]) {
-		u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
+	if (tb[TCA_U32_LINK]) {
+		u32 handle = *(u32*)nla_data(tb[TCA_U32_LINK]);
 		struct tc_u_hnode *ht_down = NULL;
 
 		if (TC_U32_KEY(handle))
@@ -495,14 +495,14 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 		if (ht_down)
 			ht_down->refcnt--;
 	}
-	if (tb[TCA_U32_CLASSID-1]) {
-		n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
+	if (tb[TCA_U32_CLASSID]) {
+		n->res.classid = *(u32*)nla_data(tb[TCA_U32_CLASSID]);
 		tcf_bind_filter(tp, &n->res, base);
 	}
 
 #ifdef CONFIG_NET_CLS_IND
-	if (tb[TCA_U32_INDEV-1]) {
-		err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]);
+	if (tb[TCA_U32_INDEV]) {
+		err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]);
 		if (err < 0)
 			goto errout;
 	}
@@ -516,33 +516,33 @@ errout:
 }
 
 static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
-		      struct rtattr **tca,
+		      struct nlattr **tca,
 		      unsigned long *arg)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
 	struct tc_u_knode *n;
 	struct tc_u32_sel *s;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
-	struct rtattr *tb[TCA_U32_MAX];
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_U32_MAX + 1];
 	u32 htid;
 	int err;
 
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	if (rtattr_parse_nested(tb, TCA_U32_MAX, opt) < 0)
+	if (nla_parse_nested(tb, TCA_U32_MAX, opt, NULL) < 0)
 		return -EINVAL;
 
 	if ((n = (struct tc_u_knode*)*arg) != NULL) {
 		if (TC_U32_KEY(n->handle) == 0)
 			return -EINVAL;
 
-		return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
+		return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]);
 	}
 
-	if (tb[TCA_U32_DIVISOR-1]) {
-		unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]);
+	if (tb[TCA_U32_DIVISOR]) {
+		unsigned divisor = *(unsigned*)nla_data(tb[TCA_U32_DIVISOR]);
 
 		if (--divisor > 0x100)
 			return -EINVAL;
@@ -567,8 +567,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 		return 0;
 	}
 
-	if (tb[TCA_U32_HASH-1]) {
-		htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
+	if (tb[TCA_U32_HASH]) {
+		htid = *(unsigned*)nla_data(tb[TCA_U32_HASH]);
 		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
 			ht = tp->root;
 			htid = ht->handle;
@@ -592,11 +592,11 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	} else
 		handle = gen_new_kid(ht, htid);
 
-	if (tb[TCA_U32_SEL-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
+	if (tb[TCA_U32_SEL] == NULL ||
+	    nla_len(tb[TCA_U32_SEL]) < sizeof(struct tc_u32_sel))
 		return -EINVAL;
 
-	s = RTA_DATA(tb[TCA_U32_SEL-1]);
+	s = nla_data(tb[TCA_U32_SEL]);
 
 	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
 	if (n == NULL)
@@ -616,23 +616,23 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
 
 #ifdef CONFIG_CLS_U32_MARK
-	if (tb[TCA_U32_MARK-1]) {
+	if (tb[TCA_U32_MARK]) {
 		struct tc_u32_mark *mark;
 
-		if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) {
+		if (nla_len(tb[TCA_U32_MARK]) < sizeof(struct tc_u32_mark)) {
 #ifdef CONFIG_CLS_U32_PERF
 			kfree(n->pf);
 #endif
 			kfree(n);
 			return -EINVAL;
 		}
-		mark = RTA_DATA(tb[TCA_U32_MARK-1]);
+		mark = nla_data(tb[TCA_U32_MARK]);
 		memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
 		n->mark.success = 0;
 	}
 #endif
 
-	err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]);
+	err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]);
 	if (err == 0) {
 		struct tc_u_knode **ins;
 		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
@@ -694,59 +694,59 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct tc_u_knode *n = (struct tc_u_knode*)fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *rta;
+	struct nlattr *nla;
 
 	if (n == NULL)
 		return skb->len;
 
 	t->tcm_handle = n->handle;
 
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nla = (struct nlattr*)b;
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
 	if (TC_U32_KEY(n->handle) == 0) {
 		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
 		u32 divisor = ht->divisor+1;
-		RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
+		NLA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
 	} else {
-		RTA_PUT(skb, TCA_U32_SEL,
+		NLA_PUT(skb, TCA_U32_SEL,
 			sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
 			&n->sel);
 		if (n->ht_up) {
 			u32 htid = n->handle & 0xFFFFF000;
-			RTA_PUT(skb, TCA_U32_HASH, 4, &htid);
+			NLA_PUT(skb, TCA_U32_HASH, 4, &htid);
 		}
 		if (n->res.classid)
-			RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
+			NLA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
 		if (n->ht_down)
-			RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
+			NLA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
 
 #ifdef CONFIG_CLS_U32_MARK
 		if (n->mark.val || n->mark.mask)
-			RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
+			NLA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
 #endif
 
 		if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
-			goto rtattr_failure;
+			goto nla_put_failure;
 
 #ifdef CONFIG_NET_CLS_IND
 		if(strlen(n->indev))
-			RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
+			NLA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
 #endif
 #ifdef CONFIG_CLS_U32_PERF
-		RTA_PUT(skb, TCA_U32_PCNT,
+		NLA_PUT(skb, TCA_U32_PCNT,
 		sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
 			n->pf);
 #endif
 	}
 
-	rta->rta_len = skb_tail_pointer(skb) - b;
+	nla->nla_len = skb_tail_pointer(skb) - b;
 	if (TC_U32_KEY(n->handle))
 		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
-			goto rtattr_failure;
+			goto nla_put_failure;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index ceda8890ab0..92b6863e928 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -542,11 +542,11 @@ static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
 	return r;
 }
 
-static int meta_var_change(struct meta_value *dst, struct rtattr *rta)
+static int meta_var_change(struct meta_value *dst, struct nlattr *nla)
 {
-	int len = RTA_PAYLOAD(rta);
+	int len = nla_len(nla);
 
-	dst->val = (unsigned long)kmemdup(RTA_DATA(rta), len, GFP_KERNEL);
+	dst->val = (unsigned long)kmemdup(nla_data(nla), len, GFP_KERNEL);
 	if (dst->val == 0UL)
 		return -ENOMEM;
 	dst->len = len;
@@ -570,10 +570,10 @@ static void meta_var_apply_extras(struct meta_value *v,
 static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
 {
 	if (v->val && v->len)
-		RTA_PUT(skb, tlv, v->len, (void *) v->val);
+		NLA_PUT(skb, tlv, v->len, (void *) v->val);
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
@@ -594,13 +594,13 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
 		return 1;
 }
 
-static int meta_int_change(struct meta_value *dst, struct rtattr *rta)
+static int meta_int_change(struct meta_value *dst, struct nlattr *nla)
 {
-	if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) {
-		dst->val = *(unsigned long *) RTA_DATA(rta);
+	if (nla_len(nla) >= sizeof(unsigned long)) {
+		dst->val = *(unsigned long *) nla_data(nla);
 		dst->len = sizeof(unsigned long);
-	} else if (RTA_PAYLOAD(rta) == sizeof(u32)) {
-		dst->val = *(u32 *) RTA_DATA(rta);
+	} else if (nla_len(nla) == sizeof(u32)) {
+		dst->val = *(u32 *) nla_data(nla);
 		dst->len = sizeof(u32);
 	} else
 		return -EINVAL;
@@ -621,15 +621,15 @@ static void meta_int_apply_extras(struct meta_value *v,
 static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
 {
 	if (v->len == sizeof(unsigned long))
-		RTA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
+		NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
 	else if (v->len == sizeof(u32)) {
 		u32 d = v->val;
-		RTA_PUT(skb, tlv, sizeof(d), &d);
+		NLA_PUT(skb, tlv, sizeof(d), &d);
 	}
 
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
@@ -641,7 +641,7 @@ struct meta_type_ops
 {
 	void	(*destroy)(struct meta_value *);
 	int	(*compare)(struct meta_obj *, struct meta_obj *);
-	int	(*change)(struct meta_value *, struct rtattr *);
+	int	(*change)(struct meta_value *, struct nlattr *);
 	void	(*apply_extras)(struct meta_value *, struct meta_obj *);
 	int	(*dump)(struct sk_buff *, struct meta_value *, int);
 };
@@ -729,13 +729,13 @@ static inline void meta_delete(struct meta_match *meta)
 	kfree(meta);
 }
 
-static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta)
+static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
 {
-	if (rta) {
-		if (RTA_PAYLOAD(rta) == 0)
+	if (nla) {
+		if (nla_len(nla) == 0)
 			return -EINVAL;
 
-		return meta_type_ops(dst)->change(dst, rta);
+		return meta_type_ops(dst)->change(dst, nla);
 	}
 
 	return 0;
@@ -750,17 +750,17 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
 			  struct tcf_ematch *m)
 {
 	int err = -EINVAL;
-	struct rtattr *tb[TCA_EM_META_MAX];
+	struct nlattr *tb[TCA_EM_META_MAX + 1];
 	struct tcf_meta_hdr *hdr;
 	struct meta_match *meta = NULL;
 
-	if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0)
+	if (nla_parse(tb, TCA_EM_META_MAX, data, len, NULL) < 0)
 		goto errout;
 
-	if (tb[TCA_EM_META_HDR-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr))
+	if (tb[TCA_EM_META_HDR] == NULL ||
+	    nla_len(tb[TCA_EM_META_HDR]) < sizeof(*hdr))
 		goto errout;
-	hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]);
+	hdr = nla_data(tb[TCA_EM_META_HDR]);
 
 	if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
 	    TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
@@ -781,8 +781,8 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
 		goto errout;
 	}
 
-	if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 ||
-	    meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0)
+	if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE]) < 0 ||
+	    meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE]) < 0)
 		goto errout;
 
 	m->datalen = sizeof(*meta);
@@ -811,16 +811,16 @@ static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
 	memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
 	memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
 
-	RTA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
+	NLA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
 
 	ops = meta_type_ops(&meta->lvalue);
 	if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
 	    ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index d5cd86efb7d..853c5ead87f 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -118,11 +118,14 @@ static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
 	conf.pattern_len = textsearch_get_pattern_len(tm->config);
 	conf.pad = 0;
 
-	RTA_PUT_NOHDR(skb, sizeof(conf), &conf);
-	RTA_APPEND(skb, conf.pattern_len, textsearch_get_pattern(tm->config));
+	if (nla_put_nohdr(skb, sizeof(conf), &conf) < 0)
+		goto nla_put_failure;
+	if (nla_append(skb, conf.pattern_len,
+		       textsearch_get_pattern(tm->config)) < 0)
+		goto nla_put_failure;
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 27941cfc0ab..72d9b273524 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -183,11 +183,11 @@ static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
 
 static int tcf_em_validate(struct tcf_proto *tp,
 			   struct tcf_ematch_tree_hdr *tree_hdr,
-			   struct tcf_ematch *em, struct rtattr *rta, int idx)
+			   struct tcf_ematch *em, struct nlattr *nla, int idx)
 {
 	int err = -EINVAL;
-	struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta);
-	int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr);
+	struct tcf_ematch_hdr *em_hdr = nla_data(nla);
+	int data_len = nla_len(nla) - sizeof(*em_hdr);
 	void *data = (void *) em_hdr + sizeof(*em_hdr);
 
 	if (!TCF_EM_REL_VALID(em_hdr->flags))
@@ -286,11 +286,11 @@ errout:
  * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
  *
  * @tp: classifier kind handle
- * @rta: ematch tree configuration TLV
+ * @nla: ematch tree configuration TLV
  * @tree: destination ematch tree variable to store the resulting
  *        ematch tree.
  *
- * This function validates the given configuration TLV @rta and builds an
+ * This function validates the given configuration TLV @nla and builds an
  * ematch tree in @tree. The resulting tree must later be copied into
  * the private classifier data using tcf_em_tree_change(). You MUST NOT
  * provide the ematch tree variable of the private classifier data directly,
@@ -298,45 +298,45 @@ errout:
  *
  * Returns a negative error code if the configuration TLV contains errors.
  */
-int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
+int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 			 struct tcf_ematch_tree *tree)
 {
 	int idx, list_len, matches_len, err = -EINVAL;
-	struct rtattr *tb[TCA_EMATCH_TREE_MAX];
-	struct rtattr *rt_match, *rt_hdr, *rt_list;
+	struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
+	struct nlattr *rt_match, *rt_hdr, *rt_list;
 	struct tcf_ematch_tree_hdr *tree_hdr;
 	struct tcf_ematch *em;
 
-	if (!rta) {
+	if (!nla) {
 		memset(tree, 0, sizeof(*tree));
 		return 0;
 	}
 
-	if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0)
+	if (nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, NULL) < 0)
 		goto errout;
 
-	rt_hdr = tb[TCA_EMATCH_TREE_HDR-1];
-	rt_list = tb[TCA_EMATCH_TREE_LIST-1];
+	rt_hdr = tb[TCA_EMATCH_TREE_HDR];
+	rt_list = tb[TCA_EMATCH_TREE_LIST];
 
 	if (rt_hdr == NULL || rt_list == NULL)
 		goto errout;
 
-	if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) ||
-	    RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
+	if (nla_len(rt_hdr) < sizeof(*tree_hdr) ||
+	    nla_len(rt_list) < sizeof(*rt_match))
 		goto errout;
 
-	tree_hdr = RTA_DATA(rt_hdr);
+	tree_hdr = nla_data(rt_hdr);
 	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
 
-	rt_match = RTA_DATA(rt_list);
-	list_len = RTA_PAYLOAD(rt_list);
+	rt_match = nla_data(rt_list);
+	list_len = nla_len(rt_list);
 	matches_len = tree_hdr->nmatches * sizeof(*em);
 
 	tree->matches = kzalloc(matches_len, GFP_KERNEL);
 	if (tree->matches == NULL)
 		goto errout;
 
-	/* We do not use rtattr_parse_nested here because the maximum
+	/* We do not use nla_parse_nested here because the maximum
 	 * number of attributes is unknown. This saves us the allocation
 	 * for a tb buffer which would serve no purpose at all.
 	 *
@@ -344,16 +344,16 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
 	 * provided, their type must be incremental from 1 to n. Even
 	 * if it does not serve any real purpose, a failure of sticking
 	 * to this policy will result in parsing failure. */
-	for (idx = 0; RTA_OK(rt_match, list_len); idx++) {
+	for (idx = 0; nla_ok(rt_match, list_len); idx++) {
 		err = -EINVAL;
 
-		if (rt_match->rta_type != (idx + 1))
+		if (rt_match->nla_type != (idx + 1))
 			goto errout_abort;
 
 		if (idx >= tree_hdr->nmatches)
 			goto errout_abort;
 
-		if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr))
+		if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))
 			goto errout_abort;
 
 		em = tcf_em_get_match(tree, idx);
@@ -362,7 +362,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
 		if (err < 0)
 			goto errout_abort;
 
-		rt_match = RTA_NEXT(rt_match, list_len);
+		rt_match = nla_next(rt_match, &list_len);
 	}
 
 	/* Check if the number of matches provided by userspace actually
@@ -434,18 +434,18 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 {
 	int i;
 	u8 *tail;
-	struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb);
-	struct rtattr *list_start;
+	struct nlattr *top_start = (struct nlattr *)skb_tail_pointer(skb);
+	struct nlattr *list_start;
 
-	RTA_PUT(skb, tlv, 0, NULL);
-	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+	NLA_PUT(skb, tlv, 0, NULL);
+	NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
 
-	list_start = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+	list_start = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
 
 	tail = skb_tail_pointer(skb);
 	for (i = 0; i < tree->hdr.nmatches; i++) {
-		struct rtattr *match_start = (struct rtattr *)tail;
+		struct nlattr *match_start = (struct nlattr *)tail;
 		struct tcf_ematch *em = tcf_em_get_match(tree, i);
 		struct tcf_ematch_hdr em_hdr = {
 			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -453,27 +453,27 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 			.flags = em->flags
 		};
 
-		RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+		NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
 
 		if (em->ops && em->ops->dump) {
 			if (em->ops->dump(skb, em) < 0)
-				goto rtattr_failure;
+				goto nla_put_failure;
 		} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
 			u32 u = em->data;
-			RTA_PUT_NOHDR(skb, sizeof(u), &u);
+			nla_put_nohdr(skb, sizeof(u), &u);
 		} else if (em->datalen > 0)
-			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
+			nla_put_nohdr(skb, em->datalen, (void *) em->data);
 
 		tail = skb_tail_pointer(skb);
-		match_start->rta_len = tail - (u8 *)match_start;
+		match_start->nla_len = tail - (u8 *)match_start;
 	}
 
-	list_start->rta_len = tail - (u8 *)list_start;
-	top_start->rta_len = tail - (u8 *)top_start;
+	list_start->nla_len = tail - (u8 *)list_start;
+	top_start->nla_len = tail - (u8 *)top_start;
 
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	return -1;
 }
 EXPORT_SYMBOL(tcf_em_tree_dump);
-- 
cgit v1.2.3


From 7ba699c604ab811972eee2e041fd6b07659a2e6e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 22:11:50 -0800
Subject: [NET_SCHED]: Convert actions from rtnetlink to new netlink API

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c    | 214 +++++++++++++++++++++++++------------------------
 net/sched/act_gact.c   |  28 +++----
 net/sched/act_ipt.c    |  46 +++++------
 net/sched/act_mirred.c |  18 ++---
 net/sched/act_nat.c    |  18 ++---
 net/sched/act_pedit.c  |  20 ++---
 net/sched/act_police.c |  52 ++++++------
 net/sched/act_simple.c |  24 +++---
 net/sched/cls_api.c    |   6 +-
 9 files changed, 214 insertions(+), 212 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 11f3097a691..ebd21d2cb5f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -68,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	struct tcf_common *p;
 	int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
-	struct rtattr *r ;
+	struct nlattr *r ;
 
 	read_lock_bh(hinfo->lock);
 
@@ -83,15 +83,15 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			a->priv = p;
 			a->order = n_i;
-			r = (struct rtattr *)skb_tail_pointer(skb);
-			RTA_PUT(skb, a->order, 0, NULL);
+			r = (struct nlattr *)skb_tail_pointer(skb);
+			NLA_PUT(skb, a->order, 0, NULL);
 			err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
 				nlmsg_trim(skb, r);
 				goto done;
 			}
-			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
+			r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
 			n_i++;
 			if (n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
@@ -103,7 +103,7 @@ done:
 		cb->args[0] += n_i;
 	return n_i;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, r);
 	goto done;
 }
@@ -112,12 +112,12 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 			  struct tcf_hashinfo *hinfo)
 {
 	struct tcf_common *p, *s_p;
-	struct rtattr *r ;
+	struct nlattr *r ;
 	int i= 0, n_i = 0;
 
-	r = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, a->order, 0, NULL);
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	r = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, a->order, 0, NULL);
+	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
 		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
 
@@ -129,11 +129,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 			p = s_p;
 		}
 	}
-	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
-	r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
+	NLA_PUT(skb, TCA_FCNT, 4, &n_i);
+	r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
 
 	return n_i;
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, r);
 	return -EINVAL;
 }
@@ -211,7 +211,7 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
 }
 EXPORT_SYMBOL(tcf_hash_check);
 
-struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
 {
 	struct tcf_common *p = kzalloc(size, GFP_KERNEL);
 
@@ -227,7 +227,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti
 	p->tcfc_tm.lastuse = jiffies;
 	if (est)
 		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
-				  &p->tcfc_lock, (struct nlattr *)est);
+				  &p->tcfc_lock, est);
 	a->priv = (void *) p;
 	return p;
 }
@@ -305,15 +305,15 @@ static struct tc_action_ops *tc_lookup_action_n(char *kind)
 	return a;
 }
 
-/* lookup by rtattr */
-static struct tc_action_ops *tc_lookup_action(struct rtattr *kind)
+/* lookup by nlattr */
+static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
 {
 	struct tc_action_ops *a = NULL;
 
 	if (kind) {
 		read_lock(&act_mod_lock);
 		for (a = act_base; a; a = a->next) {
-			if (rtattr_strcmp(kind, a->kind) == 0) {
+			if (nla_strcmp(kind, a->kind) == 0) {
 				if (!try_module_get(a->owner)) {
 					read_unlock(&act_mod_lock);
 					return NULL;
@@ -414,22 +414,22 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *r;
+	struct nlattr *r;
 
 	if (a->ops == NULL || a->ops->dump == NULL)
 		return err;
 
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	if (tcf_action_copy_stats(skb, a, 0))
-		goto rtattr_failure;
-	r = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+		goto nla_put_failure;
+	r = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
-		r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
+		r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
 		return err;
 	}
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -441,45 +441,45 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 	struct tc_action *a;
 	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *r ;
+	struct nlattr *r ;
 
 	while ((a = act) != NULL) {
-		r = (struct rtattr *)skb_tail_pointer(skb);
+		r = (struct nlattr *)skb_tail_pointer(skb);
 		act = a->next;
-		RTA_PUT(skb, a->order, 0, NULL);
+		NLA_PUT(skb, a->order, 0, NULL);
 		err = tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
 			goto errout;
-		r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
+		r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
 	}
 
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	err = -EINVAL;
 errout:
 	nlmsg_trim(skb, b);
 	return err;
 }
 
-struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
+struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind, int *err)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
 	char act_name[IFNAMSIZ];
-	struct rtattr *tb[TCA_ACT_MAX+1];
-	struct rtattr *kind;
+	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *kind;
 
 	*err = -EINVAL;
 
 	if (name == NULL) {
-		if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+		if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
 			goto err_out;
-		kind = tb[TCA_ACT_KIND-1];
+		kind = tb[TCA_ACT_KIND];
 		if (kind == NULL)
 			goto err_out;
-		if (rtattr_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
 	} else {
 		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
@@ -517,9 +517,9 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
 
 	/* backward compatibility for policer */
 	if (name == NULL)
-		*err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind);
+		*err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
 	else
-		*err = a_o->init(rta, est, a, ovr, bind);
+		*err = a_o->init(nla, est, a, ovr, bind);
 	if (*err < 0)
 		goto err_free;
 
@@ -542,23 +542,23 @@ err_out:
 	return NULL;
 }
 
-struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est,
+struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
 				  char *name, int ovr, int bind, int *err)
 {
-	struct rtattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 	int i;
 
-	if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) {
+	if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL) < 0) {
 		*err = -EINVAL;
 		return head;
 	}
 
-	for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) {
+	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(tb[i], est, name, ovr, bind, err);
 		if (act == NULL)
 			goto err;
-		act->order = i+1;
+		act->order = i;
 
 		if (head == NULL)
 			head = act;
@@ -625,7 +625,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *x;
+	struct nlattr *x;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
 
@@ -634,18 +634,18 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	x = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	if (tcf_action_dump(skb, a, bind, ref) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
+	x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 nlmsg_failure:
 	nlmsg_trim(skb, b);
 	return -1;
@@ -668,20 +668,20 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 }
 
 static struct tc_action *
-tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err)
+tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int *err)
 {
-	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX+1];
 	struct tc_action *a;
 	int index;
 
 	*err = -EINVAL;
-	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+	if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
 		return NULL;
 
-	if (tb[TCA_ACT_INDEX - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_ACT_INDEX - 1]) < sizeof(index))
+	if (tb[TCA_ACT_INDEX] == NULL ||
+	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
 		return NULL;
-	index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]);
+	index = *(int *)nla_data(tb[TCA_ACT_INDEX]);
 
 	*err = -ENOMEM;
 	a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
@@ -689,7 +689,7 @@ tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err)
 		return NULL;
 
 	*err = -EINVAL;
-	a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]);
+	a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
 	if (a->ops == NULL)
 		goto err_free;
 	if (a->ops->lookup == NULL)
@@ -731,16 +731,16 @@ static struct tc_action *create_a(int i)
 	return act;
 }
 
-static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
+static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 {
 	struct sk_buff *skb;
 	unsigned char *b;
 	struct nlmsghdr *nlh;
 	struct tcamsg *t;
 	struct netlink_callback dcb;
-	struct rtattr *x;
-	struct rtattr *tb[TCA_ACT_MAX+1];
-	struct rtattr *kind;
+	struct nlattr *x;
+	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *kind;
 	struct tc_action *a = create_a(0);
 	int err = -EINVAL;
 
@@ -758,10 +758,10 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 
 	b = skb_tail_pointer(skb);
 
-	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+	if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
 		goto err_out;
 
-	kind = tb[TCA_ACT_KIND-1];
+	kind = tb[TCA_ACT_KIND];
 	a->ops = tc_lookup_action(kind);
 	if (a->ops == NULL)
 		goto err_out;
@@ -772,14 +772,14 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	x = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
 	if (err < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
+	x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	nlh->nlmsg_flags |= NLM_F_ROOT;
@@ -791,7 +791,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 
 	return err;
 
-rtattr_failure:
+nla_put_failure:
 nlmsg_failure:
 	module_put(a->ops->owner);
 err_out:
@@ -801,13 +801,13 @@ err_out:
 }
 
 static int
-tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
+tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 {
 	int i, ret = 0;
-	struct rtattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 
-	if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0)
+	if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL) < 0)
 		return -EINVAL;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
@@ -815,11 +815,11 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
 			return tca_action_flush(tb[0], n, pid);
 	}
 
-	for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) {
+	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_get_1(tb[i], n, pid, &ret);
 		if (act == NULL)
 			goto err;
-		act->order = i+1;
+		act->order = i;
 
 		if (head == NULL)
 			head = act;
@@ -865,7 +865,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	struct rtattr *x;
+	struct nlattr *x;
 	unsigned char *b;
 	int err = 0;
 
@@ -881,13 +881,13 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	x = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	if (tcf_action_dump(skb, a, 0, 0) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
-	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
+	x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
@@ -897,7 +897,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 		err = 0;
 	return err;
 
-rtattr_failure:
+nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 	return -1;
@@ -905,14 +905,14 @@ nlmsg_failure:
 
 
 static int
-tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int ovr)
+tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
 {
 	int ret = 0;
 	struct tc_action *act;
 	struct tc_action *a;
 	u32 seq = n->nlmsg_seq;
 
-	act = tcf_action_init(rta, NULL, NULL, ovr, 0, &ret);
+	act = tcf_action_init(nla, NULL, NULL, ovr, 0, &ret);
 	if (act == NULL)
 		goto done;
 
@@ -931,14 +931,18 @@ done:
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = skb->sk->sk_net;
-	struct rtattr **tca = arg;
+	struct nlattr *tca[TCA_ACT_MAX + 1];
 	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
 	int ret = 0, ovr = 0;
 
 	if (net != &init_net)
 		return -EINVAL;
 
-	if (tca[TCA_ACT_TAB-1] == NULL) {
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (tca[TCA_ACT_TAB] == NULL) {
 		printk("tc_ctl_action: received NO action attribs\n");
 		return -EINVAL;
 	}
@@ -956,15 +960,15 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if (n->nlmsg_flags&NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(tca[TCA_ACT_TAB-1], n, pid, ovr);
+		ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
 	case RTM_DELACTION:
-		ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_DELACTION);
+		ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION);
 		break;
 	case RTM_GETACTION:
-		ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_GETACTION);
+		ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION);
 		break;
 	default:
 		BUG();
@@ -973,33 +977,33 @@ replay:
 	return ret;
 }
 
-static struct rtattr *
+static struct nlattr *
 find_dump_kind(struct nlmsghdr *n)
 {
-	struct rtattr *tb1, *tb2[TCA_ACT_MAX+1];
-	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct rtattr *rta[TCAA_MAX + 1];
-	struct rtattr *kind;
+	struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+	struct nlattr *nla[TCAA_MAX + 1];
+	struct nlattr *kind;
 	int min_len = NLMSG_LENGTH(sizeof(struct tcamsg));
 	int attrlen = n->nlmsg_len - NLMSG_ALIGN(min_len);
-	struct rtattr *attr = (void *) n + NLMSG_ALIGN(min_len);
+	struct nlattr *attr = (void *) n + NLMSG_ALIGN(min_len);
 
-	if (rtattr_parse(rta, TCAA_MAX, attr, attrlen) < 0)
+	if (nla_parse(nla, TCAA_MAX, attr, attrlen, NULL) < 0)
 		return NULL;
-	tb1 = rta[TCA_ACT_TAB - 1];
+	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
 
-	if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(tb1),
-			 NLMSG_ALIGN(RTA_PAYLOAD(tb1))) < 0)
+	if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
+		      NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
 		return NULL;
 	if (tb[0] == NULL)
 		return NULL;
 
-	if (rtattr_parse(tb2, TCA_ACT_MAX, RTA_DATA(tb[0]),
-			 RTA_PAYLOAD(tb[0])) < 0)
+	if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[0]),
+		      nla_len(tb[0]), NULL) < 0)
 		return NULL;
-	kind = tb2[TCA_ACT_KIND-1];
+	kind = tb2[TCA_ACT_KIND];
 
 	return kind;
 }
@@ -1010,12 +1014,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *net = skb->sk->sk_net;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct rtattr *x;
+	struct nlattr *x;
 	struct tc_action_ops *a_o;
 	struct tc_action a;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
-	struct rtattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *kind = find_dump_kind(cb->nlh);
 
 	if (net != &init_net)
 		return 0;
@@ -1035,7 +1039,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (a_o->walk == NULL) {
 		printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
-		goto rtattr_failure;
+		goto nla_put_failure;
 	}
 
 	nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
@@ -1045,15 +1049,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *)skb_tail_pointer(skb);
-	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	x = (struct nlattr *)skb_tail_pointer(skb);
+	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
 	if (ret < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	if (ret > 0) {
-		x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
+		x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
 		ret = skb->len;
 	} else
 		nlmsg_trim(skb, x);
@@ -1064,7 +1068,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	module_put(a_o->owner);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 nlmsg_failure:
 	module_put(a_o->owner);
 	nlmsg_trim(skb, b);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a9631e426d9..5402cf885f9 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -53,26 +53,26 @@ typedef int (*g_rand)(struct tcf_gact *gact);
 static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
 #endif /* CONFIG_GACT_PROB */
 
-static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
+static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 			 struct tc_action *a, int ovr, int bind)
 {
-	struct rtattr *tb[TCA_GACT_MAX];
+	struct nlattr *tb[TCA_GACT_MAX + 1];
 	struct tc_gact *parm;
 	struct tcf_gact *gact;
 	struct tcf_common *pc;
 	int ret = 0;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_GACT_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_GACT_PARMS - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm))
+	if (tb[TCA_GACT_PARMS] == NULL ||
+	    nla_len(tb[TCA_GACT_PARMS]) < sizeof(*parm))
 		return -EINVAL;
-	parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]);
+	parm = nla_data(tb[TCA_GACT_PARMS]);
 
-	if (tb[TCA_GACT_PROB-1] != NULL)
+	if (tb[TCA_GACT_PROB] != NULL)
 #ifdef CONFIG_GACT_PROB
-		if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p))
+		if (nla_len(tb[TCA_GACT_PROB]) < sizeof(struct tc_gact_p))
 			return -EINVAL;
 #else
 		return -EOPNOTSUPP;
@@ -97,8 +97,8 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
 	spin_lock_bh(&gact->tcf_lock);
 	gact->tcf_action = parm->action;
 #ifdef CONFIG_GACT_PROB
-	if (tb[TCA_GACT_PROB-1] != NULL) {
-		struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
+	if (tb[TCA_GACT_PROB] != NULL) {
+		struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
 		gact->tcfg_paction = p_parm->paction;
 		gact->tcfg_pval    = p_parm->pval;
 		gact->tcfg_ptype   = p_parm->ptype;
@@ -154,23 +154,23 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	opt.refcnt = gact->tcf_refcnt - ref;
 	opt.bindcnt = gact->tcf_bindcnt - bind;
 	opt.action = gact->tcf_action;
-	RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
 #ifdef CONFIG_GACT_PROB
 	if (gact->tcfg_ptype) {
 		struct tc_gact_p p_opt;
 		p_opt.paction = gact->tcfg_paction;
 		p_opt.pval = gact->tcfg_pval;
 		p_opt.ptype = gact->tcfg_ptype;
-		RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
+		NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
 	}
 #endif
 	t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
-	RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
+	NLA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index fa006e06ce3..fee5282637c 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -92,10 +92,10 @@ static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
 	return ret;
 }
 
-static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
+static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 			struct tc_action *a, int ovr, int bind)
 {
-	struct rtattr *tb[TCA_IPT_MAX];
+	struct nlattr *tb[TCA_IPT_MAX + 1];
 	struct tcf_ipt *ipt;
 	struct tcf_common *pc;
 	struct ipt_entry_target *td, *t;
@@ -104,22 +104,22 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
 	u32 hook = 0;
 	u32 index = 0;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_IPT_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_IPT_HOOK-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32))
+	if (tb[TCA_IPT_HOOK] == NULL ||
+	    nla_len(tb[TCA_IPT_HOOK]) < sizeof(u32))
 		return -EINVAL;
-	if (tb[TCA_IPT_TARG-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
+	if (tb[TCA_IPT_TARG] == NULL ||
+	    nla_len(tb[TCA_IPT_TARG]) < sizeof(*t))
 		return -EINVAL;
-	td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]);
-	if (RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size)
+	td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
+	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
 		return -EINVAL;
 
-	if (tb[TCA_IPT_INDEX-1] != NULL &&
-	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
-		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
+	if (tb[TCA_IPT_INDEX] != NULL &&
+	    nla_len(tb[TCA_IPT_INDEX]) >= sizeof(u32))
+		index = *(u32 *)nla_data(tb[TCA_IPT_INDEX]);
 
 	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
 	if (!pc) {
@@ -136,14 +136,14 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
 	}
 	ipt = to_ipt(pc);
 
-	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
+	hook = *(u32 *)nla_data(tb[TCA_IPT_HOOK]);
 
 	err = -ENOMEM;
 	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
 	if (unlikely(!tname))
 		goto err1;
-	if (tb[TCA_IPT_TABLE - 1] == NULL ||
-	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
+	if (tb[TCA_IPT_TABLE] == NULL ||
+	    nla_strlcpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
 		strcpy(tname, "mangle");
 
 	t = kmemdup(td, td->u.target_size, GFP_KERNEL);
@@ -243,25 +243,25 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 
 	t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
 	if (unlikely(!t))
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	c.bindcnt = ipt->tcf_bindcnt - bind;
 	c.refcnt = ipt->tcf_refcnt - ref;
 	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
-	RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
-	RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
-	RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
-	RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
-	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
+	NLA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
+	NLA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
+	NLA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
+	NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
+	NLA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
 	tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
 	tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
 	tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
-	RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
+	NLA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
 	kfree(t);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	kfree(t);
 	return -1;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c3fde9180f9..db943a8c738 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -54,10 +54,10 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
 	return 0;
 }
 
-static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
+static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 			   struct tc_action *a, int ovr, int bind)
 {
-	struct rtattr *tb[TCA_MIRRED_MAX];
+	struct nlattr *tb[TCA_MIRRED_MAX + 1];
 	struct tc_mirred *parm;
 	struct tcf_mirred *m;
 	struct tcf_common *pc;
@@ -65,13 +65,13 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
 	int ret = 0;
 	int ok_push = 0;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_MIRRED_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_MIRRED_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_MIRRED_PARMS-1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_MIRRED_PARMS-1]) < sizeof(*parm))
+	if (tb[TCA_MIRRED_PARMS] == NULL ||
+	    nla_len(tb[TCA_MIRRED_PARMS]) < sizeof(*parm))
 		return -EINVAL;
-	parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]);
+	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
 	if (parm->ifindex) {
 		dev = __dev_get_by_index(&init_net, parm->ifindex);
@@ -207,14 +207,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
 	opt.bindcnt = m->tcf_bindcnt - bind;
 	opt.eaction = m->tcfm_eaction;
 	opt.ifindex = m->tcfm_ifindex;
-	RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
 	t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
-	RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
+	NLA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index da5c1eae422..be007bb31b5 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -40,22 +40,22 @@ static struct tcf_hashinfo nat_hash_info = {
 	.lock	=	&nat_lock,
 };
 
-static int tcf_nat_init(struct rtattr *rta, struct rtattr *est,
+static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 			struct tc_action *a, int ovr, int bind)
 {
-	struct rtattr *tb[TCA_NAT_MAX];
+	struct nlattr *tb[TCA_NAT_MAX + 1];
 	struct tc_nat *parm;
 	int ret = 0;
 	struct tcf_nat *p;
 	struct tcf_common *pc;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_NAT_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_NAT_PARMS - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm))
+	if (tb[TCA_NAT_PARMS] == NULL ||
+	    nla_len(tb[TCA_NAT_PARMS]) < sizeof(*parm))
 		return -EINVAL;
-	parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]);
+	parm = nla_data(tb[TCA_NAT_PARMS]);
 
 	pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
 	if (!pc) {
@@ -275,17 +275,17 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->refcnt = p->tcf_refcnt - ref;
 	opt->bindcnt = p->tcf_bindcnt - bind;
 
-	RTA_PUT(skb, TCA_NAT_PARMS, s, opt);
+	NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
 	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
-	RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
+	NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
 
 	kfree(opt);
 
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	kfree(opt);
 	return -1;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b46fab5fb32..88d8a15a192 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,10 +33,10 @@ static struct tcf_hashinfo pedit_hash_info = {
 	.lock	=	&pedit_lock,
 };
 
-static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est,
+static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 			  struct tc_action *a, int ovr, int bind)
 {
-	struct rtattr *tb[TCA_PEDIT_MAX];
+	struct nlattr *tb[TCA_PEDIT_MAX + 1];
 	struct tc_pedit *parm;
 	int ret = 0;
 	struct tcf_pedit *p;
@@ -44,15 +44,15 @@ static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est,
 	struct tc_pedit_key *keys = NULL;
 	int ksize;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_PEDIT_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_PEDIT_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_PEDIT_PARMS - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm))
+	if (tb[TCA_PEDIT_PARMS] == NULL ||
+	    nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm))
 		return -EINVAL;
-	parm = RTA_DATA(tb[TCA_PEDIT_PARMS-1]);
+	parm = nla_data(tb[TCA_PEDIT_PARMS]);
 	ksize = parm->nkeys * sizeof(struct tc_pedit_key);
-	if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
+	if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
 		return -EINVAL;
 
 	pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
@@ -206,15 +206,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->refcnt = p->tcf_refcnt - ref;
 	opt->bindcnt = p->tcf_bindcnt - bind;
 
-	RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
+	NLA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
 	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
-	RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
+	NLA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
 	kfree(opt);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	kfree(opt);
 	return -1;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 07ffdf9c5e5..3af5759aac2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -54,7 +54,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 {
 	struct tcf_common *p;
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
-	struct rtattr *r;
+	struct nlattr *r;
 
 	read_lock_bh(&police_lock);
 
@@ -69,8 +69,8 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				continue;
 			a->priv = p;
 			a->order = index;
-			r = (struct rtattr *)skb_tail_pointer(skb);
-			RTA_PUT(skb, a->order, 0, NULL);
+			r = (struct nlattr *)skb_tail_pointer(skb);
+			NLA_PUT(skb, a->order, 0, NULL);
 			if (type == RTM_DELACTION)
 				err = tcf_action_dump_1(skb, a, 0, 1);
 			else
@@ -80,7 +80,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				nlmsg_trim(skb, r);
 				goto done;
 			}
-			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
+			r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
 			n_i++;
 		}
 	}
@@ -90,7 +90,7 @@ done:
 		cb->args[0] += n_i;
 	return n_i;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, r);
 	goto done;
 }
@@ -118,32 +118,32 @@ static void tcf_police_destroy(struct tcf_police *p)
 	BUG_TRAP(0);
 }
 
-static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
+static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 				 struct tc_action *a, int ovr, int bind)
 {
 	unsigned h;
 	int ret = 0, err;
-	struct rtattr *tb[TCA_POLICE_MAX];
+	struct nlattr *tb[TCA_POLICE_MAX + 1];
 	struct tc_police *parm;
 	struct tcf_police *police;
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 	int size;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_POLICE_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_POLICE_TBF-1] == NULL)
+	if (tb[TCA_POLICE_TBF] == NULL)
 		return -EINVAL;
-	size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
+	size = nla_len(tb[TCA_POLICE_TBF]);
 	if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
 		return -EINVAL;
-	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+	parm = nla_data(tb[TCA_POLICE_TBF]);
 
-	if (tb[TCA_POLICE_RESULT-1] != NULL &&
-	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+	if (tb[TCA_POLICE_RESULT] != NULL &&
+	    nla_len(tb[TCA_POLICE_RESULT]) != sizeof(u32))
 		return -EINVAL;
-	if (tb[TCA_POLICE_RESULT-1] != NULL &&
-	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+	if (tb[TCA_POLICE_RESULT] != NULL &&
+	    nla_len(tb[TCA_POLICE_RESULT]) != sizeof(u32))
 		return -EINVAL;
 
 	if (parm->index) {
@@ -174,12 +174,12 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 override:
 	if (parm->rate.rate) {
 		err = -ENOMEM;
-		R_tab = qdisc_get_rtab(&parm->rate, (struct nlattr *)tb[TCA_POLICE_RATE-1]);
+		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
 		if (R_tab == NULL)
 			goto failure;
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
-					       (struct nlattr *)tb[TCA_POLICE_PEAKRATE-1]);
+					       tb[TCA_POLICE_PEAKRATE]);
 			if (P_tab == NULL) {
 				qdisc_put_rtab(R_tab);
 				goto failure;
@@ -197,8 +197,8 @@ override:
 		police->tcfp_P_tab = P_tab;
 	}
 
-	if (tb[TCA_POLICE_RESULT-1])
-		police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	if (tb[TCA_POLICE_RESULT])
+		police->tcfp_result = *(u32*)nla_data(tb[TCA_POLICE_RESULT]);
 	police->tcfp_toks = police->tcfp_burst = parm->burst;
 	police->tcfp_mtu = parm->mtu;
 	if (police->tcfp_mtu == 0) {
@@ -210,13 +210,13 @@ override:
 		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
 	police->tcf_action = parm->action;
 
-	if (tb[TCA_POLICE_AVRATE-1])
+	if (tb[TCA_POLICE_AVRATE])
 		police->tcfp_ewma_rate =
-			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+			*(u32*)nla_data(tb[TCA_POLICE_AVRATE]);
 	if (est)
 		gen_replace_estimator(&police->tcf_bstats,
 				      &police->tcf_rate_est,
-				      &police->tcf_lock, (struct nlattr *)est);
+				      &police->tcf_lock, est);
 
 	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
@@ -332,15 +332,15 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 		opt.peakrate = police->tcfp_P_tab->rate;
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
-	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
 	if (police->tcfp_result)
-		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+		NLA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
 			&police->tcfp_result);
 	if (police->tcfp_ewma_rate)
-		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
+		NLA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index fb84ef33d14..d3226e24070 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -84,10 +84,10 @@ static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
 	return alloc_defdata(d, datalen, defdata);
 }
 
-static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
+static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 			 struct tc_action *a, int ovr, int bind)
 {
-	struct rtattr *tb[TCA_DEF_MAX];
+	struct nlattr *tb[TCA_DEF_MAX + 1];
 	struct tc_defact *parm;
 	struct tcf_defact *d;
 	struct tcf_common *pc;
@@ -95,19 +95,19 @@ static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
 	u32 datalen = 0;
 	int ret = 0;
 
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
+	if (nla == NULL || nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL) < 0)
 		return -EINVAL;
 
-	if (tb[TCA_DEF_PARMS - 1] == NULL ||
-	    RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
+	if (tb[TCA_DEF_PARMS] == NULL ||
+	    nla_len(tb[TCA_DEF_PARMS]) < sizeof(*parm))
 		return -EINVAL;
 
-	parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
-	defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
+	parm = nla_data(tb[TCA_DEF_PARMS]);
+	defdata = nla_data(tb[TCA_DEF_DATA]);
 	if (defdata == NULL)
 		return -EINVAL;
 
-	datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
+	datalen = nla_len(tb[TCA_DEF_DATA]);
 	if (datalen <= 0)
 		return -EINVAL;
 
@@ -164,15 +164,15 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 	opt.refcnt = d->tcf_refcnt - ref;
 	opt.bindcnt = d->tcf_bindcnt - bind;
 	opt.action = d->tcf_action;
-	RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
-	RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
+	NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
 	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
-	RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
+	NLA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
 	return skb->len;
 
-rtattr_failure:
+nla_put_failure:
 	nlmsg_trim(skb, b);
 	return -1;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 87be2b2fc29..d870478e364 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -491,8 +491,7 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
 		struct tc_action *act;
 
 		if (map->police && tb[map->police]) {
-			act = tcf_action_init_1((struct rtattr *)tb[map->police],
-						(struct rtattr *)rate_tlv,
+			act = tcf_action_init_1(tb[map->police], rate_tlv,
 						"police", TCA_ACT_NOREPLACE,
 						TCA_ACT_BIND, &err);
 			if (act == NULL)
@@ -501,8 +500,7 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
 			act->type = TCA_OLD_COMPAT;
 			exts->action = act;
 		} else if (map->action && tb[map->action]) {
-			act = tcf_action_init((struct rtattr *)tb[map->action],
-					      (struct rtattr *)rate_tlv, NULL,
+			act = tcf_action_init(tb[map->action], rate_tlv, NULL,
 				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
 			if (act == NULL)
 				return err;
-- 
cgit v1.2.3


From b5921910a1de4ba82add59154976c3dc7352c8c2 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 23:50:25 -0800
Subject: [NETNS]: Routing cache virtualization.

Basically, this piece looks relatively easy. Namespace is already
available on the dst entry via device and the device is safe to
dereferrence. Compare it with one of a searcher and skip entry if
appropriate.

The only exception is ip_rt_frag_needed. So, add namespace parameter to it.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c  |  2 +-
 net/ipv4/route.c | 21 ++++++++++++++++-----
 2 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a142f19fec4..63ffc7d86f9 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -696,7 +696,7 @@ static void icmp_unreach(struct sk_buff *skb)
 							 "and DF set.\n",
 					       NIPQUAD(iph->daddr));
 			} else {
-				info = ip_rt_frag_needed(iph,
+				info = ip_rt_frag_needed(&init_net, iph,
 						     ntohs(icmph->un.frag.mtu));
 				if (!info)
 					goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 39a40342142..896c768e41a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -648,6 +648,11 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 		(fl1->iif ^ fl2->iif)) == 0;
 }
 
+static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
+{
+	return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net;
+}
+
 /*
  * Perform a full scan of hash table and free all entries.
  * Can be called by a softirq or a process.
@@ -961,7 +966,7 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
-		if (compare_keys(&rth->fl, &rt->fl)) {
+		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
 			*rthp = rth->u.dst.rt_next;
 			/*
@@ -1415,7 +1420,8 @@ static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
 	return 68;
 }
 
-unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
+unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+				 unsigned short new_mtu)
 {
 	int i;
 	unsigned short old_mtu = ntohs(iph->tot_len);
@@ -1438,7 +1444,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
 			    rth->rt_dst  == daddr &&
 			    rth->rt_src  == iph->saddr &&
 			    rth->fl.iif == 0 &&
-			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
+			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
+			    rth->u.dst.dev->nd_net == net) {
 				unsigned short mtu = new_mtu;
 
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -2049,7 +2056,9 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct rtable * rth;
 	unsigned	hash;
 	int iif = dev->ifindex;
+	struct net *net;
 
+	net = skb->dev->nd_net;
 	tos &= IPTOS_RT_MASK;
 	hash = rt_hash(daddr, saddr, iif);
 
@@ -2061,7 +2070,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    rth->fl.iif == iif &&
 		    rth->fl.oif == 0 &&
 		    rth->fl.mark == skb->mark &&
-		    rth->fl.fl4_tos == tos) {
+		    rth->fl.fl4_tos == tos &&
+		    rth->u.dst.dev->nd_net == net) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2460,7 +2470,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    rth->fl.oif == flp->oif &&
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
-			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
+		    rth->u.dst.dev->nd_net == net) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
-- 
cgit v1.2.3


From dde1bc0e6f86183bc095d0774cd109f4edf66ea2 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 22 Jan 2008 23:50:57 -0800
Subject: [NETNS]: Add namespace for ICMP replying code.

All needed API is done, the namespace is available when required from
the device on the DST entry from the incoming packet. So, just replace
init_net with proper namespace.

Other protocols will follow.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c      | 21 +++++++++++++--------
 net/ipv4/ip_output.c |  2 +-
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 63ffc7d86f9..a7321a82df6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -405,7 +405,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(&init_net, &rt, &fl))
+		if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl))
 			goto out_unlock;
 	}
 	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
@@ -437,9 +437,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	struct ipcm_cookie ipc;
 	__be32 saddr;
 	u8  tos;
+	struct net *net;
 
 	if (!rt)
 		goto out;
+	net = rt->u.dst.dev->nd_net;
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -515,7 +517,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		struct net_device *dev = NULL;
 
 		if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
-			dev = dev_get_by_index(&init_net, rt->fl.iif);
+			dev = dev_get_by_index(net, rt->fl.iif);
 
 		if (dev) {
 			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -569,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		struct rtable *rt2;
 
 		security_skb_classify_flow(skb_in, &fl);
-		if (__ip_route_output_key(&init_net, &rt, &fl))
+		if (__ip_route_output_key(net, &rt, &fl))
 			goto out_unlock;
 
 		/* No need to clone since we're just using its address. */
@@ -591,14 +593,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
 			goto out_unlock;
 
-		if (inet_addr_type(&init_net, fl.fl4_src) == RTN_LOCAL)
-			err = __ip_route_output_key(&init_net, &rt2, &fl);
+		if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
+			err = __ip_route_output_key(net, &rt2, &fl);
 		else {
 			struct flowi fl2 = {};
 			struct dst_entry *odst;
 
 			fl2.fl4_dst = fl.fl4_src;
-			if (ip_route_output_key(&init_net, &rt2, &fl2))
+			if (ip_route_output_key(net, &rt2, &fl2))
 				goto out_unlock;
 
 			/* Ugh! */
@@ -666,6 +668,9 @@ static void icmp_unreach(struct sk_buff *skb)
 	int hash, protocol;
 	struct net_protocol *ipprot;
 	u32 info = 0;
+	struct net *net;
+
+	net = skb->dst->dev->nd_net;
 
 	/*
 	 *	Incomplete header ?
@@ -696,7 +701,7 @@ static void icmp_unreach(struct sk_buff *skb)
 							 "and DF set.\n",
 					       NIPQUAD(iph->daddr));
 			} else {
-				info = ip_rt_frag_needed(&init_net, iph,
+				info = ip_rt_frag_needed(net, iph,
 						     ntohs(icmph->un.frag.mtu));
 				if (!info)
 					goto out;
@@ -734,7 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	 */
 
 	if (!sysctl_icmp_ignore_bogus_error_responses &&
-	    inet_addr_type(&init_net, iph->daddr) == RTN_BROADCAST) {
+	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
 					    "type %u, code %u "
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1725e061398..18070ca6577 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1379,7 +1379,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 						 .dport = tcp_hdr(skb)->source } },
 				    .proto = sk->sk_protocol };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(&init_net, &rt, &fl))
+		if (ip_route_output_key(sk->sk_net, &rt, &fl))
 			return;
 	}
 
-- 
cgit v1.2.3


From f5e5cb75539749960d7d35f8a7da29e2f4b96320 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:32:06 -0800
Subject: [NET_SCHED]: sch_atm: fix format string warning

Fix format string warning introduces by the netlink API conversion:

net/sched/sch_atm.c:250: warning: format '%lu' expects type 'long unsigned int', but argument 3 has type 'int'.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index eb01aae117d..e5873915378 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -246,7 +246,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		if (!excess)
 			return -ENOENT;
 	}
-	pr_debug("atm_tc_change: type %d, payload %lu, hdr_len %d\n",
+	pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
 		 opt->nla_type, nla_len(opt), hdr_len);
 	sock = sockfd_lookup(fd, &error);
 	if (!sock)
-- 
cgit v1.2.3


From b03f4672007e533c8dbf0965f995182586216bf1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:32:21 -0800
Subject: [NET_SCHED]: sch_netem: use nla_parse_nested_compat

Replace open coded equivalent of nla_parse_nested_compat().

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 58 +++++++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a7b58df4546..1a755799ffb 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -407,13 +407,18 @@ static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_NETEM_MAX + 1];
 	struct tc_netem_qopt *qopt;
 	int ret;
 
-	if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+	if (opt == NULL)
 		return -EINVAL;
 
-	qopt = nla_data(opt);
+	ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, NULL, qopt,
+				      sizeof(*qopt));
+	if (ret < 0)
+		return ret;
+
 	ret = set_fifo_limit(q->qdisc, qopt->limit);
 	if (ret) {
 		pr_debug("netem: can't set fifo limit\n");
@@ -434,39 +439,28 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (q->gap)
 		q->reorder = ~0;
 
-	/* Handle nested options after initial queue options.
-	 * Should have put all options in nested format but too late now.
-	 */
-	if (nla_len(opt) > sizeof(*qopt)) {
-		struct nlattr *tb[TCA_NETEM_MAX + 1];
-		if (nla_parse(tb, TCA_NETEM_MAX,
-			      nla_data(opt) + sizeof(*qopt),
-			      nla_len(opt) - sizeof(*qopt), NULL))
-			return -EINVAL;
-
-		if (tb[TCA_NETEM_CORR]) {
-			ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
-			if (ret)
-				return ret;
-		}
+	if (tb[TCA_NETEM_CORR]) {
+		ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
+		if (ret)
+			return ret;
+	}
 
-		if (tb[TCA_NETEM_DELAY_DIST]) {
-			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
-			if (ret)
-				return ret;
-		}
+	if (tb[TCA_NETEM_DELAY_DIST]) {
+		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
+		if (ret)
+			return ret;
+	}
 
-		if (tb[TCA_NETEM_REORDER]) {
-			ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
-			if (ret)
-				return ret;
-		}
+	if (tb[TCA_NETEM_REORDER]) {
+		ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
+		if (ret)
+			return ret;
+	}
 
-		if (tb[TCA_NETEM_CORRUPT]) {
-			ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
-			if (ret)
-				return ret;
-		}
+	if (tb[TCA_NETEM_CORRUPT]) {
+		ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 6d834e04e596d6803cf1074a07fd67e7b5662f1b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:32:42 -0800
Subject: [NET_SCHED]: act_api: fix netlink API conversion bug

Fix two invalid attribute accesses, indices start at 1 with the new
netlink API.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ebd21d2cb5f..ae077ed208a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -997,11 +997,11 @@ find_dump_kind(struct nlmsghdr *n)
 	if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
 		      NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
 		return NULL;
-	if (tb[0] == NULL)
-		return NULL;
 
-	if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[0]),
-		      nla_len(tb[0]), NULL) < 0)
+	if (tb[1] == NULL)
+		return NULL;
+	if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
+		      nla_len(tb[1]), NULL) < 0)
 		return NULL;
 	kind = tb2[TCA_ACT_KIND];
 
-- 
cgit v1.2.3


From c96c9471dd86ba24dc3826bf5688b99d3caf3ace Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:32:58 -0800
Subject: [NET_SCHED]: act_api: use nlmsg_parse

Convert open-coded nlmsg_parse to use the real function.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ae077ed208a..2fe0345ddcb 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -984,11 +984,8 @@ find_dump_kind(struct nlmsghdr *n)
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
-	int min_len = NLMSG_LENGTH(sizeof(struct tcamsg));
-	int attrlen = n->nlmsg_len - NLMSG_ALIGN(min_len);
-	struct nlattr *attr = (void *) n + NLMSG_ALIGN(min_len);
 
-	if (nla_parse(nla, TCAA_MAX, attr, attrlen, NULL) < 0)
+	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
 		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
-- 
cgit v1.2.3


From ab27cfb85c5778400740ad0c401bde65616774eb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:33:13 -0800
Subject: [NET_SCHED]: act_api: use PTR_ERR in tcf_action_init/tcf_action_get

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 71 +++++++++++++++++++++++++++++------------------------
 net/sched/cls_api.c | 14 +++++------
 2 files changed, 46 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2fe0345ddcb..ea80f82dbb6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
+#include <linux/err.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/sch_generic.h>
@@ -463,15 +464,16 @@ errout:
 }
 
 struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind, int *err)
+				    char *name, int ovr, int bind)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
 	char act_name[IFNAMSIZ];
 	struct nlattr *tb[TCA_ACT_MAX+1];
 	struct nlattr *kind;
+	int err;
 
-	*err = -EINVAL;
+	err = -EINVAL;
 
 	if (name == NULL) {
 		if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
@@ -502,36 +504,35 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 		 * indicate this using -EAGAIN.
 		 */
 		if (a_o != NULL) {
-			*err = -EAGAIN;
+			err = -EAGAIN;
 			goto err_mod;
 		}
 #endif
-		*err = -ENOENT;
+		err = -ENOENT;
 		goto err_out;
 	}
 
-	*err = -ENOMEM;
+	err = -ENOMEM;
 	a = kzalloc(sizeof(*a), GFP_KERNEL);
 	if (a == NULL)
 		goto err_mod;
 
 	/* backward compatibility for policer */
 	if (name == NULL)
-		*err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
+		err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
 	else
-		*err = a_o->init(nla, est, a, ovr, bind);
-	if (*err < 0)
+		err = a_o->init(nla, est, a, ovr, bind);
+	if (err < 0)
 		goto err_free;
 
 	/* module count goes up only when brand new policy is created
 	   if it exists and is only bound to in a_o->init() then
 	   ACT_P_CREATED is not returned (a zero is).
 	*/
-	if (*err != ACT_P_CREATED)
+	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 	a->ops = a_o;
 
-	*err = 0;
 	return a;
 
 err_free:
@@ -539,24 +540,22 @@ err_free:
 err_mod:
 	module_put(a_o->owner);
 err_out:
-	return NULL;
+	return ERR_PTR(err);
 }
 
 struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
-				  char *name, int ovr, int bind, int *err)
+				  char *name, int ovr, int bind)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 	int i;
 
-	if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL) < 0) {
-		*err = -EINVAL;
-		return head;
-	}
+	if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL) < 0)
+		return ERR_PTR(-EINVAL);
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(tb[i], est, name, ovr, bind, err);
-		if (act == NULL)
+		act = tcf_action_init_1(tb[i], est, name, ovr, bind);
+		if (IS_ERR(act))
 			goto err;
 		act->order = i;
 
@@ -571,7 +570,7 @@ struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
 err:
 	if (head != NULL)
 		tcf_action_destroy(head, bind);
-	return NULL;
+	return act;
 }
 
 int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
@@ -668,44 +667,46 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 }
 
 static struct tc_action *
-tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int *err)
+tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 {
 	struct nlattr *tb[TCA_ACT_MAX+1];
 	struct tc_action *a;
 	int index;
+	int err;
 
-	*err = -EINVAL;
+	err = -EINVAL;
 	if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
-		return NULL;
+		goto err_out;
 
 	if (tb[TCA_ACT_INDEX] == NULL ||
 	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
-		return NULL;
+		goto err_out;
 	index = *(int *)nla_data(tb[TCA_ACT_INDEX]);
 
-	*err = -ENOMEM;
+	err = -ENOMEM;
 	a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
 	if (a == NULL)
-		return NULL;
+		goto err_out;
 
-	*err = -EINVAL;
+	err = -EINVAL;
 	a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
 	if (a->ops == NULL)
 		goto err_free;
 	if (a->ops->lookup == NULL)
 		goto err_mod;
-	*err = -ENOENT;
+	err = -ENOENT;
 	if (a->ops->lookup(a, index) == 0)
 		goto err_mod;
 
 	module_put(a->ops->owner);
-	*err = 0;
 	return a;
+
 err_mod:
 	module_put(a->ops->owner);
 err_free:
 	kfree(a);
-	return NULL;
+err_out:
+	return ERR_PTR(err);
 }
 
 static void cleanup_a(struct tc_action *act)
@@ -816,9 +817,11 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_get_1(tb[i], n, pid, &ret);
-		if (act == NULL)
+		act = tcf_action_get_1(tb[i], n, pid);
+		if (IS_ERR(act)) {
+			ret = PTR_ERR(act);
 			goto err;
+		}
 		act->order = i;
 
 		if (head == NULL)
@@ -912,9 +915,13 @@ tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
 	struct tc_action *a;
 	u32 seq = n->nlmsg_seq;
 
-	act = tcf_action_init(nla, NULL, NULL, ovr, 0, &ret);
+	act = tcf_action_init(nla, NULL, NULL, ovr, 0);
 	if (act == NULL)
 		goto done;
+	if (IS_ERR(act)) {
+		ret = PTR_ERR(act);
+		goto done;
+	}
 
 	/* dump then free all the actions after update; inserted policy
 	 * stays intact
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d870478e364..92fa1559c21 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/netlink.h>
+#include <linux/err.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -487,23 +488,22 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
 
 #ifdef CONFIG_NET_CLS_ACT
 	{
-		int err;
 		struct tc_action *act;
 
 		if (map->police && tb[map->police]) {
 			act = tcf_action_init_1(tb[map->police], rate_tlv,
 						"police", TCA_ACT_NOREPLACE,
-						TCA_ACT_BIND, &err);
-			if (act == NULL)
-				return err;
+						TCA_ACT_BIND);
+			if (IS_ERR(act))
+				return PTR_ERR(act);
 
 			act->type = TCA_OLD_COMPAT;
 			exts->action = act;
 		} else if (map->action && tb[map->action]) {
 			act = tcf_action_init(tb[map->action], rate_tlv, NULL,
-				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
-			if (act == NULL)
-				return err;
+					      TCA_ACT_NOREPLACE, TCA_ACT_BIND);
+			if (IS_ERR(act))
+				return PTR_ERR(act);
 
 			exts->action = act;
 		}
-- 
cgit v1.2.3


From cee63723b358e594225e812d6e14a2a0abfd5c88 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:33:32 -0800
Subject: [NET_SCHED]: Propagate nla_parse return value

nla_parse() returns more detailed errno codes, propagate them back on
error.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c     | 29 ++++++++++++++++++-----------
 net/sched/act_gact.c    |  7 ++++++-
 net/sched/act_ipt.c     |  6 +++++-
 net/sched/act_mirred.c  |  8 ++++++--
 net/sched/act_nat.c     |  8 ++++++--
 net/sched/act_pedit.c   |  8 ++++++--
 net/sched/act_police.c  |  6 +++++-
 net/sched/act_simple.c  |  8 ++++++--
 net/sched/cls_basic.c   |  7 ++++---
 net/sched/cls_fw.c      |  5 +++--
 net/sched/cls_route.c   |  5 +++--
 net/sched/cls_rsvp.h    |  5 +++--
 net/sched/cls_tcindex.c |  6 ++++--
 net/sched/cls_u32.c     |  5 +++--
 net/sched/em_meta.c     |  6 ++++--
 net/sched/ematch.c      |  6 ++++--
 net/sched/sch_atm.c     |  6 +++++-
 net/sched/sch_cbq.c     | 14 +++++++++++---
 net/sched/sch_dsmark.c  | 14 ++++++++++++--
 net/sched/sch_gred.c    | 16 +++++++++++++---
 net/sched/sch_hfsc.c    |  7 ++++++-
 net/sched/sch_htb.c     | 23 +++++++++++++++++++----
 net/sched/sch_prio.c    |  9 ++++++---
 net/sched/sch_red.c     |  7 ++++++-
 net/sched/sch_tbf.c     | 10 +++++++---
 25 files changed, 171 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ea80f82dbb6..87818d7fb62 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -473,17 +473,18 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 	struct nlattr *kind;
 	int err;
 
-	err = -EINVAL;
-
 	if (name == NULL) {
-		if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
+		err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+		if (err < 0)
 			goto err_out;
+		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
 		if (kind == NULL)
 			goto err_out;
 		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
 	} else {
+		err = -EINVAL;
 		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
 	}
@@ -548,10 +549,12 @@ struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
+	int err;
 	int i;
 
-	if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL) < 0)
-		return ERR_PTR(-EINVAL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+	if (err < 0)
+		return ERR_PTR(err);
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(tb[i], est, name, ovr, bind);
@@ -674,10 +677,11 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	int index;
 	int err;
 
-	err = -EINVAL;
-	if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
+	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+	if (err < 0)
 		goto err_out;
 
+	err = -EINVAL;
 	if (tb[TCA_ACT_INDEX] == NULL ||
 	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
 		goto err_out;
@@ -759,9 +763,11 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 
 	b = skb_tail_pointer(skb);
 
-	if (nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL) < 0)
+	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+	if (err < 0)
 		goto err_out;
 
+	err = -EINVAL;
 	kind = tb[TCA_ACT_KIND];
 	a->ops = tc_lookup_action(kind);
 	if (a->ops == NULL)
@@ -804,12 +810,13 @@ err_out:
 static int
 tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 {
-	int i, ret = 0;
+	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 
-	if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL) < 0)
-		return -EINVAL;
+	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+	if (ret < 0)
+		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
 		if (tb[0] != NULL && tb[1] == NULL)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5402cf885f9..df214d47fc9 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -61,10 +61,15 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	struct tcf_gact *gact;
 	struct tcf_common *pc;
 	int ret = 0;
+	int err;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_GACT_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_GACT_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_GACT_PARMS] == NULL ||
 	    nla_len(tb[TCA_GACT_PARMS]) < sizeof(*parm))
 		return -EINVAL;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index fee5282637c..12693347d56 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -104,9 +104,13 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 	u32 hook = 0;
 	u32 index = 0;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_IPT_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_IPT_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_IPT_HOOK] == NULL ||
 	    nla_len(tb[TCA_IPT_HOOK]) < sizeof(u32))
 		return -EINVAL;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index db943a8c738..6cb5e30dcf8 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -62,12 +62,16 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 	struct tcf_mirred *m;
 	struct tcf_common *pc;
 	struct net_device *dev = NULL;
-	int ret = 0;
+	int ret = 0, err;
 	int ok_push = 0;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_MIRRED_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_MIRRED_PARMS] == NULL ||
 	    nla_len(tb[TCA_MIRRED_PARMS]) < sizeof(*parm))
 		return -EINVAL;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index be007bb31b5..5a512d4dc37 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -45,13 +45,17 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 {
 	struct nlattr *tb[TCA_NAT_MAX + 1];
 	struct tc_nat *parm;
-	int ret = 0;
+	int ret = 0, err;
 	struct tcf_nat *p;
 	struct tcf_common *pc;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_NAT_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_NAT_PARMS] == NULL ||
 	    nla_len(tb[TCA_NAT_PARMS]) < sizeof(*parm))
 		return -EINVAL;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 88d8a15a192..1b9ca45a78e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -38,15 +38,19 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 {
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
 	struct tc_pedit *parm;
-	int ret = 0;
+	int ret = 0, err;
 	struct tcf_pedit *p;
 	struct tcf_common *pc;
 	struct tc_pedit_key *keys = NULL;
 	int ksize;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_PEDIT_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_PEDIT_PARMS] == NULL ||
 	    nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm))
 		return -EINVAL;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 3af5759aac2..c0fce9b9841 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -129,9 +129,13 @@ static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 	int size;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_POLICE_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_POLICE_TBF] == NULL)
 		return -EINVAL;
 	size = nla_len(tb[TCA_POLICE_TBF]);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d3226e24070..cedaadf18eb 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -93,11 +93,15 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 	struct tcf_common *pc;
 	void *defdata;
 	u32 datalen = 0;
-	int ret = 0;
+	int ret = 0, err;
 
-	if (nla == NULL || nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL) < 0)
+	if (nla == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_DEF_PARMS] == NULL ||
 	    nla_len(tb[TCA_DEF_PARMS]) < sizeof(*parm))
 		return -EINVAL;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 3953da33956..524b7885dc3 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -166,7 +166,7 @@ errout:
 static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 			struct nlattr **tca, unsigned long *arg)
 {
-	int err = -EINVAL;
+	int err;
 	struct basic_head *head = (struct basic_head *) tp->root;
 	struct nlattr *tb[TCA_BASIC_MAX + 1];
 	struct basic_filter *f = (struct basic_filter *) *arg;
@@ -174,8 +174,9 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (tca[TCA_OPTIONS] == NULL)
 		return -EINVAL;
 
-	if (nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], NULL) < 0)
-		return -EINVAL;
+	err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], NULL);
+	if (err < 0)
+		return err;
 
 	if (f != NULL) {
 		if (handle && f->handle != handle)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index db6e90a3784..a1a9f4d26b8 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -246,8 +246,9 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 	if (!opt)
 		return handle ? -EINVAL : 0;
 
-	if (nla_parse_nested(tb, TCA_FW_MAX, opt, NULL) < 0)
-		return -EINVAL;
+	err = nla_parse_nested(tb, TCA_FW_MAX, opt, NULL);
+	if (err < 0)
+		return err;
 
 	if (f != NULL) {
 		if (f->id != handle && handle)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index b1aae84cbad..3aa8109aa3c 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -440,8 +440,9 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	if (nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, NULL) < 0)
-		return -EINVAL;
+	err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, NULL);
+	if (err < 0)
+		return err;
 
 	if ((f = (struct route4_filter*)*arg) != NULL) {
 		if (f->handle != handle && handle)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 2364c79d083..5747408a7d4 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -416,8 +416,9 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	if (nla_parse_nested(tb, TCA_RSVP_MAX, opt, NULL) < 0)
-		return -EINVAL;
+	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, NULL);
+	if (err < 0)
+		return err;
 
 	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
 	if (err < 0)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index ed8023944fe..6b84d276e5a 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -350,6 +350,7 @@ tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+	int err;
 
 	pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
 	    "p %p,r %p,*arg 0x%lx\n",
@@ -358,8 +359,9 @@ tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (!opt)
 		return 0;
 
-	if (nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, NULL) < 0)
-		return -EINVAL;
+	err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, NULL);
+	if (err < 0)
+		return err;
 
 	return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]);
 }
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index aaf5049f951..3228cc4ae08 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -531,8 +531,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	if (nla_parse_nested(tb, TCA_U32_MAX, opt, NULL) < 0)
-		return -EINVAL;
+	err = nla_parse_nested(tb, TCA_U32_MAX, opt, NULL);
+	if (err < 0)
+		return err;
 
 	if ((n = (struct tc_u_knode*)*arg) != NULL) {
 		if (TC_U32_KEY(n->handle) == 0)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 92b6863e928..dd5723670d3 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -749,14 +749,16 @@ static inline int meta_is_supported(struct meta_value *val)
 static int em_meta_change(struct tcf_proto *tp, void *data, int len,
 			  struct tcf_ematch *m)
 {
-	int err = -EINVAL;
+	int err;
 	struct nlattr *tb[TCA_EM_META_MAX + 1];
 	struct tcf_meta_hdr *hdr;
 	struct meta_match *meta = NULL;
 
-	if (nla_parse(tb, TCA_EM_META_MAX, data, len, NULL) < 0)
+	err = nla_parse(tb, TCA_EM_META_MAX, data, len, NULL);
+	if (err < 0)
 		goto errout;
 
+	err = -EINVAL;
 	if (tb[TCA_EM_META_HDR] == NULL ||
 	    nla_len(tb[TCA_EM_META_HDR]) < sizeof(*hdr))
 		goto errout;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 72d9b273524..d2b480f01a4 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -301,7 +301,7 @@ errout:
 int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 			 struct tcf_ematch_tree *tree)
 {
-	int idx, list_len, matches_len, err = -EINVAL;
+	int idx, list_len, matches_len, err;
 	struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
 	struct nlattr *rt_match, *rt_hdr, *rt_list;
 	struct tcf_ematch_tree_hdr *tree_hdr;
@@ -312,9 +312,11 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 		return 0;
 	}
 
-	if (nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, NULL) < 0)
+	err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, NULL);
+	if (err < 0)
 		goto errout;
 
+	err = -EINVAL;
 	rt_hdr = tb[TCA_EMATCH_TREE_HDR];
 	rt_list = tb[TCA_EMATCH_TREE_LIST];
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index e5873915378..aaa32a22726 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -223,8 +223,12 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	 */
 	if (flow)
 		return -EBUSY;
-	if (opt == NULL || nla_parse_nested(tb, TCA_ATM_MAX, opt, NULL))
+	if (opt == NULL)
 		return -EINVAL;
+	error = nla_parse_nested(tb, TCA_ATM_MAX, opt, NULL);
+	if (error < 0)
+		return error;
+
 	if (!tb[TCA_ATM_FD] || nla_len(tb[TCA_ATM_FD]) < sizeof(fd))
 		return -EINVAL;
 	fd = *(int *)nla_data(tb[TCA_ATM_FD]);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5c8667ef4ba..585f8a6ec7e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1382,9 +1382,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_CBQ_MAX + 1];
 	struct tc_ratespec *r;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
+	if (err < 0)
+		return err;
 
-	if (nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL) < 0 ||
-	    tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL ||
+	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL ||
 	    nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
 		return -EINVAL;
 
@@ -1764,9 +1768,13 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	struct cbq_class *parent;
 	struct qdisc_rate_table *rtab = NULL;
 
-	if (opt==NULL || nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL))
+	if (opt == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_CBQ_OVL_STRATEGY] &&
 	    nla_len(tb[TCA_CBQ_OVL_STRATEGY]) < sizeof(struct tc_cbq_ovl))
 		return -EINVAL;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f183ab76887..f1d0a08aca7 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -116,9 +116,14 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 		goto errout;
 	}
 
-	if (!opt || nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL))
+	if (!opt)
 		goto errout;
 
+	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
 	if (tb[TCA_DSMARK_MASK]) {
 		if (nla_len(tb[TCA_DSMARK_MASK]) < sizeof(u8))
 			goto errout;
@@ -351,9 +356,14 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 
 	pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 
-	if (!opt || nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL) < 0)
+	if (!opt)
+		goto errout;
+
+	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL);
+	if (err < 0)
 		goto errout;
 
+	err = -EINVAL;
 	if (nla_len(tb[TCA_DSMARK_INDICES]) < sizeof(u16))
 		goto errout;
 	indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 6b784838a53..365c7d8b17a 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -430,12 +430,16 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	struct gred_sched *table = qdisc_priv(sch);
 	struct tc_gred_qopt *ctl;
 	struct nlattr *tb[TCA_GRED_MAX + 1];
-	int err = -EINVAL, prio = GRED_DEF_PRIO;
+	int err, prio = GRED_DEF_PRIO;
 	u8 *stab;
 
-	if (opt == NULL || nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL))
+	if (opt == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
 		return gred_change_table_def(sch, opt);
 
@@ -445,6 +449,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	    nla_len(tb[TCA_GRED_STAB]) < 256)
 		return -EINVAL;
 
+	err = -EINVAL;
 	ctl = nla_data(tb[TCA_GRED_PARMS]);
 	stab = nla_data(tb[TCA_GRED_STAB]);
 
@@ -489,10 +494,15 @@ errout:
 static int gred_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct nlattr *tb[TCA_GRED_MAX + 1];
+	int err;
 
-	if (opt == NULL || nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL))
+	if (opt == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
 		return -EINVAL;
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 4e6a164d305..fcb4826158d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -997,10 +997,15 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	struct nlattr *tb[TCA_HFSC_MAX + 1];
 	struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
 	u64 cur_time;
+	int err;
 
-	if (opt == NULL || nla_parse_nested(tb, TCA_HFSC_MAX, opt, NULL))
+	if (opt == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_HFSC_RSC]) {
 		if (nla_len(tb[TCA_HFSC_RSC]) < sizeof(*rsc))
 			return -EINVAL;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3b3ff641b6d..512df9a0a24 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -997,9 +997,17 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	struct htb_sched *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_HTB_INIT + 1];
 	struct tc_htb_glob *gopt;
+	int err;
 	int i;
-	if (!opt || nla_parse_nested(tb, TCA_HTB_INIT, opt, NULL) ||
-	    tb[TCA_HTB_INIT] == NULL ||
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, NULL);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_HTB_INIT] == NULL ||
 	    nla_len(tb[TCA_HTB_INIT]) < sizeof(*gopt)) {
 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
@@ -1302,8 +1310,15 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
-	if (!opt || nla_parse_nested(tb, TCA_HTB_RTAB, opt, NULL) ||
-	    tb[TCA_HTB_PARMS] == NULL ||
+	if (!opt)
+		goto failure;
+
+	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, NULL);
+	if (err < 0)
+		goto failure;
+
+	err = -EINVAL;
+	if (tb[TCA_HTB_PARMS] == NULL ||
 	    nla_len(tb[TCA_HTB_PARMS]) < sizeof(*hopt))
 		goto failure;
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a4f932df86e..4aa2b45dad0 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -229,11 +229,14 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	struct prio_sched_data *q = qdisc_priv(sch);
 	struct tc_prio_qopt *qopt;
 	struct nlattr *tb[TCA_PRIO_MAX + 1];
+	int err;
 	int i;
 
-	if (nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
-				    sizeof(*qopt)))
-		return -EINVAL;
+	err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
+				      sizeof(*qopt));
+	if (err < 0)
+		return err;
+
 	q->bands = qopt->bands;
 	/* If we're multiqueue, make sure the number of incoming bands
 	 * matches the number of queues on the device we're associating with.
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6ce8da5aca0..dcf6afc196f 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -207,10 +207,15 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	struct nlattr *tb[TCA_RED_MAX + 1];
 	struct tc_red_qopt *ctl;
 	struct Qdisc *child = NULL;
+	int err;
 
-	if (opt == NULL || nla_parse_nested(tb, TCA_RED_MAX, opt, NULL))
+	if (opt == NULL)
 		return -EINVAL;
 
+	err = nla_parse_nested(tb, TCA_RED_MAX, opt, NULL);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_RED_PARMS] == NULL ||
 	    nla_len(tb[TCA_RED_PARMS]) < sizeof(*ctl) ||
 	    tb[TCA_RED_STAB] == NULL ||
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 6c4ad7e677b..f9b1543e3d7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -272,7 +272,7 @@ static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
 
 static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 {
-	int err = -EINVAL;
+	int err;
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_TBF_PTAB + 1];
 	struct tc_tbf_qopt *qopt;
@@ -281,8 +281,12 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	struct Qdisc *child = NULL;
 	int max_size,n;
 
-	if (nla_parse_nested(tb, TCA_TBF_PTAB, opt, NULL) ||
-	    tb[TCA_TBF_PARMS] == NULL ||
+	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, NULL);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_TBF_PARMS] == NULL ||
 	    nla_len(tb[TCA_TBF_PARMS]) < sizeof(*qopt))
 		goto done;
 
-- 
cgit v1.2.3


From 4b3550ef530cfc153fa91f0b37cbda448bad11c6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:34:11 -0800
Subject: [NET_SCHED]: Use nla_nest_start/nla_nest_end

Use nla_nest_start/nla_nest_end for dumping nested attributes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c     | 84 +++++++++++++++++++++++++++----------------------
 net/sched/act_police.c  | 13 ++++----
 net/sched/cls_api.c     | 14 ++++++---
 net/sched/cls_basic.c   | 12 +++----
 net/sched/cls_fw.c      |  9 +++---
 net/sched/cls_route.c   |  9 +++---
 net/sched/cls_rsvp.h    | 10 +++---
 net/sched/cls_tcindex.c | 13 +++++---
 net/sched/cls_u32.c     | 13 ++++----
 net/sched/ematch.c      | 16 ++++++----
 net/sched/sch_atm.c     | 15 +++++----
 net/sched/sch_cbq.c     | 24 +++++++-------
 net/sched/sch_hfsc.c    | 11 ++++---
 net/sched/sch_htb.c     | 32 +++++++++++--------
 net/sched/sch_ingress.c | 12 +++----
 net/sched/sch_tbf.c     | 12 +++----
 16 files changed, 166 insertions(+), 133 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 87818d7fb62..36022605fc1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -69,7 +69,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	struct tcf_common *p;
 	int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
-	struct nlattr *r ;
+	struct nlattr *nest;
 
 	read_lock_bh(hinfo->lock);
 
@@ -84,15 +84,17 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			a->priv = p;
 			a->order = n_i;
-			r = (struct nlattr *)skb_tail_pointer(skb);
-			NLA_PUT(skb, a->order, 0, NULL);
+
+			nest = nla_nest_start(skb, a->order);
+			if (nest == NULL)
+				goto nla_put_failure;
 			err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
-				nlmsg_trim(skb, r);
+				nlmsg_trim(skb, nest);
 				goto done;
 			}
-			r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
+			nla_nest_end(skb, nest);
 			n_i++;
 			if (n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
@@ -105,7 +107,7 @@ done:
 	return n_i;
 
 nla_put_failure:
-	nlmsg_trim(skb, r);
+	nla_nest_cancel(skb, nest);
 	goto done;
 }
 
@@ -113,11 +115,12 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 			  struct tcf_hashinfo *hinfo)
 {
 	struct tcf_common *p, *s_p;
-	struct nlattr *r ;
+	struct nlattr *nest;
 	int i= 0, n_i = 0;
 
-	r = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, a->order, 0, NULL);
+	nest = nla_nest_start(skb, a->order);
+	if (nest == NULL)
+		goto nla_put_failure;
 	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
 		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
@@ -131,11 +134,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 	NLA_PUT(skb, TCA_FCNT, 4, &n_i);
-	r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
+	nla_nest_end(skb, nest);
 
 	return n_i;
 nla_put_failure:
-	nlmsg_trim(skb, r);
+	nla_nest_cancel(skb, nest);
 	return -EINVAL;
 }
 
@@ -415,7 +418,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *r;
+	struct nlattr *nest;
 
 	if (a->ops == NULL || a->ops->dump == NULL)
 		return err;
@@ -423,10 +426,11 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto nla_put_failure;
-	r = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
-		r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
+		nla_nest_end(skb, nest);
 		return err;
 	}
 
@@ -441,17 +445,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 {
 	struct tc_action *a;
 	int err = -EINVAL;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *r ;
+	struct nlattr *nest;
 
 	while ((a = act) != NULL) {
-		r = (struct nlattr *)skb_tail_pointer(skb);
 		act = a->next;
-		NLA_PUT(skb, a->order, 0, NULL);
+		nest = nla_nest_start(skb, a->order);
+		if (nest == NULL)
+			goto nla_put_failure;
 		err = tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
 			goto errout;
-		r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
+		nla_nest_end(skb, nest);
 	}
 
 	return 0;
@@ -459,7 +463,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 nla_put_failure:
 	err = -EINVAL;
 errout:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return err;
 }
 
@@ -627,7 +631,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *x;
+	struct nlattr *nest;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
 
@@ -636,13 +640,14 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	if (tcf_action_dump(skb, a, bind, ref) < 0)
 		goto nla_put_failure;
 
-	x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
+	nla_nest_end(skb, nest);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
@@ -743,7 +748,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	struct nlmsghdr *nlh;
 	struct tcamsg *t;
 	struct netlink_callback dcb;
-	struct nlattr *x;
+	struct nlattr *nest;
 	struct nlattr *tb[TCA_ACT_MAX+1];
 	struct nlattr *kind;
 	struct tc_action *a = create_a(0);
@@ -779,14 +784,15 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
 	if (err < 0)
 		goto nla_put_failure;
 
-	x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
+	nla_nest_end(skb, nest);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	nlh->nlmsg_flags |= NLM_F_ROOT;
@@ -875,7 +881,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	struct nlattr *x;
+	struct nlattr *nest;
 	unsigned char *b;
 	int err = 0;
 
@@ -891,13 +897,14 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	if (tcf_action_dump(skb, a, 0, 0) < 0)
 		goto nla_put_failure;
 
-	x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
+	nla_nest_end(skb, nest);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
@@ -1025,7 +1032,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *net = skb->sk->sk_net;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *x;
+	struct nlattr *nest;
 	struct tc_action_ops *a_o;
 	struct tc_action a;
 	int ret = 0;
@@ -1060,18 +1067,19 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+	nest = nla_nest_start(skb, TCA_ACT_TAB);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
 	if (ret < 0)
 		goto nla_put_failure;
 
 	if (ret > 0) {
-		x->nla_len = skb_tail_pointer(skb) - (u8 *)x;
+		nla_nest_end(skb, nest);
 		ret = skb->len;
 	} else
-		nlmsg_trim(skb, x);
+		nla_nest_cancel(skb, nest);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	if (NETLINK_CB(cb->skb).pid && ret)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c0fce9b9841..ee2f1b64dd7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -54,7 +54,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 {
 	struct tcf_common *p;
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
-	struct nlattr *r;
+	struct nlattr *nest;
 
 	read_lock_bh(&police_lock);
 
@@ -69,18 +69,19 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				continue;
 			a->priv = p;
 			a->order = index;
-			r = (struct nlattr *)skb_tail_pointer(skb);
-			NLA_PUT(skb, a->order, 0, NULL);
+			nest = nla_nest_start(skb, a->order);
+			if (nest == NULL)
+				goto nla_put_failure;
 			if (type == RTM_DELACTION)
 				err = tcf_action_dump_1(skb, a, 0, 1);
 			else
 				err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
-				nlmsg_trim(skb, r);
+				nla_nest_cancel(skb, nest);
 				goto done;
 			}
-			r->nla_len = skb_tail_pointer(skb) - (u8 *)r;
+			nla_nest_end(skb, nest);
 			n_i++;
 		}
 	}
@@ -91,7 +92,7 @@ done:
 	return n_i;
 
 nla_put_failure:
-	nlmsg_trim(skb, r);
+	nla_nest_cancel(skb, nest);
 	goto done;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 92fa1559c21..5584e7cd4b9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -544,18 +544,22 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
-		struct nlattr *p_rta = (struct nlattr *)skb_tail_pointer(skb);
+		struct nlattr *nest;
 
 		if (exts->action->type != TCA_OLD_COMPAT) {
-			NLA_PUT(skb, map->action, 0, NULL);
+			nest = nla_nest_start(skb, map->action);
+			if (nest == NULL)
+				goto nla_put_failure;
 			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
 				goto nla_put_failure;
-			p_rta->nla_len = skb_tail_pointer(skb) - (u8 *)p_rta;
+			nla_nest_end(skb, nest);
 		} else if (map->police) {
-			NLA_PUT(skb, map->police, 0, NULL);
+			nest = nla_nest_start(skb, map->police);
+			if (nest == NULL)
+				goto nla_put_failure;
 			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
 				goto nla_put_failure;
-			p_rta->nla_len = skb_tail_pointer(skb) - (u8 *)p_rta;
+			nla_nest_end(skb, nest);
 		}
 	}
 #endif
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 524b7885dc3..6d08b429635 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -246,16 +246,16 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
 	struct basic_filter *f = (struct basic_filter *) fh;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
 	if (f == NULL)
 		return skb->len;
 
 	t->tcm_handle = f->handle;
 
-	nla = (struct nlattr *) b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	if (f->res.classid)
 		NLA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
@@ -264,11 +264,11 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto nla_put_failure;
 
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a1a9f4d26b8..e3dfbb3046c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -334,7 +334,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter*)fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
 	if (f == NULL)
 		return skb->len;
@@ -344,8 +344,9 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
 		return skb->len;
 
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	if (f->res.classid)
 		NLA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
@@ -359,7 +360,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
 		goto nla_put_failure;
 
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 3aa8109aa3c..1ce1f3623d6 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -551,7 +551,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct route4_filter *f = (struct route4_filter*)fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 	u32 id;
 
 	if (f == NULL)
@@ -559,8 +559,9 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 
 	t->tcm_handle = f->handle;
 
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	if (!(f->handle&0x8000)) {
 		id = f->id&0xFF;
@@ -579,7 +580,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
 		goto nla_put_failure;
 
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 5747408a7d4..77097e023f7 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -595,7 +595,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	struct rsvp_filter *f = (struct rsvp_filter*)fh;
 	struct rsvp_session *s;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 	struct tc_rsvp_pinfo pinfo;
 
 	if (f == NULL)
@@ -604,9 +604,9 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 
 	t->tcm_handle = f->handle;
 
-
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
 	pinfo.dpi = s->dpi;
@@ -624,7 +624,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 		goto nla_put_failure;
 
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 6b84d276e5a..cd350d38bda 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -437,13 +437,16 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
 	pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
 		 tp, fh, skb, t, p, r, b);
 	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
-	nla = (struct nlattr *) b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
 	if (!fh) {
 		t->tcm_handle = ~0; /* whatever ... */
 		NLA_PUT(skb, TCA_TCINDEX_HASH, sizeof(p->hash), &p->hash);
@@ -451,7 +454,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 		NLA_PUT(skb, TCA_TCINDEX_SHIFT, sizeof(p->shift), &p->shift);
 		NLA_PUT(skb, TCA_TCINDEX_FALL_THROUGH, sizeof(p->fall_through),
 		    &p->fall_through);
-		nla->nla_len = skb_tail_pointer(skb) - b;
+		nla_nest_end(skb, nest);
 	} else {
 		if (p->perfect) {
 			t->tcm_handle = r-p->perfect;
@@ -474,7 +477,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 
 		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto nla_put_failure;
-		nla->nla_len = skb_tail_pointer(skb) - b;
+		nla_nest_end(skb, nest);
 
 		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto nla_put_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 3228cc4ae08..b51c2c324ab 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -694,16 +694,16 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tc_u_knode *n = (struct tc_u_knode*)fh;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
 	if (n == NULL)
 		return skb->len;
 
 	t->tcm_handle = n->handle;
 
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	if (TC_U32_KEY(n->handle) == 0) {
 		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
@@ -741,14 +741,15 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 #endif
 	}
 
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
+
 	if (TC_U32_KEY(n->handle))
 		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
 			goto nla_put_failure;
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index d2b480f01a4..daa9c4e7e81 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -436,14 +436,18 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 {
 	int i;
 	u8 *tail;
-	struct nlattr *top_start = (struct nlattr *)skb_tail_pointer(skb);
+	struct nlattr *top_start;
 	struct nlattr *list_start;
 
-	NLA_PUT(skb, tlv, 0, NULL);
+	top_start = nla_nest_start(skb, tlv);
+	if (top_start == NULL)
+		goto nla_put_failure;
+
 	NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
 
-	list_start = (struct nlattr *)skb_tail_pointer(skb);
-	NLA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+	list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
+	if (list_start == NULL)
+		goto nla_put_failure;
 
 	tail = skb_tail_pointer(skb);
 	for (i = 0; i < tree->hdr.nmatches; i++) {
@@ -470,8 +474,8 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 		match_start->nla_len = tail - (u8 *)match_start;
 	}
 
-	list_start->nla_len = tail - (u8 *)list_start;
-	top_start->nla_len = tail - (u8 *)top_start;
+	nla_nest_end(skb, list_start);
+	nla_nest_end(skb, top_start);
 
 	return 0;
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index aaa32a22726..19c00074ba1 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -605,8 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
 	pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
 		sch, p, flow, skb, tcm);
@@ -614,8 +613,11 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 		return -EINVAL;
 	tcm->tcm_handle = flow->classid;
 	tcm->tcm_info = flow->q->handle;
-	nla = (struct nlattr *)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
 	NLA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
 	if (flow->vcc) {
 		struct sockaddr_atmpvc pvc;
@@ -636,11 +638,12 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 
 		NLA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero);
 	}
-	nla->nla_len = skb_tail_pointer(skb) - b;
+
+	nla_nest_end(skb, nest);
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 static int
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 585f8a6ec7e..da0f6c0152d 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1565,18 +1565,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 	if (cbq_dump_attr(skb, &q->link) < 0)
 		goto nla_put_failure;
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
@@ -1594,8 +1594,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct cbq_class *cl = (struct cbq_class*)arg;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
 	if (cl->tparent)
 		tcm->tcm_parent = cl->tparent->classid;
@@ -1604,15 +1603,16 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	tcm->tcm_handle = cl->classid;
 	tcm->tcm_info = cl->q->handle;
 
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 	if (cbq_dump_attr(skb, cl) < 0)
 		goto nla_put_failure;
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fcb4826158d..10a2f35a27a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1343,22 +1343,23 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 		struct tcmsg *tcm)
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla = (struct nlattr *)b;
+	struct nlattr *nest;
 
 	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->classid;
 	if (cl->level == 0)
 		tcm->tcm_info = cl->qdisc->handle;
 
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 	if (hfsc_dump_curves(skb, cl) < 0)
 		goto nla_put_failure;
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
 	return skb->len;
 
  nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 512df9a0a24..69fac320f8b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1043,25 +1043,29 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 	struct tc_htb_glob gopt;
+
 	spin_lock_bh(&sch->dev->queue_lock);
-	gopt.direct_pkts = q->direct_pkts;
 
+	gopt.direct_pkts = q->direct_pkts;
 	gopt.version = HTB_VER;
 	gopt.rate2quantum = q->rate2quantum;
 	gopt.defcls = q->defcls;
 	gopt.debug = 0;
-	nla = (struct nlattr *)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nla_nest_end(skb, nest);
+
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
+
 nla_put_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	nlmsg_trim(skb, skb_tail_pointer(skb));
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
@@ -1069,8 +1073,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -1079,8 +1082,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	if (!cl->level && cl->un.leaf.q)
 		tcm->tcm_info = cl->un.leaf.q->handle;
 
-	nla = (struct nlattr *)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	memset(&opt, 0, sizeof(opt));
 
@@ -1092,12 +1096,14 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.prio = cl->un.leaf.prio;
 	opt.level = cl->level;
 	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
-	nla->nla_len = skb_tail_pointer(skb) - b;
+
+	nla_nest_end(skb, nest);
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
+
 nla_put_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f6decbb5664..3f72d528273 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -183,16 +183,16 @@ static void ingress_destroy(struct Qdisc *sch)
 
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 
-	nla = (struct nlattr *)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+	nla_nest_end(skb, nest);
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index f9b1543e3d7..b7a185dc3de 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -375,12 +375,12 @@ static void tbf_destroy(struct Qdisc *sch)
 static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla;
+	struct nlattr *nest;
 	struct tc_tbf_qopt opt;
 
-	nla = (struct nlattr*)b;
-	NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
 
 	opt.limit = q->limit;
 	opt.rate = q->R_tab->rate;
@@ -391,12 +391,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.mtu = q->mtu;
 	opt.buffer = q->buffer;
 	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
-	nla->nla_len = skb_tail_pointer(skb) - b;
 
+	nla_nest_end(skb, nest);
 	return skb->len;
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nla_nest_cancel(skb, nest);
 	return -1;
 }
 
-- 
cgit v1.2.3


From 57e1c487a4f5754cb77abeb00adb21faa88c484f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:34:28 -0800
Subject: [NET_SCHED]: Use NLA_PUT_STRING for string dumping

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 ++--
 net/sched/act_ipt.c | 2 +-
 net/sched/cls_api.c | 2 +-
 net/sched/cls_fw.c  | 2 +-
 net/sched/cls_u32.c | 2 +-
 net/sched/sch_api.c | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 36022605fc1..e33e43abe96 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -121,7 +121,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 	nest = nla_nest_start(skb, a->order);
 	if (nest == NULL)
 		goto nla_put_failure;
-	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
 		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
 
@@ -423,7 +423,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	if (a->ops == NULL || a->ops->dump == NULL)
 		return err;
 
-	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto nla_put_failure;
 	nest = nla_nest_start(skb, TCA_OPTIONS);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 12693347d56..ecda51da9c1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -257,7 +257,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	NLA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
 	NLA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
 	NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
-	NLA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
+	NLA_PUT_STRING(skb, TCA_IPT_TABLE, ipt->tcfi_tname);
 	tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
 	tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
 	tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5584e7cd4b9..3377ca0d0a0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -337,7 +337,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
 	tcm->tcm_ifindex = tp->q->dev->ifindex;
 	tcm->tcm_parent = tp->classid;
 	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
-	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind);
+	NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
 	tcm->tcm_handle = fh;
 	if (RTM_DELTFILTER != event) {
 		tcm->tcm_handle = 0;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e3dfbb3046c..31074735d1b 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -352,7 +352,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 		NLA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
 #ifdef CONFIG_NET_CLS_IND
 	if (strlen(f->indev))
-		NLA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
+		NLA_PUT_STRING(skb, TCA_FW_INDEV, f->indev);
 #endif /* CONFIG_NET_CLS_IND */
 	if (head->mask != 0xFFFFFFFF)
 		NLA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b51c2c324ab..7a1502553b3 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -732,7 +732,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 
 #ifdef CONFIG_NET_CLS_IND
 		if(strlen(n->indev))
-			NLA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
+			NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
 #endif
 #ifdef CONFIG_CLS_U32_PERF
 		NLA_PUT(skb, TCA_U32_PCNT,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7abb028dd96..8db554d5485 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -834,7 +834,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	tcm->tcm_parent = clid;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = atomic_read(&q->refcnt);
-	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
@@ -1080,7 +1080,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	tcm->tcm_parent = q->handle;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = 0;
-	NLA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 24beeab539c6f42c4a93e2ff7c3b5f272e60da45 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:34:48 -0800
Subject: [NET_SCHED]: Use typeful attribute construction helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c     |  2 +-
 net/sched/act_ipt.c     |  4 ++--
 net/sched/act_police.c  |  5 ++---
 net/sched/cls_basic.c   |  2 +-
 net/sched/cls_fw.c      |  4 ++--
 net/sched/cls_route.c   |  8 ++++----
 net/sched/cls_rsvp.h    |  2 +-
 net/sched/cls_tcindex.c | 11 +++++------
 net/sched/cls_u32.c     |  8 ++++----
 net/sched/em_meta.c     |  3 +--
 net/sched/sch_atm.c     |  8 +++-----
 11 files changed, 26 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e33e43abe96..41fbd496aba 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -133,7 +133,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 			p = s_p;
 		}
 	}
-	NLA_PUT(skb, TCA_FCNT, 4, &n_i);
+	NLA_PUT_U32(skb, TCA_FCNT, n_i);
 	nla_nest_end(skb, nest);
 
 	return n_i;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ecda51da9c1..5dd701a7bc5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -254,8 +254,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
 	NLA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
-	NLA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
-	NLA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
+	NLA_PUT_U32(skb, TCA_IPT_INDEX, ipt->tcf_index);
+	NLA_PUT_U32(skb, TCA_IPT_HOOK, ipt->tcfi_hook);
 	NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
 	NLA_PUT_STRING(skb, TCA_IPT_TABLE, ipt->tcfi_tname);
 	tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ee2f1b64dd7..79db6bb8a5f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -339,10 +339,9 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
 	if (police->tcfp_result)
-		NLA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
-			&police->tcfp_result);
+		NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
 	if (police->tcfp_ewma_rate)
-		NLA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
+		NLA_PUT_U32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate);
 	return skb->len;
 
 nla_put_failure:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6d08b429635..58444fedf06 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -258,7 +258,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	if (f->res.classid)
-		NLA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
+		NLA_PUT_U32(skb, TCA_BASIC_CLASSID, f->res.classid);
 
 	if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 31074735d1b..61ebe25e5f7 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -349,13 +349,13 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	if (f->res.classid)
-		NLA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
+		NLA_PUT_U32(skb, TCA_FW_CLASSID, f->res.classid);
 #ifdef CONFIG_NET_CLS_IND
 	if (strlen(f->indev))
 		NLA_PUT_STRING(skb, TCA_FW_INDEV, f->indev);
 #endif /* CONFIG_NET_CLS_IND */
 	if (head->mask != 0xFFFFFFFF)
-		NLA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
+		NLA_PUT_U32(skb, TCA_FW_MASK, head->mask);
 
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1ce1f3623d6..7752586e918 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -565,17 +565,17 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 
 	if (!(f->handle&0x8000)) {
 		id = f->id&0xFF;
-		NLA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id);
+		NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
 	}
 	if (f->handle&0x80000000) {
 		if ((f->handle>>16) != 0xFFFF)
-			NLA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif);
+			NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
 	} else {
 		id = f->id>>16;
-		NLA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id);
+		NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
 	}
 	if (f->res.classid)
-		NLA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid);
+		NLA_PUT_U32(skb, TCA_ROUTE4_CLASSID, f->res.classid);
 
 	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 77097e023f7..838a3ff5a2c 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -617,7 +617,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	pinfo.pad = 0;
 	NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 	if (f->res.classid)
-		NLA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
+		NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
 	if (((f->handle>>8)&0xFF) != 16)
 		NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index cd350d38bda..7d46df7eac0 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -449,11 +449,10 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 
 	if (!fh) {
 		t->tcm_handle = ~0; /* whatever ... */
-		NLA_PUT(skb, TCA_TCINDEX_HASH, sizeof(p->hash), &p->hash);
-		NLA_PUT(skb, TCA_TCINDEX_MASK, sizeof(p->mask), &p->mask);
-		NLA_PUT(skb, TCA_TCINDEX_SHIFT, sizeof(p->shift), &p->shift);
-		NLA_PUT(skb, TCA_TCINDEX_FALL_THROUGH, sizeof(p->fall_through),
-		    &p->fall_through);
+		NLA_PUT_U32(skb, TCA_TCINDEX_HASH, p->hash);
+		NLA_PUT_U16(skb, TCA_TCINDEX_MASK, p->mask);
+		NLA_PUT_U32(skb, TCA_TCINDEX_SHIFT, p->shift);
+		NLA_PUT_U32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through);
 		nla_nest_end(skb, nest);
 	} else {
 		if (p->perfect) {
@@ -473,7 +472,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 		}
 		pr_debug("handle = %d\n", t->tcm_handle);
 		if (r->res.class)
-			NLA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
+			NLA_PUT_U32(skb, TCA_TCINDEX_CLASSID, r->res.classid);
 
 		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto nla_put_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 7a1502553b3..326711eb593 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -708,19 +708,19 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 	if (TC_U32_KEY(n->handle) == 0) {
 		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
 		u32 divisor = ht->divisor+1;
-		NLA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
+		NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
 	} else {
 		NLA_PUT(skb, TCA_U32_SEL,
 			sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
 			&n->sel);
 		if (n->ht_up) {
 			u32 htid = n->handle & 0xFFFFF000;
-			NLA_PUT(skb, TCA_U32_HASH, 4, &htid);
+			NLA_PUT_U32(skb, TCA_U32_HASH, htid);
 		}
 		if (n->res.classid)
-			NLA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
+			NLA_PUT_U32(skb, TCA_U32_CLASSID, n->res.classid);
 		if (n->ht_down)
-			NLA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
+			NLA_PUT_U32(skb, TCA_U32_LINK, n->ht_down->handle);
 
 #ifdef CONFIG_CLS_U32_MARK
 		if (n->mark.val || n->mark.mask)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index dd5723670d3..63ae6a230c4 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -623,8 +623,7 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
 	if (v->len == sizeof(unsigned long))
 		NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
 	else if (v->len == sizeof(u32)) {
-		u32 d = v->val;
-		NLA_PUT(skb, tlv, sizeof(d), &d);
+		NLA_PUT_U32(skb, tlv, v->val);
 	}
 
 	return 0;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 19c00074ba1..4d876598d7d 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -629,14 +629,12 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 		pvc.sap_addr.vci = flow->vcc->vci;
 		NLA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
 		state = ATM_VF2VS(flow->vcc->flags);
-		NLA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state);
+		NLA_PUT_U32(skb, TCA_ATM_STATE, state);
 	}
 	if (flow->excess)
-		NLA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid);
+		NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
 	else {
-		static u32 zero;
-
-		NLA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero);
+		NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
 	}
 
 	nla_nest_end(skb, nest);
-- 
cgit v1.2.3


From 1587bac49f8491b5006a78f8d726111b71757941 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:35:03 -0800
Subject: [NET_SCHED]: Use typeful attribute parsing helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c     |  2 +-
 net/sched/act_ipt.c     |  4 ++--
 net/sched/act_police.c  |  5 ++---
 net/sched/cls_basic.c   |  2 +-
 net/sched/cls_fw.c      |  6 +++---
 net/sched/cls_route.c   |  8 ++++----
 net/sched/cls_rsvp.h    |  4 ++--
 net/sched/cls_tcindex.c | 11 +++++------
 net/sched/cls_u32.c     |  8 ++++----
 net/sched/em_meta.c     |  2 +-
 net/sched/sch_atm.c     |  4 ++--
 11 files changed, 27 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 41fbd496aba..0b8eb235bc1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -690,7 +690,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	if (tb[TCA_ACT_INDEX] == NULL ||
 	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
 		goto err_out;
-	index = *(int *)nla_data(tb[TCA_ACT_INDEX]);
+	index = nla_get_u32(tb[TCA_ACT_INDEX]);
 
 	err = -ENOMEM;
 	a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 5dd701a7bc5..7ab2419b44e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -123,7 +123,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 
 	if (tb[TCA_IPT_INDEX] != NULL &&
 	    nla_len(tb[TCA_IPT_INDEX]) >= sizeof(u32))
-		index = *(u32 *)nla_data(tb[TCA_IPT_INDEX]);
+		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
 	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
 	if (!pc) {
@@ -140,7 +140,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 	}
 	ipt = to_ipt(pc);
 
-	hook = *(u32 *)nla_data(tb[TCA_IPT_HOOK]);
+	hook = nla_get_u32(tb[TCA_IPT_HOOK]);
 
 	err = -ENOMEM;
 	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 79db6bb8a5f..62de806af3a 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -203,7 +203,7 @@ override:
 	}
 
 	if (tb[TCA_POLICE_RESULT])
-		police->tcfp_result = *(u32*)nla_data(tb[TCA_POLICE_RESULT]);
+		police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
 	police->tcfp_toks = police->tcfp_burst = parm->burst;
 	police->tcfp_mtu = parm->mtu;
 	if (police->tcfp_mtu == 0) {
@@ -216,8 +216,7 @@ override:
 	police->tcf_action = parm->action;
 
 	if (tb[TCA_POLICE_AVRATE])
-		police->tcfp_ewma_rate =
-			*(u32*)nla_data(tb[TCA_POLICE_AVRATE]);
+		police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 	if (est)
 		gen_replace_estimator(&police->tcf_bstats,
 				      &police->tcf_rate_est,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 58444fedf06..0c872a76c4b 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -150,7 +150,7 @@ static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
 		goto errout;
 
 	if (tb[TCA_BASIC_CLASSID]) {
-		f->res.classid = *(u32*)nla_data(tb[TCA_BASIC_CLASSID]);
+		f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 61ebe25e5f7..b75696d67ec 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -203,7 +203,7 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	if (tb[TCA_FW_CLASSID]) {
 		if (nla_len(tb[TCA_FW_CLASSID]) != sizeof(u32))
 			goto errout;
-		f->res.classid = *(u32*)nla_data(tb[TCA_FW_CLASSID]);
+		f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
@@ -218,7 +218,7 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	if (tb[TCA_FW_MASK]) {
 		if (nla_len(tb[TCA_FW_MASK]) != sizeof(u32))
 			goto errout;
-		mask = *(u32*)nla_data(tb[TCA_FW_MASK]);
+		mask = nla_get_u32(tb[TCA_FW_MASK]);
 		if (mask != head->mask)
 			goto errout;
 	} else if (head->mask != 0xFFFFFFFF)
@@ -264,7 +264,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 		if (tb[TCA_FW_MASK]) {
 			if (nla_len(tb[TCA_FW_MASK]) != sizeof(u32))
 				return -EINVAL;
-			mask = *(u32*)nla_data(tb[TCA_FW_MASK]);
+			mask = nla_get_u32(tb[TCA_FW_MASK]);
 		}
 
 		head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 7752586e918..ae97238c57a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -348,7 +348,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 			goto errout;
 		if (nla_len(tb[TCA_ROUTE4_TO]) < sizeof(u32))
 			goto errout;
-		to = *(u32*)nla_data(tb[TCA_ROUTE4_TO]);
+		to = nla_get_u32(tb[TCA_ROUTE4_TO]);
 		if (to > 0xFF)
 			goto errout;
 		nhandle = to;
@@ -359,14 +359,14 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 			goto errout;
 		if (nla_len(tb[TCA_ROUTE4_FROM]) < sizeof(u32))
 			goto errout;
-		id = *(u32*)nla_data(tb[TCA_ROUTE4_FROM]);
+		id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
 		if (id > 0xFF)
 			goto errout;
 		nhandle |= id << 16;
 	} else if (tb[TCA_ROUTE4_IIF]) {
 		if (nla_len(tb[TCA_ROUTE4_IIF]) < sizeof(u32))
 			goto errout;
-		id = *(u32*)nla_data(tb[TCA_ROUTE4_IIF]);
+		id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
 		if (id > 0x7FFF)
 			goto errout;
 		nhandle |= (id | 0x8000) << 16;
@@ -411,7 +411,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	tcf_tree_unlock(tp);
 
 	if (tb[TCA_ROUTE4_CLASSID]) {
-		f->res.classid = *(u32*)nla_data(tb[TCA_ROUTE4_CLASSID]);
+		f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 838a3ff5a2c..61286a0f7a3 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -430,7 +430,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 		if (f->handle != handle && handle)
 			goto errout2;
 		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = *(u32*)nla_data(tb[TCA_RSVP_CLASSID-1]);
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 			tcf_bind_filter(tp, &f->res, base);
 		}
 
@@ -470,7 +470,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 		err = -EINVAL;
 		if (nla_len(tb[TCA_RSVP_CLASSID-1]) != 4)
 			goto errout;
-		f->res.classid = *(u32*)nla_data(tb[TCA_RSVP_CLASSID-1]);
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 	}
 
 	err = -EINVAL;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7d46df7eac0..28098564b4d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -221,19 +221,19 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (tb[TCA_TCINDEX_HASH]) {
 		if (nla_len(tb[TCA_TCINDEX_HASH]) < sizeof(u32))
 			goto errout;
-		cp.hash = *(u32 *) nla_data(tb[TCA_TCINDEX_HASH]);
+		cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
 	}
 
 	if (tb[TCA_TCINDEX_MASK]) {
 		if (nla_len(tb[TCA_TCINDEX_MASK]) < sizeof(u16))
 			goto errout;
-		cp.mask = *(u16 *) nla_data(tb[TCA_TCINDEX_MASK]);
+		cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
 	}
 
 	if (tb[TCA_TCINDEX_SHIFT]) {
 		if (nla_len(tb[TCA_TCINDEX_SHIFT]) < sizeof(int))
 			goto errout;
-		cp.shift = *(int *) nla_data(tb[TCA_TCINDEX_SHIFT]);
+		cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
 	}
 
 	err = -EBUSY;
@@ -251,8 +251,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (tb[TCA_TCINDEX_FALL_THROUGH]) {
 		if (nla_len(tb[TCA_TCINDEX_FALL_THROUGH]) < sizeof(u32))
 			goto errout;
-		cp.fall_through =
-			*(u32 *) nla_data(tb[TCA_TCINDEX_FALL_THROUGH]);
+		cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
 	}
 
 	if (!cp.hash) {
@@ -305,7 +304,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	if (tb[TCA_TCINDEX_CLASSID]) {
-		cr.res.classid = *(u32 *) nla_data(tb[TCA_TCINDEX_CLASSID]);
+		cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
 		tcf_bind_filter(tp, &cr.res, base);
 	}
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 326711eb593..a4e72e8a882 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -474,7 +474,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 
 	err = -EINVAL;
 	if (tb[TCA_U32_LINK]) {
-		u32 handle = *(u32*)nla_data(tb[TCA_U32_LINK]);
+		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
 		struct tc_u_hnode *ht_down = NULL;
 
 		if (TC_U32_KEY(handle))
@@ -496,7 +496,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 			ht_down->refcnt--;
 	}
 	if (tb[TCA_U32_CLASSID]) {
-		n->res.classid = *(u32*)nla_data(tb[TCA_U32_CLASSID]);
+		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
 		tcf_bind_filter(tp, &n->res, base);
 	}
 
@@ -543,7 +543,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	if (tb[TCA_U32_DIVISOR]) {
-		unsigned divisor = *(unsigned*)nla_data(tb[TCA_U32_DIVISOR]);
+		unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 
 		if (--divisor > 0x100)
 			return -EINVAL;
@@ -569,7 +569,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	if (tb[TCA_U32_HASH]) {
-		htid = *(unsigned*)nla_data(tb[TCA_U32_HASH]);
+		htid = nla_get_u32(tb[TCA_U32_HASH]);
 		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
 			ht = tp->root;
 			htid = ht->handle;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 63ae6a230c4..d9f487d813c 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -600,7 +600,7 @@ static int meta_int_change(struct meta_value *dst, struct nlattr *nla)
 		dst->val = *(unsigned long *) nla_data(nla);
 		dst->len = sizeof(unsigned long);
 	} else if (nla_len(nla) == sizeof(u32)) {
-		dst->val = *(u32 *) nla_data(nla);
+		dst->val = nla_get_u32(nla);
 		dst->len = sizeof(u32);
 	} else
 		return -EINVAL;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 4d876598d7d..0c71f2eb96b 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -231,7 +231,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 
 	if (!tb[TCA_ATM_FD] || nla_len(tb[TCA_ATM_FD]) < sizeof(fd))
 		return -EINVAL;
-	fd = *(int *)nla_data(tb[TCA_ATM_FD]);
+	fd = nla_get_u32(tb[TCA_ATM_FD]);
 	pr_debug("atm_tc_change: fd %d\n", fd);
 	if (tb[TCA_ATM_HDR]) {
 		hdr_len = nla_len(tb[TCA_ATM_HDR]);
@@ -246,7 +246,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		if (nla_len(tb[TCA_ATM_EXCESS]) != sizeof(u32))
 			return -EINVAL;
 		excess = (struct atm_flow_data *)
-			atm_tc_get(sch, *(u32 *)nla_data(tb[TCA_ATM_EXCESS]));
+			atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
 		if (!excess)
 			return -ENOENT;
 	}
-- 
cgit v1.2.3


From 5feb5e1aaa887f6427b8290bce48bfb6b7010fc6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:35:19 -0800
Subject: [NET_SCHED]: sch_api: introduce constant for rate table size

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 8db554d5485..7e3c048ba9b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -244,7 +244,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta
 		}
 	}
 
-	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || nla_len(tab) != 1024)
+	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+	    nla_len(tab) != TC_RTAB_SIZE)
 		return NULL;
 
 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
-- 
cgit v1.2.3


From 27a3421e4821734bc19496faa77b380605dc3b23 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:35:39 -0800
Subject: [NET_SCHED]: Use nla_policy for attribute validation in packet
 schedulers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c    | 12 ++++++++----
 net/sched/sch_cbq.c    | 47 +++++++++++++----------------------------------
 net/sched/sch_dsmark.c | 33 +++++++++++++++------------------
 net/sched/sch_gred.c   | 16 ++++++++++------
 net/sched/sch_hfsc.c   | 14 +++++++-------
 net/sched/sch_htb.c    | 17 +++++++++++------
 net/sched/sch_netem.c  | 19 ++++++++-----------
 net/sched/sch_red.c    | 11 +++++++----
 net/sched/sch_tbf.c    | 11 ++++++++---
 9 files changed, 87 insertions(+), 93 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 0c71f2eb96b..33527341638 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -195,6 +195,11 @@ static const u8 llc_oui_ip[] = {
 	0x08, 0x00
 };				/* Ethertype IP (0800) */
 
+static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
+	[TCA_ATM_FD]		= { .type = NLA_U32 },
+	[TCA_ATM_EXCESS]	= { .type = NLA_U32 },
+};
+
 static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 			 struct nlattr **tca, unsigned long *arg)
 {
@@ -225,11 +230,12 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		return -EBUSY;
 	if (opt == NULL)
 		return -EINVAL;
-	error = nla_parse_nested(tb, TCA_ATM_MAX, opt, NULL);
+
+	error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy);
 	if (error < 0)
 		return error;
 
-	if (!tb[TCA_ATM_FD] || nla_len(tb[TCA_ATM_FD]) < sizeof(fd))
+	if (!tb[TCA_ATM_FD])
 		return -EINVAL;
 	fd = nla_get_u32(tb[TCA_ATM_FD]);
 	pr_debug("atm_tc_change: fd %d\n", fd);
@@ -243,8 +249,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	if (!tb[TCA_ATM_EXCESS])
 		excess = NULL;
 	else {
-		if (nla_len(tb[TCA_ATM_EXCESS]) != sizeof(u32))
-			return -EINVAL;
 		excess = (struct atm_flow_data *)
 			atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
 		if (!excess)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index da0f6c0152d..09969c1fbc0 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1377,6 +1377,16 @@ static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
 	return 0;
 }
 
+static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
+	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
+	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
+	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
+	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
+	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
+	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
+};
+
 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
@@ -1384,16 +1394,11 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_ratespec *r;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL ||
-	    nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
-		return -EINVAL;
-
-	if (tb[TCA_CBQ_LSSOPT] &&
-	    nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
+	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
 		return -EINVAL;
 
 	r = nla_data(tb[TCA_CBQ_RATE]);
@@ -1771,36 +1776,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_CBQ_OVL_STRATEGY] &&
-	    nla_len(tb[TCA_CBQ_OVL_STRATEGY]) < sizeof(struct tc_cbq_ovl))
-		return -EINVAL;
-
-	if (tb[TCA_CBQ_FOPT] &&
-	    nla_len(tb[TCA_CBQ_FOPT]) < sizeof(struct tc_cbq_fopt))
-		return -EINVAL;
-
-	if (tb[TCA_CBQ_RATE] &&
-	    nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
-			return -EINVAL;
-
-	if (tb[TCA_CBQ_LSSOPT] &&
-	    nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
-			return -EINVAL;
-
-	if (tb[TCA_CBQ_WRROPT] &&
-	    nla_len(tb[TCA_CBQ_WRROPT]) < sizeof(struct tc_cbq_wrropt))
-			return -EINVAL;
-
-#ifdef CONFIG_NET_CLS_ACT
-	if (tb[TCA_CBQ_POLICE] &&
-	    nla_len(tb[TCA_CBQ_POLICE]) < sizeof(struct tc_cbq_police))
-			return -EINVAL;
-#endif
-
 	if (cl) {
 		/* Check parent */
 		if (parentid) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f1d0a08aca7..0df911fd67b 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -99,6 +99,14 @@ static void dsmark_put(struct Qdisc *sch, unsigned long cl)
 {
 }
 
+static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
+	[TCA_DSMARK_INDICES]		= { .type = NLA_U16 },
+	[TCA_DSMARK_DEFAULT_INDEX]	= { .type = NLA_U16 },
+	[TCA_DSMARK_SET_TC_INDEX]	= { .type = NLA_FLAG },
+	[TCA_DSMARK_MASK]		= { .type = NLA_U8 },
+	[TCA_DSMARK_VALUE]		= { .type = NLA_U8 },
+};
+
 static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 			 struct nlattr **tca, unsigned long *arg)
 {
@@ -119,21 +127,15 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 	if (!opt)
 		goto errout;
 
-	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
 	if (err < 0)
-		return err;
+		goto errout;
 
-	err = -EINVAL;
-	if (tb[TCA_DSMARK_MASK]) {
-		if (nla_len(tb[TCA_DSMARK_MASK]) < sizeof(u8))
-			goto errout;
+	if (tb[TCA_DSMARK_MASK])
 		mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-	}
-	if (tb[TCA_DSMARK_VALUE]) {
-		if (nla_len(tb[TCA_DSMARK_VALUE]) < sizeof(u8))
-			goto errout;
+
+	if (tb[TCA_DSMARK_VALUE])
 		p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
-	}
 
 	if (tb[TCA_DSMARK_MASK])
 		p->mask[*arg-1] = mask;
@@ -359,23 +361,18 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		goto errout;
 
-	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
 	if (err < 0)
 		goto errout;
 
 	err = -EINVAL;
-	if (nla_len(tb[TCA_DSMARK_INDICES]) < sizeof(u16))
-		goto errout;
 	indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
 
 	if (hweight32(indices) != 1)
 		goto errout;
 
-	if (tb[TCA_DSMARK_DEFAULT_INDEX]) {
-		if (nla_len(tb[TCA_DSMARK_DEFAULT_INDEX]) < sizeof(u16))
-			goto errout;
+	if (tb[TCA_DSMARK_DEFAULT_INDEX])
 		default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
-	}
 
 	mask = kmalloc(indices * 2, GFP_KERNEL);
 	if (mask == NULL) {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 365c7d8b17a..3a9d226ff1e 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -356,7 +356,7 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 	struct tc_gred_sopt *sopt;
 	int i;
 
-	if (dps == NULL || nla_len(dps) < sizeof(*sopt))
+	if (dps == NULL)
 		return -EINVAL;
 
 	sopt = nla_data(dps);
@@ -425,6 +425,12 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 	return 0;
 }
 
+static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
+	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
+	[TCA_GRED_STAB]		= { .len = 256 },
+	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
+};
+
 static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct gred_sched *table = qdisc_priv(sch);
@@ -436,7 +442,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
 	if (err < 0)
 		return err;
 
@@ -444,9 +450,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 		return gred_change_table_def(sch, opt);
 
 	if (tb[TCA_GRED_PARMS] == NULL ||
-	    nla_len(tb[TCA_GRED_PARMS]) < sizeof(*ctl) ||
-	    tb[TCA_GRED_STAB] == NULL ||
-	    nla_len(tb[TCA_GRED_STAB]) < 256)
+	    tb[TCA_GRED_STAB] == NULL)
 		return -EINVAL;
 
 	err = -EINVAL;
@@ -499,7 +503,7 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 10a2f35a27a..87293d0db1d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -986,6 +986,12 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
 	cl->cl_flags |= HFSC_USC;
 }
 
+static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
+	[TCA_HFSC_RSC]	= { .len = sizeof(struct tc_service_curve) },
+	[TCA_HFSC_FSC]	= { .len = sizeof(struct tc_service_curve) },
+	[TCA_HFSC_USC]	= { .len = sizeof(struct tc_service_curve) },
+};
+
 static int
 hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		  struct nlattr **tca, unsigned long *arg)
@@ -1002,29 +1008,23 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_HFSC_RSC]) {
-		if (nla_len(tb[TCA_HFSC_RSC]) < sizeof(*rsc))
-			return -EINVAL;
 		rsc = nla_data(tb[TCA_HFSC_RSC]);
 		if (rsc->m1 == 0 && rsc->m2 == 0)
 			rsc = NULL;
 	}
 
 	if (tb[TCA_HFSC_FSC]) {
-		if (nla_len(tb[TCA_HFSC_FSC]) < sizeof(*fsc))
-			return -EINVAL;
 		fsc = nla_data(tb[TCA_HFSC_FSC]);
 		if (fsc->m1 == 0 && fsc->m2 == 0)
 			fsc = NULL;
 	}
 
 	if (tb[TCA_HFSC_USC]) {
-		if (nla_len(tb[TCA_HFSC_USC]) < sizeof(*usc))
-			return -EINVAL;
 		usc = nla_data(tb[TCA_HFSC_USC]);
 		if (usc->m1 == 0 && usc->m2 == 0)
 			usc = NULL;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 69fac320f8b..e1a579efc21 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -992,6 +992,13 @@ static void htb_reset(struct Qdisc *sch)
 		INIT_LIST_HEAD(q->drops + i);
 }
 
+static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
+	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
+	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
+	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+};
+
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct htb_sched *q = qdisc_priv(sch);
@@ -1003,12 +1010,11 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, NULL);
+	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_HTB_INIT] == NULL ||
-	    nla_len(tb[TCA_HTB_INIT]) < sizeof(*gopt)) {
+	if (tb[TCA_HTB_INIT] == NULL) {
 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
@@ -1319,13 +1325,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	if (!opt)
 		goto failure;
 
-	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, NULL);
+	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
 	if (err < 0)
 		goto failure;
 
 	err = -EINVAL;
-	if (tb[TCA_HTB_PARMS] == NULL ||
-	    nla_len(tb[TCA_HTB_PARMS]) < sizeof(*hopt))
+	if (tb[TCA_HTB_PARMS] == NULL)
 		goto failure;
 
 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1a755799ffb..c9c649b26ea 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -368,9 +368,6 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_corr *c = nla_data(attr);
 
-	if (nla_len(attr) != sizeof(*c))
-		return -EINVAL;
-
 	init_crandom(&q->delay_cor, c->delay_corr);
 	init_crandom(&q->loss_cor, c->loss_corr);
 	init_crandom(&q->dup_cor, c->dup_corr);
@@ -382,9 +379,6 @@ static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_reorder *r = nla_data(attr);
 
-	if (nla_len(attr) != sizeof(*r))
-		return -EINVAL;
-
 	q->reorder = r->probability;
 	init_crandom(&q->reorder_cor, r->correlation);
 	return 0;
@@ -395,14 +389,17 @@ static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_corrupt *r = nla_data(attr);
 
-	if (nla_len(attr) != sizeof(*r))
-		return -EINVAL;
-
 	q->corrupt = r->probability;
 	init_crandom(&q->corrupt_cor, r->correlation);
 	return 0;
 }
 
+static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
+	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
+	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
+	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
+};
+
 /* Parse netlink message to set options */
 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 {
@@ -414,8 +411,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, NULL, qopt,
-				      sizeof(*qopt));
+	ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
+				      qopt, sizeof(*qopt));
 	if (ret < 0)
 		return ret;
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index dcf6afc196f..3dcd493f4f4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -201,6 +201,11 @@ static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
 	return NULL;
 }
 
+static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
+	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
+	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
+};
+
 static int red_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
@@ -212,14 +217,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_RED_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_RED_PARMS] == NULL ||
-	    nla_len(tb[TCA_RED_PARMS]) < sizeof(*ctl) ||
-	    tb[TCA_RED_STAB] == NULL ||
-	    nla_len(tb[TCA_RED_STAB]) < RED_STAB_SIZE)
+	    tb[TCA_RED_STAB] == NULL)
 		return -EINVAL;
 
 	ctl = nla_data(tb[TCA_RED_PARMS]);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b7a185dc3de..0b7d78f59d8 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -270,6 +270,12 @@ static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
 	return NULL;
 }
 
+static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
+	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
+	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+};
+
 static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 {
 	int err;
@@ -281,13 +287,12 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	struct Qdisc *child = NULL;
 	int max_size,n;
 
-	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, NULL);
+	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
 	if (err < 0)
 		return err;
 
 	err = -EINVAL;
-	if (tb[TCA_TBF_PARMS] == NULL ||
-	    nla_len(tb[TCA_TBF_PARMS]) < sizeof(*qopt))
+	if (tb[TCA_TBF_PARMS] == NULL)
 		goto done;
 
 	qopt = nla_data(tb[TCA_TBF_PARMS]);
-- 
cgit v1.2.3


From 6fa8c0144b770dac941cf2c15053b6e24f046c8a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:36:12 -0800
Subject: [NET_SCHED]: Use nla_policy for attribute validation in classifiers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_basic.c   | 12 +++++++-----
 net/sched/cls_fw.c      | 17 ++++++++---------
 net/sched/cls_route.c   | 19 ++++++++-----------
 net/sched/cls_rsvp.h    | 26 +++++++++++---------------
 net/sched/cls_tcindex.c | 31 +++++++++++++------------------
 net/sched/cls_u32.c     | 22 ++++++++++++----------
 6 files changed, 59 insertions(+), 68 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0c872a76c4b..bfb4342ea88 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -129,6 +129,11 @@ static int basic_delete(struct tcf_proto *tp, unsigned long arg)
 	return -ENOENT;
 }
 
+static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
+	[TCA_BASIC_CLASSID]	= { .type = NLA_U32 },
+	[TCA_BASIC_EMATCHES]	= { .type = NLA_NESTED },
+};
+
 static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
 				  unsigned long base, struct nlattr **tb,
 				  struct nlattr *est)
@@ -137,10 +142,6 @@ static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
-	if (tb[TCA_BASIC_CLASSID])
-		if (nla_len(tb[TCA_BASIC_CLASSID]) < sizeof(u32))
-			return err;
-
 	err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
 	if (err < 0)
 		return err;
@@ -174,7 +175,8 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (tca[TCA_OPTIONS] == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], NULL);
+	err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
+			       basic_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index b75696d67ec..436a6e7c438 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -186,6 +186,12 @@ out:
 	return -EINVAL;
 }
 
+static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
+	[TCA_FW_CLASSID]	= { .type = NLA_U32 },
+	[TCA_FW_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
+	[TCA_FW_MASK]		= { .type = NLA_U32 },
+};
+
 static int
 fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	struct nlattr **tb, struct nlattr **tca, unsigned long base)
@@ -201,8 +207,6 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 
 	err = -EINVAL;
 	if (tb[TCA_FW_CLASSID]) {
-		if (nla_len(tb[TCA_FW_CLASSID]) != sizeof(u32))
-			goto errout;
 		f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
@@ -216,8 +220,6 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 #endif /* CONFIG_NET_CLS_IND */
 
 	if (tb[TCA_FW_MASK]) {
-		if (nla_len(tb[TCA_FW_MASK]) != sizeof(u32))
-			goto errout;
 		mask = nla_get_u32(tb[TCA_FW_MASK]);
 		if (mask != head->mask)
 			goto errout;
@@ -246,7 +248,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 	if (!opt)
 		return handle ? -EINVAL : 0;
 
-	err = nla_parse_nested(tb, TCA_FW_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
 	if (err < 0)
 		return err;
 
@@ -261,11 +263,8 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 
 	if (head == NULL) {
 		u32 mask = 0xFFFFFFFF;
-		if (tb[TCA_FW_MASK]) {
-			if (nla_len(tb[TCA_FW_MASK]) != sizeof(u32))
-				return -EINVAL;
+		if (tb[TCA_FW_MASK])
 			mask = nla_get_u32(tb[TCA_FW_MASK]);
-		}
 
 		head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
 		if (head == NULL)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index ae97238c57a..f7e7d3955d2 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -323,6 +323,13 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 	return 0;
 }
 
+static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
+	[TCA_ROUTE4_CLASSID]	= { .type = NLA_U32 },
+	[TCA_ROUTE4_TO]		= { .type = NLA_U32 },
+	[TCA_ROUTE4_FROM]	= { .type = NLA_U32 },
+	[TCA_ROUTE4_IIF]	= { .type = NLA_U32 },
+};
+
 static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	struct route4_filter *f, u32 handle, struct route4_head *head,
 	struct nlattr **tb, struct nlattr *est, int new)
@@ -339,15 +346,9 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 		return err;
 
 	err = -EINVAL;
-	if (tb[TCA_ROUTE4_CLASSID])
-		if (nla_len(tb[TCA_ROUTE4_CLASSID]) < sizeof(u32))
-			goto errout;
-
 	if (tb[TCA_ROUTE4_TO]) {
 		if (new && handle & 0x8000)
 			goto errout;
-		if (nla_len(tb[TCA_ROUTE4_TO]) < sizeof(u32))
-			goto errout;
 		to = nla_get_u32(tb[TCA_ROUTE4_TO]);
 		if (to > 0xFF)
 			goto errout;
@@ -357,15 +358,11 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	if (tb[TCA_ROUTE4_FROM]) {
 		if (tb[TCA_ROUTE4_IIF])
 			goto errout;
-		if (nla_len(tb[TCA_ROUTE4_FROM]) < sizeof(u32))
-			goto errout;
 		id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
 		if (id > 0xFF)
 			goto errout;
 		nhandle |= id << 16;
 	} else if (tb[TCA_ROUTE4_IIF]) {
-		if (nla_len(tb[TCA_ROUTE4_IIF]) < sizeof(u32))
-			goto errout;
 		id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
 		if (id > 0x7FFF)
 			goto errout;
@@ -440,7 +437,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 61286a0f7a3..7034ea4530e 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -397,6 +397,15 @@ static u32 gen_tunnel(struct rsvp_head *data)
 	return 0;
 }
 
+static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
+	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
+	[TCA_RSVP_DST]		= { .type = NLA_BINARY,
+				    .len = RSVP_DST_LEN * sizeof(u32) },
+	[TCA_RSVP_SRC]		= { .type = NLA_BINARY,
+				    .len = RSVP_DST_LEN * sizeof(u32) },
+	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
+};
+
 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
@@ -416,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
 	if (err < 0)
 		return err;
 
@@ -452,30 +461,17 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 
 	h2 = 16;
 	if (tb[TCA_RSVP_SRC-1]) {
-		err = -EINVAL;
-		if (nla_len(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
-			goto errout;
 		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
 	if (tb[TCA_RSVP_PINFO-1]) {
-		err = -EINVAL;
-		if (nla_len(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
-			goto errout;
 		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1]) {
-		err = -EINVAL;
-		if (nla_len(tb[TCA_RSVP_CLASSID-1]) != 4)
-			goto errout;
+	if (tb[TCA_RSVP_CLASSID-1])
 		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
-	}
 
-	err = -EINVAL;
-	if (nla_len(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
-		goto errout;
 	dst = nla_data(tb[TCA_RSVP_DST-1]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 28098564b4d..ee60b2d1705 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -193,6 +193,14 @@ valid_perfect_hash(struct tcindex_data *p)
 	return  p->hash > (p->mask >> p->shift);
 }
 
+static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
+	[TCA_TCINDEX_HASH]		= { .type = NLA_U32 },
+	[TCA_TCINDEX_MASK]		= { .type = NLA_U16 },
+	[TCA_TCINDEX_SHIFT]		= { .type = NLA_U32 },
+	[TCA_TCINDEX_FALL_THROUGH]	= { .type = NLA_U32 },
+	[TCA_TCINDEX_CLASSID]		= { .type = NLA_U32 },
+};
+
 static int
 tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		  struct tcindex_data *p, struct tcindex_filter_result *r,
@@ -217,24 +225,14 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	else
 		memset(&cr, 0, sizeof(cr));
 
-	err = -EINVAL;
-	if (tb[TCA_TCINDEX_HASH]) {
-		if (nla_len(tb[TCA_TCINDEX_HASH]) < sizeof(u32))
-			goto errout;
+	if (tb[TCA_TCINDEX_HASH])
 		cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-	}
 
-	if (tb[TCA_TCINDEX_MASK]) {
-		if (nla_len(tb[TCA_TCINDEX_MASK]) < sizeof(u16))
-			goto errout;
+	if (tb[TCA_TCINDEX_MASK])
 		cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-	}
 
-	if (tb[TCA_TCINDEX_SHIFT]) {
-		if (nla_len(tb[TCA_TCINDEX_SHIFT]) < sizeof(int))
-			goto errout;
+	if (tb[TCA_TCINDEX_SHIFT])
 		cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-	}
 
 	err = -EBUSY;
 	/* Hash already allocated, make sure that we still meet the
@@ -248,11 +246,8 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		goto errout;
 
 	err = -EINVAL;
-	if (tb[TCA_TCINDEX_FALL_THROUGH]) {
-		if (nla_len(tb[TCA_TCINDEX_FALL_THROUGH]) < sizeof(u32))
-			goto errout;
+	if (tb[TCA_TCINDEX_FALL_THROUGH])
 		cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
-	}
 
 	if (!cp.hash) {
 		/* Hash not specified, use perfect hash if the upper limit
@@ -358,7 +353,7 @@ tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (!opt)
 		return 0;
 
-	err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index a4e72e8a882..e8a77568912 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -460,6 +460,16 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 	return handle|(i>0xFFF ? 0xFFF : i);
 }
 
+static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
+	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
+	[TCA_U32_HASH]		= { .type = NLA_U32 },
+	[TCA_U32_LINK]		= { .type = NLA_U32 },
+	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
+	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
+	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
+	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
+};
+
 static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 			 struct tc_u_hnode *ht,
 			 struct tc_u_knode *n, struct nlattr **tb,
@@ -531,7 +541,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (opt == NULL)
 		return handle ? -EINVAL : 0;
 
-	err = nla_parse_nested(tb, TCA_U32_MAX, opt, NULL);
+	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
 	if (err < 0)
 		return err;
 
@@ -593,8 +603,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	} else
 		handle = gen_new_kid(ht, htid);
 
-	if (tb[TCA_U32_SEL] == NULL ||
-	    nla_len(tb[TCA_U32_SEL]) < sizeof(struct tc_u32_sel))
+	if (tb[TCA_U32_SEL] == NULL)
 		return -EINVAL;
 
 	s = nla_data(tb[TCA_U32_SEL]);
@@ -620,13 +629,6 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (tb[TCA_U32_MARK]) {
 		struct tc_u32_mark *mark;
 
-		if (nla_len(tb[TCA_U32_MARK]) < sizeof(struct tc_u32_mark)) {
-#ifdef CONFIG_CLS_U32_PERF
-			kfree(n->pf);
-#endif
-			kfree(n);
-			return -EINVAL;
-		}
 		mark = nla_data(tb[TCA_U32_MARK]);
 		memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
 		n->mark.success = 0;
-- 
cgit v1.2.3


From 53b2bf3f8a652c9c8e86831f94ae2c5767ea54d7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:36:30 -0800
Subject: [NET_SCHED]: Use nla_policy for attribute validation in actions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_gact.c   | 15 ++++++++-------
 net/sched/act_ipt.c    | 19 ++++++++++++-------
 net/sched/act_mirred.c |  9 ++++++---
 net/sched/act_nat.c    |  9 ++++++---
 net/sched/act_pedit.c  |  9 ++++++---
 net/sched/act_police.c | 16 ++++++++--------
 net/sched/act_simple.c |  7 +++++--
 7 files changed, 51 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index df214d47fc9..422872c4f14 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -53,6 +53,11 @@ typedef int (*g_rand)(struct tcf_gact *gact);
 static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
 #endif /* CONFIG_GACT_PROB */
 
+static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
+	[TCA_GACT_PARMS]	= { .len = sizeof(struct tc_gact) },
+	[TCA_GACT_PROB]		= { .len = sizeof(struct tc_gact_p) },
+};
+
 static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 			 struct tc_action *a, int ovr, int bind)
 {
@@ -66,20 +71,16 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_GACT_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_GACT_PARMS] == NULL ||
-	    nla_len(tb[TCA_GACT_PARMS]) < sizeof(*parm))
+	if (tb[TCA_GACT_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_GACT_PARMS]);
 
+#ifndef CONFIG_GACT_PROB
 	if (tb[TCA_GACT_PROB] != NULL)
-#ifdef CONFIG_GACT_PROB
-		if (nla_len(tb[TCA_GACT_PROB]) < sizeof(struct tc_gact_p))
-			return -EINVAL;
-#else
 		return -EOPNOTSUPP;
 #endif
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 7ab2419b44e..da696fd3e34 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -92,6 +92,13 @@ static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
 	return ret;
 }
 
+static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
+	[TCA_IPT_TABLE]	= { .type = NLA_STRING, .len = IFNAMSIZ },
+	[TCA_IPT_HOOK]	= { .type = NLA_U32 },
+	[TCA_IPT_INDEX]	= { .type = NLA_U32 },
+	[TCA_IPT_TARG]	= { .len = sizeof(struct ipt_entry_target) },
+};
+
 static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 			struct tc_action *a, int ovr, int bind)
 {
@@ -107,22 +114,20 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_IPT_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_IPT_HOOK] == NULL ||
-	    nla_len(tb[TCA_IPT_HOOK]) < sizeof(u32))
+	if (tb[TCA_IPT_HOOK] == NULL)
 		return -EINVAL;
-	if (tb[TCA_IPT_TARG] == NULL ||
-	    nla_len(tb[TCA_IPT_TARG]) < sizeof(*t))
+	if (tb[TCA_IPT_TARG] == NULL)
 		return -EINVAL;
+
 	td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
 		return -EINVAL;
 
-	if (tb[TCA_IPT_INDEX] != NULL &&
-	    nla_len(tb[TCA_IPT_INDEX]) >= sizeof(u32))
+	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
 	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6cb5e30dcf8..1aff005d95c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -54,6 +54,10 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
 	return 0;
 }
 
+static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
+	[TCA_MIRRED_PARMS]	= { .len = sizeof(struct tc_mirred) },
+};
+
 static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 			   struct tc_action *a, int ovr, int bind)
 {
@@ -68,12 +72,11 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_MIRRED_PARMS] == NULL ||
-	    nla_len(tb[TCA_MIRRED_PARMS]) < sizeof(*parm))
+	if (tb[TCA_MIRRED_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 5a512d4dc37..0a3c8339767 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -40,6 +40,10 @@ static struct tcf_hashinfo nat_hash_info = {
 	.lock	=	&nat_lock,
 };
 
+static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
+	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
+};
+
 static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 			struct tc_action *a, int ovr, int bind)
 {
@@ -52,12 +56,11 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_NAT_PARMS] == NULL ||
-	    nla_len(tb[TCA_NAT_PARMS]) < sizeof(*parm))
+	if (tb[TCA_NAT_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 1b9ca45a78e..3cc4cb9e500 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,6 +33,10 @@ static struct tcf_hashinfo pedit_hash_info = {
 	.lock	=	&pedit_lock,
 };
 
+static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
+	[TCA_PEDIT_PARMS]	= { .len = sizeof(struct tcf_pedit) },
+};
+
 static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 			  struct tc_action *a, int ovr, int bind)
 {
@@ -47,12 +51,11 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_PEDIT_PARMS] == NULL ||
-	    nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm))
+	if (tb[TCA_PEDIT_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_PEDIT_PARMS]);
 	ksize = parm->nkeys * sizeof(struct tc_pedit_key);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 62de806af3a..0898120bbcc 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -119,6 +119,13 @@ static void tcf_police_destroy(struct tcf_police *p)
 	BUG_TRAP(0);
 }
 
+static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
+	[TCA_POLICE_RATE]	= { .len = TC_RTAB_SIZE },
+	[TCA_POLICE_PEAKRATE]	= { .len = TC_RTAB_SIZE },
+	[TCA_POLICE_AVRATE]	= { .type = NLA_U32 },
+	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
+};
+
 static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 				 struct tc_action *a, int ovr, int bind)
 {
@@ -133,7 +140,7 @@ static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
 	if (err < 0)
 		return err;
 
@@ -144,13 +151,6 @@ static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_POLICE_TBF]);
 
-	if (tb[TCA_POLICE_RESULT] != NULL &&
-	    nla_len(tb[TCA_POLICE_RESULT]) != sizeof(u32))
-		return -EINVAL;
-	if (tb[TCA_POLICE_RESULT] != NULL &&
-	    nla_len(tb[TCA_POLICE_RESULT]) != sizeof(u32))
-		return -EINVAL;
-
 	if (parm->index) {
 		struct tcf_common *pc;
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index cedaadf18eb..fbde461b716 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -84,6 +84,10 @@ static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
 	return alloc_defdata(d, datalen, defdata);
 }
 
+static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
+	[TCA_DEF_PARMS]	= { .len = sizeof(struct tc_defact) },
+};
+
 static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 			 struct tc_action *a, int ovr, int bind)
 {
@@ -102,8 +106,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_DEF_PARMS] == NULL ||
-	    nla_len(tb[TCA_DEF_PARMS]) < sizeof(*parm))
+	if (tb[TCA_DEF_PARMS] == NULL)
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-- 
cgit v1.2.3


From 7a9c1bd409d3522806d492aa573c1cc5384ca620 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jan 2008 20:36:45 -0800
Subject: [NET_SCHED]: Use nla_policy for attribute validation in ematches

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/em_meta.c |  9 ++++++---
 net/sched/ematch.c  | 11 ++++++-----
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index d9f487d813c..a1e5619b187 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -745,6 +745,10 @@ static inline int meta_is_supported(struct meta_value *val)
 	return (!meta_id(val) || meta_ops(val)->get);
 }
 
+static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
+	[TCA_EM_META_HDR]	= { .len = sizeof(struct tcf_meta_hdr) },
+};
+
 static int em_meta_change(struct tcf_proto *tp, void *data, int len,
 			  struct tcf_ematch *m)
 {
@@ -753,13 +757,12 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
 	struct tcf_meta_hdr *hdr;
 	struct meta_match *meta = NULL;
 
-	err = nla_parse(tb, TCA_EM_META_MAX, data, len, NULL);
+	err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy);
 	if (err < 0)
 		goto errout;
 
 	err = -EINVAL;
-	if (tb[TCA_EM_META_HDR] == NULL ||
-	    nla_len(tb[TCA_EM_META_HDR]) < sizeof(*hdr))
+	if (tb[TCA_EM_META_HDR] == NULL)
 		goto errout;
 	hdr = nla_data(tb[TCA_EM_META_HDR]);
 
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index daa9c4e7e81..74ff918455a 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -282,6 +282,11 @@ errout:
 	return err;
 }
 
+static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {
+	[TCA_EMATCH_TREE_HDR]	= { .len = sizeof(struct tcf_ematch_tree_hdr) },
+	[TCA_EMATCH_TREE_LIST]	= { .type = NLA_NESTED },
+};
+
 /**
  * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
  *
@@ -312,7 +317,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 		return 0;
 	}
 
-	err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, NULL);
+	err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
 	if (err < 0)
 		goto errout;
 
@@ -323,10 +328,6 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 	if (rt_hdr == NULL || rt_list == NULL)
 		goto errout;
 
-	if (nla_len(rt_hdr) < sizeof(*tree_hdr) ||
-	    nla_len(rt_list) < sizeof(*rt_match))
-		goto errout;
-
 	tree_hdr = nla_data(rt_hdr);
 	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
 
-- 
cgit v1.2.3


From f638a2f0579f74dc93d7da4299146e2822c06889 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 23 Jan 2008 20:37:50 -0800
Subject: [IPV4] fib_trie: More whitespace cleanup.

Remove extra blank lines.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f1005fe1789..b8d11492ef1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -447,7 +447,6 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
 
 	BUG_ON(i >= 1<<tn->bits);
 
-
 	/* update emptyChildren */
 	if (n == NULL && chi != NULL)
 		tn->empty_children++;
@@ -1306,7 +1305,6 @@ err:
 	return err;
 }
 
-
 /* should be called with rcu_read_lock */
 static int check_leaf(struct trie *t, struct leaf *l,
 		      t_key key,  const struct flowi *flp,
@@ -1721,7 +1719,6 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
 	return NULL; /* Root of trie */
 }
 
-
 static struct leaf *trie_firstleaf(struct trie *t)
 {
 	struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
@@ -1849,7 +1846,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 {
 	int i, s_i;
 	struct fib_alias *fa;
-
 	__be32 xkey = htonl(key);
 
 	s_i = cb->args[4];
@@ -1882,7 +1878,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 	return skb->len;
 }
 
-
 static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
 			struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -2388,7 +2383,6 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		struct leaf *l = (struct leaf *) n;
 		struct leaf_info *li;
 		struct hlist_node *node;
-
 		__be32 val = htonl(l->key);
 
 		seq_indent(seq, iter->depth);
-- 
cgit v1.2.3


From ac97f75faae2a18648145bc6bbcdd326bac6a1c2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 23 Jan 2008 20:38:24 -0800
Subject: [IPV4] fib_trie: remove unneeded NULL check

Since fib_route_seq_show now uses hlist_for_each_entry(), the leaf
info can not be NULL.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b8d11492ef1..f2f47033f31 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2477,9 +2477,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		struct fib_alias *fa;
 		__be32 mask, prefix;
 
-		if (!li)
-			continue;
-
 		mask = inet_make_mask(li->plen);
 		prefix = htonl(l->key);
 
-- 
cgit v1.2.3


From 85040bcb4643cba578839e953f25e2d1965d83d0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Jan 2008 15:46:02 -0800
Subject: [IPV6] ADDRLABEL: Fix double free on label deletion.

If an entry is being deleted because it has only one reference,
we immediately delete it and blindly register the rcu handler for it,
This results in oops by double freeing that object.

This patch fixes it by consolidating the code paths for the deletion;
let its rcu handler delete the object if it has no more reference.

Bug was found by Mitsuru Chinen <mitch@linux.vnet.ibm.com>

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrlabel.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 38674121ae5..a3c5a72218f 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -106,6 +106,11 @@ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
 	kfree(p);
 }
 
+static void ip6addrlbl_free_rcu(struct rcu_head *h)
+{
+	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
+}
+
 static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
 {
 	return atomic_inc_not_zero(&p->refcnt);
@@ -114,12 +119,7 @@ static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
 {
 	if (atomic_dec_and_test(&p->refcnt))
-		ip6addrlbl_free(p);
-}
-
-static void ip6addrlbl_free_rcu(struct rcu_head *h)
-{
-	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
+		call_rcu(&p->rcu, ip6addrlbl_free_rcu);
 }
 
 /* Find label */
@@ -240,7 +240,6 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 				}
 				hlist_replace_rcu(&p->list, &newp->list);
 				ip6addrlbl_put(p);
-				call_rcu(&p->rcu, ip6addrlbl_free_rcu);
 				goto out;
 			} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
 				   (p->prefixlen < newp->prefixlen)) {
@@ -300,7 +299,6 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
 		    ipv6_addr_equal(&p->prefix, prefix)) {
 			hlist_del_rcu(&p->list);
 			ip6addrlbl_put(p);
-			call_rcu(&p->rcu, ip6addrlbl_free_rcu);
 			ret = 0;
 			break;
 		}
-- 
cgit v1.2.3