Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core
26 files changed, 21067 insertions, 0 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
new file mode 100644
index 00000000000..81f03243fe2
--- /dev/null
+++ b/net/core/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the Linux networking core.
+#
+
+obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+
+obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+
+obj-y		     += flow.o dev.o ethtool.o dev_mcast.o dst.o \
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+
+obj-$(CONFIG_SYSFS) += net-sysfs.o
+obj-$(CONFIG_NETFILTER) += netfilter.o
+obj-$(CONFIG_NET_DIVERT) += dv.o
+obj-$(CONFIG_NET_PKTGEN) += pktgen.o
+obj-$(CONFIG_NET_RADIO) += wireless.o
+obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
new file mode 100644
index 00000000000..d1bfd279cc1
--- /dev/null
+++ b/net/core/datagram.c
@@ -0,0 +1,482 @@
+/*
+ *	SUCS NET3:
+ *
+ *	Generic datagram handling routines. These are generic for all
+ *	protocols. Possibly a generic IP version on top of these would
+ *	make sense. Not tonight however 8-).
+ *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
+ *	NetROM layer all have identical poll code and mostly
+ *	identical recvmsg() code. So we share it here. The poll was
+ *	shared before but buried in udp.c so I moved it.
+ *
+ *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *						     udp.c code)
+ *
+ *	Fixes:
+ *		Alan Cox	:	NULL return from skb_peek_copy()
+ *					understood
+ *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
+ *					skb_peek_copy stuff.
+ *		Alan Cox	:	Added support for SOCK_SEQPACKET.
+ *					IPX can no longer use the SO_TYPE hack
+ *					but AX.25 now works right, and SPX is
+ *					feasible.
+ *		Alan Cox	:	Fixed write poll of non IP protocol
+ *					crash.
+ *		Florian  La Roche:	Changed for my new skbuff handling.
+ *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
+ *		Linus Torvalds	:	BSD semantic fixes.
+ *		Alan Cox	:	Datagram iovec handling
+ *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
+ *		Alan Cox	:	POSIXisms
+ *		Pete Wyckoff    :       Unconnected accept() fix.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/tcp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+
+
+/*
+ *	Report whether @sk is a connection-oriented socket, i.e. whether its
+ *	sk_type is SOCK_SEQPACKET or SOCK_STREAM.  Returns 1 if so, 0 if not.
+ */
+static inline int connection_based(struct sock *sk)
+{
+	switch (sk->sk_type) {
+	case SOCK_SEQPACKET:
+	case SOCK_STREAM:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Wait for a packet..
+ *
+ * Queues the caller on sk->sk_sleep (exclusive, TASK_INTERRUPTIBLE) and
+ * then, in this order, checks for: a pending socket error, a non-empty
+ * receive queue, RCV_SHUTDOWN, a connection-based socket that is neither
+ * TCP_ESTABLISHED nor TCP_LISTEN (-ENOTCONN), and a pending signal.  Only
+ * when none of these apply does it call schedule_timeout(), writing the
+ * value that returns back through @timeo_p.
+ *
+ * Returns 0 when the caller should look at the receive queue again (*err
+ * is left untouched), 1 when the receive side is shut down (*err is set
+ * to 0), or otherwise the error code, which is also stored in *err.
+ */
+static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+{
+	int error;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+	/* Socket errors? */
+	error = sock_error(sk);
+	if (error)
+		goto out_err;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		goto out;	/* error is 0 here: data is already queued */
+
+	/* Socket shut down? */
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		goto out_noerr;
+
+	/* Sequenced packets can come disconnected.
+	 * If so we report the problem
+	 */
+	error = -ENOTCONN;
+	if (connection_based(sk) &&
+	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
+		goto out_err;
+
+	/* handle signals */
+	if (signal_pending(current))
+		goto interrupted;
+
+	error = 0;
+	*timeo_p = schedule_timeout(*timeo_p);
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return error;
+interrupted:
+	error = sock_intr_errno(*timeo_p);
+out_err:
+	*err = error;
+	goto out;
+out_noerr:
+	*err = 0;
+	error = 1;	/* non-zero so the caller stops waiting, but *err is 0 */
+	goto out;
+}
+
+/**
+ *	skb_recv_datagram - Receive a datagram skbuff
+ *	@sk: socket
+ *	@flags: MSG_ flags
+ *	@noblock: blocking operation? (passed to sock_rcvtimeo() to pick the
+ *		  receive timeout)
+ *	@err: error code returned
+ *
+ *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
+ *	and possible races. This replaces identical code in packet, raw and
+ *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
+ *	the long standing peek and read race for datagram sockets. If you
+ *	alter this routine remember it must be re-entrant.
+ *
+ *	Returns the skb on success: with MSG_PEEK the skb stays on the
+ *	receive queue and its user count is incremented, otherwise it is
+ *	dequeued. Returns %NULL on failure with the error code in *@err
+ *	(-EAGAIN when the timeout is zero and nothing is queued; 0 when
+ *	the receive side has been shut down).
+ *
+ *	Historical note: this function used to lock the socket if a skb was
+ *	returned, so the caller needed to unlock the socket in that case
+ *	(usually by calling skb_free_datagram). As the note below says, it
+ *	no longer does.
+ *
+ *	* It does not lock socket since today. This function is
+ *	* free of race conditions. This measure should/can improve
+ *	* significantly datagram socket latencies at high loads,
+ *	* when data copying to user space takes lots of time.
+ *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ *	*  8) Great win.)
+ *	*			                    --ANK (980729)
+ *
+ *	The order of the tests when we find no data waiting are specified
+ *	quite explicitly by POSIX 1003.1g, don't change them without having
+ *	the standard around please.
+ */
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	struct sk_buff *skb;
+	long timeo;
+	/*
+	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+	 */
+	int error = sock_error(sk);
+
+	if (error)
+		goto no_packet;
+
+	timeo = sock_rcvtimeo(sk, noblock);
+
+	do {
+		/* Again only user level code calls this function, so nothing
+		 * interrupt level will suddenly eat the receive_queue.
+		 *
+		 * Look at current nfs client by the way...
+		 * However, this function was correct in any case. 8)
+		 */
+		if (flags & MSG_PEEK) {
+			unsigned long cpu_flags;
+
+			spin_lock_irqsave(&sk->sk_receive_queue.lock,
+					  cpu_flags);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb)
+				atomic_inc(&skb->users);
+			spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
+					       cpu_flags);
+		} else
+			skb = skb_dequeue(&sk->sk_receive_queue);
+
+		if (skb)
+			return skb;
+
+		/* User doesn't want to wait */
+		error = -EAGAIN;
+		if (!timeo)
+			goto no_packet;
+
+	} while (!wait_for_packet(sk, err, &timeo));
+
+	return NULL;	/* wait_for_packet() has already filled in *err */
+
+no_packet:
+	*err = error;
+	return NULL;
+}
+
+/*
+ *	Release a datagram skb by handing @skb to kfree_skb().
+ *	@sk is accepted for the caller's convenience and is not used here.
+ */
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+}
+
+/**
+ *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: io vector to copy to
+ *	@len: amount of data to copy from buffer to iovec
+ *
+ *	Copies from the linear header area first, then from each page
+ *	fragment, then recurses into each skb on the frag_list.
+ *
+ *	Returns 0 once @len bytes have been copied, or -EFAULT if
+ *	memcpy_toiovec() reports a failure or the skb runs out of data
+ *	before @len bytes were copied.
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
+			    struct iovec *to, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovec(to, skb->data + offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovec(to, vaddr + frag->page_offset +
+					     offset - start, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+		}
+		start = end;	/* [start, end) is this fragment's byte range */
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_datagram_iovec(list,
+							    offset - start,
+							    to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+/*
+ * Copy @len bytes of @skb, starting at @offset, to the user buffer @to
+ * while folding the checksum of the copied bytes into *@csump.  Walks the
+ * linear header area, then the page fragments, then the frag_list skbs.
+ * Fragment and frag_list checksums are computed separately (starting from
+ * 0) and merged with csum_block_add() at @pos, the number of bytes already
+ * copied by this call.
+ *
+ * Returns 0 once @len bytes have been copied, or -EFAULT if a copy fails
+ * or the skb runs out of data first.
+ */
+static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
+				      u8 __user *to, int len,
+				      unsigned int *csump)
+{
+	int start = skb_headlen(skb);
+	int pos = 0;
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		int err = 0;
+		if (copy > len)
+			copy = len;
+		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
+					       *csump, &err);
+		if (err)
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+		pos = copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			int err = 0;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			csum2 = csum_and_copy_to_user(vaddr +
+							frag->page_offset +
+							offset - start,
+						      to, copy, 0, &err);
+			kunmap(page);
+			if (err)
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				unsigned int csum2 = 0;
+				if (copy > len)
+					copy = len;
+				if (skb_copy_and_csum_datagram(list,
+							       offset - start,
+							       to, copy,
+							       &csum2))
+					goto fault;
+				*csump = csum_block_add(*csump, csum2, pos);
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/**
+ *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
+ *	@skb: skbuff
+ *	@hlen: hardware length
+ *	@iov: io vector
+ *
+ *	Copies the skb->len - @hlen bytes that follow the first @hlen bytes
+ *	of @skb into @iov and verifies the checksum over the whole skb,
+ *	seeded with skb->csum. If the first non-empty iovec element cannot
+ *	hold the data, the checksum is verified first and the data is then
+ *	copied with skb_copy_datagram_iovec(); otherwise copy and checksum
+ *	happen in one pass and that iovec element is advanced.
+ *
+ *	When there is no data to copy the function returns 0 without
+ *	touching @iov and without verifying the checksum.
+ *
+ *	Caller _must_ check that skb will fit to this iovec.
+ *
+ *	Returns: 0       - success.
+ *		 -EINVAL - checksum failure.
+ *		 -EFAULT - fault during copy. Beware, in this case iovec
+ *			   can be modified!
+ */
+int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
+				     int hlen, struct iovec *iov)
+{
+	unsigned int csum;
+	int chunk = skb->len - hlen;
+
+	/*
+	 * Nothing to copy: leave before looking at @iov.  The skip loop
+	 * below has no end-of-vector check, so with an iovec that has no
+	 * room left it would walk past the last element.
+	 */
+	if (!chunk)
+		return 0;
+
+	/* Skip filled elements.
+	 * Pretty silly, look at memcpy_toiovec, though 8)
+	 */
+	while (!iov->iov_len)
+		iov++;
+
+	if (iov->iov_len < chunk) {
+		/* Data spans several elements: verify first, then copy. */
+		if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen,
+							   skb->csum)))
+			goto csum_error;
+		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+			goto fault;
+	} else {
+		/* Fits in one element: checksum while copying. */
+		csum = csum_partial(skb->data, hlen, skb->csum);
+		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+					       chunk, &csum))
+			goto fault;
+		if ((unsigned short)csum_fold(csum))
+			goto csum_error;
+		iov->iov_len -= chunk;
+		iov->iov_base += chunk;
+	}
+	return 0;
+csum_error:
+	return -EINVAL;
+fault:
+	return -EFAULT;
+}
+
+/**
+ * 	datagram_poll - generic datagram poll
+ *	@file: file struct
+ *	@sock: socket
+ *	@wait: poll table
+ *
+ *	Datagram poll: Again totally generic. This also handles
+ *	sequenced packet sockets providing the socket receive queue
+ *	is only ever holding data ready to receive.
+ *
+ *	Returns the mask of POLL* events currently pending on the socket.
+ *
+ *	Note: when you _don't_ use this routine for this protocol,
+ *	and you use a different write policy from sock_writeable()
+ *	then please supply your own write_space callback.
+ */
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int events;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	events = 0;
+
+	/* Error pending, or something sitting on the error queue. */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		events |= POLLERR;
+
+	/* Both directions shut down. */
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		events |= POLLHUP;
+
+	/* Readable: data queued, or the receive side is shut down. */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    (sk->sk_shutdown & RCV_SHUTDOWN))
+		events |= POLLIN | POLLRDNORM;
+
+	/* Connection-based sockets: check for termination and startup. */
+	if (connection_based(sk)) {
+		if (sk->sk_state == TCP_CLOSE)
+			events |= POLLHUP;
+		/* Still connecting: do not report writability yet. */
+		if (sk->sk_state == TCP_SYN_SENT)
+			goto done;
+	}
+
+	/* Writable?  If not, flag that a writer is waiting for space. */
+	if (!sock_writeable(sk))
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	else
+		events |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+done:
+	return events;
+}
+
+EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+EXPORT_SYMBOL(skb_free_datagram);
+EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
new file mode 100644
index 00000000000..42344d90369
--- /dev/null
+++ b/net/core/dev.c
@@ -0,0 +1,3359 @@
+/*
+ * 	NET3	Protocol independent device support routines.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Derived from the non IP parts of dev.c 1.0.19
+ * 		Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *				Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
+ *	Additional Authors:
+ *		Florian la Roche <rzsfl@rz.uni-sb.de>
+ *		Alan Cox <gw4pts@gw4pts.ampr.org>
+ *		David Hinds <dahinds@users.sourceforge.net>
+ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *		Adam Sulmicki <adam@cfar.umd.edu>
+ *              Pekka Riikonen <priikone@poesidon.pspt.fi>
+ *
+ *	Changes:
+ *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
+ *              			to 2 if register_netdev gets called
+ *              			before net_dev_init & also removed a
+ *              			few lines of code in the process.
+ *		Alan Cox	:	device private ioctl copies fields back.
+ *		Alan Cox	:	Transmit queue code does relevant
+ *					stunts to keep the queue safe.
+ *		Alan Cox	:	Fixed double lock.
+ *		Alan Cox	:	Fixed promisc NULL pointer trap
+ *		????????	:	Support the full private ioctl range
+ *		Alan Cox	:	Moved ioctl permission check into
+ *					drivers
+ *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
+ *		Alan Cox	:	100 backlog just doesn't cut it when
+ *					you start doing multicast video 8)
+ *		Alan Cox	:	Rewrote net_bh and list manager.
+ *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
+ *		Alan Cox	:	Took out transmit every packet pass
+ *					Saved a few bytes in the ioctl handler
+ *		Alan Cox	:	Network driver sets packet type before
+ *					calling netif_rx. Saves a function
+ *					call a packet.
+ *		Alan Cox	:	Hashed net_bh()
+ *		Richard Kooijman:	Timestamp fixes.
+ *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
+ *		Alan Cox	:	Device lock protection.
+ *		Alan Cox	: 	Fixed nasty side effect of device close
+ *					changes.
+ *		Rudi Cilibrasi	:	Pass the right thing to
+ *					set_mac_address()
+ *		Dave Miller	:	32bit quantity for the device lock to
+ *					make it work out on a Sparc.
+ *		Bjorn Ekwall	:	Added KERNELD hack.
+ *		Alan Cox	:	Cleaned up the backlog initialise.
+ *		Craig Metz	:	SIOCGIFCONF fix if space for under
+ *					1 device.
+ *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
+ *					is no device open function.
+ *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
+ *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
+ *		Cyrus Durgin	:	Cleaned for KMOD
+ *		Adam Sulmicki   :	Bug Fix : Network Device Unload
+ *					A network device unload needs to purge
+ *					the backlog queue.
+ *	Paul Rusty Russell	:	SIOCSIFNAME
+ *              Pekka Riikonen  :	Netdev boot-time settings code
+ *              Andrew Morton   :       Make unregister_netdevice wait
+ *              			indefinitely on dev->refcnt
+ * 		J Hadi Salim	:	- Backlog queue sampling
+ *				        - netif_rx() feedback
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/if_bridge.h>
+#include <linux/divert.h>
+#include <net/dst.h>
+#include <net/pkt_sched.h>
+#include <net/checksum.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/netpoll.h>
+#include <linux/rcupdate.h>
+#include <linux/delay.h>
+#ifdef CONFIG_NET_RADIO
+#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
+#include <net/iw_handler.h>
+#endif	/* CONFIG_NET_RADIO */
+#include <asm/current.h>
+
+/* This define, if set, will randomly drop a packet when congestion
+ * is more than moderate.  It helps fairness in the multi-interface
+ * case when one of them is a hog, but it kills performance for the
+ * single interface case so it is off now by default.
+ */
+#undef RAND_LIE
+
+/* Setting this will sample the queue lengths and thus congestion
+ * via a timer instead of as each packet is received.
+ */
+#undef OFFLINE_SAMPLE
+
+/*
+ *	The list of packet types we will receive (as opposed to discard)
+ *	and the routines to invoke.
+ *
+ *	Why 16. Because with 16 the only overlap we get on a hash of the
+ *	low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
+ *             sure which should go first, but I bet it won't make much
+ *             difference if we are running VLANs.  The good news is that
+ *             this protocol won't be in the list unless compiled in, so
+ *             the average user (w/out VLANs) will not be adversely affected.
+ *             --BLG
+ *
+ *		0800	IP
+ *		8100    802.1Q VLAN
+ *		0001	802.3
+ *		0002	AX.25
+ *		0004	802.2
+ *		8035	RARP
+ *		0005	SNAP
+ *		0805	X.25
+ *		0806	ARP
+ *		8137	IPX
+ *		0009	Localtalk
+ *		86DD	IPv6
+ */
+
+static DEFINE_SPINLOCK(ptype_lock);
+static struct list_head ptype_base[16];	/* 16 way hashed list */
+static struct list_head ptype_all;		/* Taps */
+
+#ifdef OFFLINE_SAMPLE
+static void sample_queue(unsigned long dummy);
+static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
+#endif
+
+/*
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading.
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base list, and hold dev_base_lock for writing when they do the
+ * actual updates.  This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * See, for example usages, register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+struct net_device *dev_base;
+static struct net_device **dev_tail = &dev_base;
+DEFINE_RWLOCK(dev_base_lock);
+
+EXPORT_SYMBOL(dev_base);
+EXPORT_SYMBOL(dev_base_lock);
+
+#define NETDEV_HASHBITS	8
+static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+
+/* Map a device name (at most IFNAMSIZ bytes are hashed) to its name bucket. */
+static inline struct hlist_head *dev_name_hash(const char *name)
+{
+	const unsigned int bucket_mask = (1 << NETDEV_HASHBITS) - 1;
+	unsigned int len = strnlen(name, IFNAMSIZ);
+
+	return dev_name_head + (full_name_hash(name, len) & bucket_mask);
+}
+
+/* Map an ifindex to its bucket using the low NETDEV_HASHBITS bits. */
+static inline struct hlist_head *dev_index_hash(int ifindex)
+{
+	int bucket = ifindex & ((1 << NETDEV_HASHBITS) - 1);
+
+	return dev_index_head + bucket;
+}
+
+/*
+ *	Our notifier list
+ */
+
+static struct notifier_block *netdev_chain;
+
+/*
+ *	Device drivers call our routines to queue packets here. We empty the
+ *	queue in the local softnet handler.
+ */
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+
+#ifdef CONFIG_SYSFS
+extern int netdev_sysfs_init(void);
+extern int netdev_register_sysfs(struct net_device *);
+extern void netdev_unregister_sysfs(struct net_device *);
+#else
+#define netdev_sysfs_init()	 	(0)
+#define netdev_register_sysfs(dev)	(0)
+#define	netdev_unregister_sysfs(dev)	do { } while(0)
+#endif
+
+
+/*******************************************************************************
+
+		Protocol management and registration routines
+
+*******************************************************************************/
+
+/*
+ *	For efficiency
+ */
+
+int netdev_nit;
+
+/*
+ *	Add a protocol ID to the list. Now that the input handler is
+ *	smarter we can dispense with all the messy stuff that used to be
+ *	here.
+ *
+ *	BEWARE!!! Protocol handlers, mangling input packets,
+ *	MUST BE last in hash buckets and checking protocol handlers
+ *	MUST start from promiscuous ptype_all chain in net_bh.
+ *	It is true now, do not change it.
+ *	Explanation follows: if protocol handler, mangling packet, will
+ *	be the first on list, it is not able to sense, that packet
+ *	is cloned and should be copied-on-write, so that it will
+ *	change it and subsequent readers will get broken packet.
+ *							--ANK (980803)
+ */
+
+/**
+ *	dev_add_pack - add packet handler
+ *	@pt: packet type declaration
+ *
+ *	Add a protocol handler to the networking stack. The passed &packet_type
+ *	is linked into kernel lists and may not be freed until it has been
+ *	removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not 
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new packet type (until the next received packet).
+ */
+
+void dev_add_pack(struct packet_type *pt)
+{
+	struct list_head *chain;
+
+	spin_lock_bh(&ptype_lock);
+	if (pt->type != htons(ETH_P_ALL)) {
+		/* Ordinary handler: bucket chosen by the low 4 bits of type. */
+		chain = &ptype_base[ntohs(pt->type) & 15];
+	} else {
+		/* ETH_P_ALL tap: goes on ptype_all and is counted. */
+		netdev_nit++;
+		chain = &ptype_all;
+	}
+	list_add_rcu(&pt->list, chain);
+	spin_unlock_bh(&ptype_lock);
+}
+
+extern void linkwatch_run_queue(void);
+
+
+
+/**
+ *	__dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ *
+ *	Remove a protocol handler that was previously added to the kernel
+ *	protocol handlers by dev_add_pack(). The passed &packet_type is
+ *	unlinked from the kernel lists when this function returns.
+ *
+ *      The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ *
+ *	If @pt is not found on its list a warning is logged and nothing
+ *	is changed; in particular the netdev_nit tap count is only
+ *	decremented when an ETH_P_ALL handler was really unlinked.
+ */
+void __dev_remove_pack(struct packet_type *pt)
+{
+	struct list_head *head;
+	struct packet_type *pt1;
+	int is_tap;
+
+	spin_lock_bh(&ptype_lock);
+
+	is_tap = (pt->type == htons(ETH_P_ALL));
+	if (is_tap)
+		head = &ptype_all;
+	else
+		head = &ptype_base[ntohs(pt->type) & 15];
+
+	list_for_each_entry(pt1, head, list) {
+		if (pt == pt1) {
+			list_del_rcu(&pt->list);
+			/* Keep the tap count in step with ptype_all. */
+			if (is_tap)
+				netdev_nit--;
+			goto out;
+		}
+	}
+
+	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
+out:
+	spin_unlock_bh(&ptype_lock);
+}
+/**
+ *	dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ *
+ *	Unlink a protocol handler that was previously registered with
+ *	dev_add_pack(), using __dev_remove_pack(), and then call
+ *	synchronize_net() before returning. The passed &packet_type can be
+ *	freed or reused once this function returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_pack(struct packet_type *pt)
+{
+	/* Step 1: take it off the list. */
+	__dev_remove_pack(pt);
+
+	/* Step 2: wait until no receiver can still be using it. */
+	synchronize_net();
+}
+
+/******************************************************************************
+
+		      Device Boot-time Settings Routines
+
+*******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *	netdev_boot_setup_add	- add new setup entry
+ *	@name: name of the device
+ *	@map: configured settings for the device
+ *
+ *	Adds new setup entry to the dev_boot_setup list.  The entry goes
+ *	into the first slot whose name is empty or starts with a space.
+ *	@name is truncated if it does not fit the slot's name field.
+ *	The function returns 0 on error (table full) and 1 on success.
+ *	This is a generic routine to all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+	struct netdev_boot_setup *s;
+	int i;
+
+	s = dev_boot_setup;
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+			memset(s[i].name, 0, sizeof(s[i].name));
+			/*
+			 * @name comes from the kernel command line and can
+			 * be longer than the field, so the copy is bounded.
+			 */
+			strlcpy(s[i].name, name, sizeof(s[i].name));
+			memcpy(&s[i].map, map, sizeof(s[i].map));
+			break;
+		}
+	}
+
+	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ *	netdev_boot_setup_check	- check boot time settings
+ *	@dev: the netdevice
+ *
+ * 	Check boot time settings for the device.
+ *	The found settings are set for the device to be used
+ *	later in the device probing.
+ *	An entry applies only if its name equals @dev->name exactly.
+ *	Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+	struct netdev_boot_setup *s = dev_boot_setup;
+	int i;
+
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+		/* Skip unused slots (empty or space-led name). */
+		if (s[i].name[0] == '\0' || s[i].name[0] == ' ')
+			continue;
+		/*
+		 * Exact match: a prefix comparison would let the entry for
+		 * "eth1" also configure "eth10".
+		 */
+		if (strcmp(dev->name, s[i].name))
+			continue;
+
+		dev->irq 	= s[i].map.irq;
+		dev->base_addr 	= s[i].map.base_addr;
+		dev->mem_start 	= s[i].map.mem_start;
+		dev->mem_end 	= s[i].map.mem_end;
+		return 1;
+	}
+	return 0;
+}
+
+
+/**
+ *	netdev_boot_base	- get address from boot time settings
+ *	@prefix: prefix for network device
+ *	@unit: id for network device
+ *
+ * 	Check boot time settings for the base address of device.
+ *	The name looked up is @prefix followed by @unit in decimal,
+ *	truncated to fit IFNAMSIZ.
+ *	Returns 1 if a device of that name is already registered,
+ *	the configured base address if a boot entry of that name exists,
+ *	and 0 if no settings found.
+ */
+unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+	const struct netdev_boot_setup *s = dev_boot_setup;
+	char name[IFNAMSIZ];
+	int i;
+
+	/* Bounded: @prefix is caller supplied and may not fit in name[]. */
+	snprintf(name, sizeof(name), "%s%d", prefix, unit);
+
+	/*
+	 * If device already registered then return base of 1
+	 * to indicate not to probe for this interface
+	 */
+	if (__dev_get_by_name(name))
+		return 1;
+
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+		if (!strcmp(name, s[i].name))
+			return s[i].map.base_addr;
+	return 0;
+}
+
+/*
+ * Handler for the "netdev=" boot parameter: parses up to four leading
+ * integers (irq, base_addr, mem_start, mem_end, in that order) with
+ * get_options() and records them, together with the remaining string as
+ * the device name, via netdev_boot_setup_add().  Returns 0 if nothing is
+ * left after the integers, otherwise the result of netdev_boot_setup_add().
+ */
+int __init netdev_boot_setup(char *str)
+{
+	struct ifmap map;
+	int ints[5];
+	int nr_ints;
+
+	str = get_options(str, ARRAY_SIZE(ints), ints);
+	if (str == NULL || *str == '\0')
+		return 0;
+
+	/* ints[0] is tested as the count of values stored in ints[1..]. */
+	nr_ints = ints[0];
+
+	memset(&map, 0, sizeof(map));
+	if (nr_ints >= 1)
+		map.irq = ints[1];
+	if (nr_ints >= 2)
+		map.base_addr = ints[2];
+	if (nr_ints >= 3)
+		map.mem_start = ints[3];
+	if (nr_ints >= 4)
+		map.mem_end = ints[4];
+
+	/* What is left of @str is used as the device name. */
+	return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+/*******************************************************************************
+
+			    Device Interface Subroutines
+
+*******************************************************************************/
+
+/**
+ *	__dev_get_by_name	- find a device by its name
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore
+ *	or @dev_base_lock. If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+struct net_device *__dev_get_by_name(const char *name)
+{
+	struct hlist_head *bucket = dev_name_hash(name);
+	struct hlist_node *node;
+
+	/* Walk only the hash chain that @name maps to. */
+	hlist_for_each(node, bucket) {
+		struct net_device *cand;
+
+		cand = hlist_entry(node, struct net_device, name_hlist);
+		if (strncmp(cand->name, name, IFNAMSIZ) == 0)
+			return cand;
+	}
+	return NULL;
+}
+
+/**
+ *	dev_get_by_name		- find a device by its name
+ *	@name: name to find
+ *
+ *	Find an interface by name. This can be called from any
+ *	context and does its own locking. The returned handle has
+ *	the usage count incremented and the caller must use dev_put() to
+ *	release it when it is no longer needed. %NULL is returned if no
+ *	matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+	struct net_device *found;
+
+	read_lock(&dev_base_lock);
+	found = __dev_get_by_name(name);
+	/* Take the reference while the lock still pins the lookup result. */
+	if (found != NULL)
+		dev_hold(found);
+	read_unlock(&dev_base_lock);
+
+	return found;
+}
+
+/**
+ *	__dev_get_by_index - find a device by its ifindex
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold either the RTNL semaphore
+ *	or @dev_base_lock.
+ */
+
+struct net_device *__dev_get_by_index(int ifindex)
+{
+	struct hlist_head *bucket = dev_index_hash(ifindex);
+	struct hlist_node *node;
+
+	/* Walk only the hash chain that @ifindex maps to. */
+	hlist_for_each(node, bucket) {
+		struct net_device *cand;
+
+		cand = hlist_entry(node, struct net_device, index_hlist);
+		if (cand->ifindex != ifindex)
+			continue;
+		return cand;
+	}
+	return NULL;
+}
+
+
+/**
+ *	dev_get_by_index - find a device by its ifindex
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns NULL if the device
+ *	is not found or a pointer to the device. The device returned has
+ *	had a reference added and the pointer is safe until the user calls
+ *	dev_put to indicate they have finished with it.
+ */
+
+struct net_device *dev_get_by_index(int ifindex)
+{
+	struct net_device *found;
+
+	read_lock(&dev_base_lock);
+	found = __dev_get_by_index(ifindex);
+	/* Take the reference while the lock still pins the lookup result. */
+	if (found != NULL)
+		dev_hold(found);
+	read_unlock(&dev_base_lock);
+
+	return found;
+}
+
+/**
+ *	dev_getbyhwaddr - find a device by its hardware address
+ *	@type: media type of device
+ *	@ha: hardware address
+ *
+ *	Search for an interface by MAC address. Returns NULL if the device
+ *	is not found or a pointer to the device. The caller must hold the
+ *	rtnl semaphore. The returned device has not had its ref count increased
+ *	and the caller must therefore be careful about locking
+ *
+ *	BUGS:
+ *	If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+
+	/* Linear scan of dev_base; the first match on type and address wins. */
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if (dev->type != type)
+			continue;
+		if (memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
+/*
+ * Return the first device on dev_base whose type equals @type, with a
+ * reference taken via dev_hold(), or NULL if there is none.  Takes and
+ * releases the rtnl lock itself.
+ */
+struct net_device *dev_getfirstbyhwtype(unsigned short type)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	dev = dev_base;
+	while (dev != NULL && dev->type != type)
+		dev = dev->next;
+	if (dev != NULL)
+		dev_hold(dev);
+	rtnl_unlock();
+
+	return dev;
+}
+
+EXPORT_SYMBOL(dev_getfirstbyhwtype);
+
+/**
+ *	dev_get_by_flags - find the first device whose flags match
+ *	@if_flags: IFF_* values to compare against
+ *	@mask: bitmask selecting which bits of @if_flags are compared
+ *
+ *	Scans the device list under @dev_base_lock and returns the first
+ *	device whose flags equal @if_flags in every bit set in @mask, or
+ *	NULL if there is none. The device returned has had a reference
+ *	added; the caller calls dev_put when finished with it.
+ */
+
+struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+{
+	struct net_device *match;
+
+	read_lock(&dev_base_lock);
+	/* A device matches when no bit selected by @mask differs. */
+	for (match = dev_base; match; match = match->next)
+		if (!((match->flags ^ if_flags) & mask))
+			break;
+	if (match)
+		dev_hold(match);
+	read_unlock(&dev_base_lock);
+	return match;
+}
+
+/**
+ *	dev_valid_name - check if name is okay for network device
+ *	@name: name string
+ *
+ *	Network device names need to be valid file names to allow sysfs
+ *	to work. Returns non-zero if @name is acceptable, zero otherwise.
+ */
+static int dev_valid_name(const char *name)
+{
+	if (name[0] == '\0' || strchr(name, '/') != NULL)
+		return 0;
+	/* "." and ".." are rejected as well. */
+	return strcmp(name, ".") != 0 && strcmp(name, "..") != 0;
+}
+
+/**
+ *	dev_alloc_name - allocate a name for a device
+ *	@dev: device whose name is filled in on success
+ *	@name: name format string, eg "lt%d"
+ *
+ *	Picks the lowest unit number no existing device name of this format
+ *	uses. Not efficient for many devices, not called a lot. The caller
+ *	must hold the dev_base or rtnl lock while allocating the name and
+ *	adding the device in order to avoid duplicates. Returns the unit
+ *	number, or -EINVAL (bad format), -ENOMEM or -ENFILE (no free name).
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+	int i = 0;
+	char buf[IFNAMSIZ];
+	const char *p;
+	const int max_netdevices = 8*PAGE_SIZE;	/* number of bits in one page */
+	long *inuse;
+	struct net_device *d;
+
+	p = strnchr(name, IFNAMSIZ-1, '%');
+	if (p) {
+		/*
+		 * Verify the string as this thing may have come from
+		 * the user.  There must be exactly one "%d" and no other
+		 * "%" characters.
+		 */
+		if (p[1] != 'd' || strchr(p + 2, '%'))
+			return -EINVAL;
+
+		/* Use one page as a bit array of possible slots */
+		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+		if (!inuse)
+			return -ENOMEM;
+
+		for (d = dev_base; d; d = d->next) {
+			if (!sscanf(d->name, name, &i))
+				continue;	/* name is not of this format */
+			if (i < 0 || i >= max_netdevices)
+				continue;	/* unit does not fit the bitmap */
+
+			/*  avoid cases where sscanf is not exact inverse of printf */
+			snprintf(buf, sizeof(buf), name, i);
+			if (!strncmp(buf, d->name, IFNAMSIZ))
+				set_bit(i, inuse);
+		}
+
+		i = find_first_zero_bit(inuse, max_netdevices);
+		free_page((unsigned long) inuse);
+	}
+
+	snprintf(buf, sizeof(buf), name, i);	/* i is still 0 without a "%d" */
+	if (!__dev_get_by_name(buf)) {
+		strlcpy(dev->name, buf, IFNAMSIZ);
+		return i;
+	}
+
+	/* It is possible to run out of possible slots
+	 * when the name is long and there isn't enough space left
+	 * for the digits, or if all bits are used.
+	 */
+	return -ENFILE;
+}
+
+
+/**
+ *	dev_change_name - change name of a device
+ *	@dev: device
+ *	@newname: name (or format string) must be at least IFNAMSIZ
+ *
+ *	Change name of a device; a format string such as "eth%d" may be
+ *	passed for wildcarding. Fails with -EBUSY while the device is up.
+ */
+int dev_change_name(struct net_device *dev, char *newname)
+{
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	if (!dev_valid_name(newname))
+		return -EINVAL;
+
+	if (strchr(newname, '%')) {
+		err = dev_alloc_name(dev, newname);
+		if (err < 0)
+			return err;
+		strcpy(newname, dev->name);	/* hand the expanded name back */
+	}
+	else if (__dev_get_by_name(newname))
+		return -EEXIST;
+	else
+		strlcpy(dev->name, newname, IFNAMSIZ);
+
+	err = class_device_rename(&dev->class_dev, dev->name);
+	if (!err) {	/* rehash under the new name, then notify */
+		hlist_del(&dev->name_hlist);
+		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+	}
+
+	return err;
+}
+
+/**
+ *	netdev_state_change - device changes state
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed state. If the device is up,
+ *	the netdev_chain notifiers are called with NETDEV_CHANGE and a
+ *	NEWLINK message is sent via rtmsg_ifinfo(). Otherwise it does nothing.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+	if (!(dev->flags & IFF_UP))
+		return;		/* nothing is announced for a down device */
+	notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+}
+
+/**
+ *	dev_load 	- load a network module
+ *	@name: name of interface
+ *
+ *	If no interface called @name exists and the calling process has
+ *	CAP_SYS_MODULE, a module of that name is requested. If module
+ *	loading is not available in this kernel then it becomes a nop.
+ */
+
+void dev_load(const char *name)
+{
+	int present;
+
+	read_lock(&dev_base_lock);
+	present = __dev_get_by_name(name) != NULL;
+	read_unlock(&dev_base_lock);
+
+	if (!present && capable(CAP_SYS_MODULE))
+		request_module("%s", name);
+}
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+	const char *who = skb->dev ? skb->dev->name : "NULL!!!";
+	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", who);
+	kfree_skb(skb);		/* the buffer is freed here */
+	return 1;
+}
+
+
+/**
+ *	dev_open	- prepare an interface for use.
+ *	@dev:	device to open
+ *
+ *	Takes a device from down to up state. The device's private open
+ *	method, if it has one, is invoked first; then IFF_UP is set, the
+ *	multicast lists are loaded, the transmit queue is activated and a
+ *	%NETDEV_UP message is sent to the netdev notifier chain.
+ *
+ *	Calling this function on an active interface is a nop and returns
+ *	zero. -ENODEV is returned if the device is not present. If the
+ *	private open method fails, its return value is handed back, the
+ *	__LINK_STATE_START bit is cleared again and the device stays down
+ *	with no notification sent.
+ */
+int dev_open(struct net_device *dev)
+{
+	int err;
+
+	/*
+	 *	Nothing to do for an interface that is already up.
+	 */
+	if (dev->flags & IFF_UP)
+		return 0;
+
+	/*
+	 *	A device that is not present cannot be opened.
+	 */
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	/*
+	 *	Mark the device started, then call its private open method;
+	 *	the mark is taken back if that method reports an error.
+	 */
+	set_bit(__LINK_STATE_START, &dev->state);
+	if (dev->open) {
+		err = dev->open(dev);
+		if (err) {
+			clear_bit(__LINK_STATE_START, &dev->state);
+			return err;
+		}
+	}
+
+	/*
+	 *	It went open OK, so set the flags.
+	 */
+	dev->flags |= IFF_UP;
+
+	/*
+	 *	Initialize multicasting status
+	 */
+	dev_mc_upload(dev);
+
+	/*
+	 *	Wakeup transmit queue engine
+	 */
+	dev_activate(dev);
+
+	/*
+	 *	... and announce new interface.
+	 */
+	notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+
+	return 0;
+}
+
+/**
+ *	dev_close - shutdown an interface.
+ *	@dev: device to shutdown
+ *
+ *	This function moves an active device into down state. A
+ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *	chain. Always returns 0; a device that is already down is left alone.
+ */
+int dev_close(struct net_device *dev)
+{
+	if (!(dev->flags & IFF_UP))
+		return 0;
+
+	/*
+	 *	Tell people we are going down, so that they can
+	 *	prepare for death while the device is still operating.
+	 */
+	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+	dev_deactivate(dev);
+
+	clear_bit(__LINK_STATE_START, &dev->state);
+
+	/* Synchronize to scheduled poll. We cannot touch poll list,
+	 * it can be even on different cpu. So just clear netif_running(),
+	 * and wait when poll really will happen. Actually, the best place
+	 * for this is inside dev->stop() after device stopped its irq
+	 * engine, but this requires more changes in devices. */
+
+	smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
+		/* No hurry; sleep uninterruptibly so a signal can't make us spin. */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(1);
+	}
+
+	/*
+	 *	Call the device specific close. This cannot fail.
+	 *	Only if device is UP
+	 *
+	 *	We allow it to be called even after a DETACH hot-plug
+	 *	event.
+	 */
+	if (dev->stop)
+		dev->stop(dev);
+
+	/*
+	 *	Device is now down.
+	 */
+
+	dev->flags &= ~IFF_UP;
+
+	/*
+	 * Tell people we are down
+	 */
+	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+	return 0;
+}
+
+
+/*
+ *	Device change register/unregister. These are not inline or static
+ *	as we export them to the world.
+ */
+
+/**
+ *	register_netdevice_notifier - register a network notifier block
+ *	@nb: notifier
+ *
+ *	Adds @nb to the netdev notifier chain while holding the rtnl lock.
+ *	The notifier passed is linked into the kernel structures and must
+ *	not be reused until it has been unregistered. The result of the
+ *	chain registration is returned; non-zero means failure.
+ *
+ *	On success the new notifier is called with NETDEV_REGISTER for each
+ *	device on the list, and with NETDEV_UP for each one that is up, so
+ *	that it gets a race free view of the network device list.
+ */
+
+int register_netdevice_notifier(struct notifier_block *nb)
+{
+	struct net_device *dev;
+	int err;
+
+	rtnl_lock();
+	err = notifier_chain_register(&netdev_chain, nb);
+	if (err)
+		goto unlock;
+	for (dev = dev_base; dev; dev = dev->next) {
+		nb->notifier_call(nb, NETDEV_REGISTER, dev);
+		if (dev->flags & IFF_UP)
+			nb->notifier_call(nb, NETDEV_UP, dev);
+	}
+unlock:
+	rtnl_unlock();
+	return err;
+}
+
+/**
+ *	unregister_netdevice_notifier - unregister a network notifier block
+ *	@nb: notifier
+ *
+ *	Unregister a notifier previously registered by
+ *	register_netdevice_notifier(). The notifier is unlinked from the
+ *	kernel structures and may then be reused. A negative errno code
+ *	is returned on a failure.
+ */
+
+int unregister_netdevice_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&netdev_chain, nb);
+}
+
+/**
+ *	call_netdevice_notifiers - call all network notifier blocks
+ *	@val: value passed unmodified to notifier function
+ *	@v:   pointer passed unmodified to notifier function
+ *
+ *	Call all network notifier blocks on netdev_chain.  Parameters and
+ *	return value are as for notifier_call_chain().
+ */
+
+int call_netdevice_notifiers(unsigned long val, void *v)
+{
+	return notifier_call_chain(&netdev_chain, val, v);
+}
+
+/* When > 0 there are consumers of rx skb time stamps */
+static atomic_t netstamp_needed = ATOMIC_INIT(0);
+
+void net_enable_timestamp(void)
+{
+	atomic_inc(&netstamp_needed);	/* one more time stamp consumer */
+}
+
+void net_disable_timestamp(void)
+{
+	atomic_dec(&netstamp_needed);	/* one time stamp consumer fewer */
+}
+
+static inline void net_timestamp(struct timeval *stamp)
+{
+	if (!atomic_read(&netstamp_needed)) {	/* nobody wants stamps: zero it */
+		stamp->tv_sec = 0;
+		stamp->tv_usec = 0;
+		return;
+	}
+	do_gettimeofday(stamp);
+}
+
+/*
+ *	Support routine. Hands a clone of each outgoing frame to every
+ *	matching handler on ptype_all (the network taps currently in use).
+ */
+
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct packet_type *ptype;
+	net_timestamp(&skb->stamp);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		/* Never send packets back to the socket
+		 * they originated from - MvS (miquels@drinkel.ow.org)
+		 */
+		if ((ptype->dev == dev || !ptype->dev) &&
+		    (ptype->af_packet_priv == NULL ||
+		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
+			if (!skb2)
+				break;	/* clone failed: remaining taps are skipped */
+
+			/* skb->nh should be correctly
+			   set by sender, so that the second statement is
+			   just protection against buggy protocols.
+			 */
+			skb2->mac.raw = skb2->data;
+
+			if (skb2->nh.raw < skb2->data ||
+			    skb2->nh.raw > skb2->tail) {
+				if (net_ratelimit())
+					printk(KERN_CRIT "protocol %04x is "
+					       "buggy, dev %s\n",
+					       skb2->protocol, dev->name);
+				skb2->nh.raw = skb2->data;
+			}
+
+			skb2->h.raw = skb2->nh.raw;
+			skb2->pkt_type = PACKET_OUTGOING;
+			ptype->func(skb2, skb->dev, ptype);	/* the handler gets the clone */
+		}
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Invalidate hardware checksum when packet is to be mangled (@inward), or
+ * complete it manually on outgoing path. 0 or pskb_expand_head() error.
+ */
+int skb_checksum_help(struct sk_buff *skb, int inward)
+{
+	unsigned int csum;
+	int ret = 0, offset = skb->h.raw - skb->data;
+
+	if (inward) {
+		skb->ip_summed = CHECKSUM_NONE;
+		goto out;
+	}
+
+	if (skb_cloned(skb)) {
+		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		if (ret)
+			goto out;
+	}
+
+	if (offset > (int)skb->len)
+		BUG();
+	csum = skb_checksum(skb, offset, skb->len-offset, 0);	/* sum from h.raw on */
+
+	offset = skb->tail - skb->h.raw;
+	if (offset <= 0)
+		BUG();
+	if (skb->csum + 2 > offset)	/* the 16-bit result must fit before tail */
+		BUG();
+
+	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);	/* skb->csum is an offset here */
+	skb->ip_summed = CHECKSUM_NONE;
+out:	
+	return ret;
+}
+
+#ifdef CONFIG_HIGHMEM
+/* Reports whether @skb has a highmem page fragment that @dev, lacking
+ * NETIF_F_HIGHDMA, cannot DMA from. Actually, we should eliminate this
+ * check as soon as we know that: 1. IOMMU is present and allows to map
+ * all the memory. 2. No high memory really exists on this machine.
+ */
+static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo;
+	int frag;
+
+	if (dev->features & NETIF_F_HIGHDMA)
+		return 0;
+	shinfo = skb_shinfo(skb);
+	for (frag = 0; frag < shinfo->nr_frags; frag++)
+		if (PageHighMem(shinfo->frags[frag].page))
+			return 1;
+	return 0;
+}
+#else
+#define illegal_highdma(dev, skb)	(0)
+#endif
+
+extern void skb_release_data(struct sk_buff *);
+
+/* Keep head the same: replace data. Returns 0, or -ENOMEM on kmalloc failure. */
+int __skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+	unsigned int size;
+	u8 *data;
+	long offset;
+	struct skb_shared_info *ninfo;
+	int headerlen = skb->data - skb->head;
+	int expand = (skb->tail + skb->data_len) - skb->end;	/* bytes that do not fit */
+
+	if (skb_shared(skb))
+		BUG();
+
+	if (expand <= 0)
+		expand = 0;
+
+	size = skb->end - skb->head + expand;
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		return -ENOMEM;
+
+	/* Copy entire thing */
+	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
+		BUG();
+
+	/* Set up shinfo */
+	ninfo = (struct skb_shared_info*)(data + size);
+	atomic_set(&ninfo->dataref, 1);
+	ninfo->tso_size = skb_shinfo(skb)->tso_size;
+	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+	ninfo->nr_frags = 0;
+	ninfo->frag_list = NULL;
+
+	/* Offset between the two in bytes */
+	offset = data - skb->head;
+
+	/* Free old data. */
+	skb_release_data(skb);
+
+	skb->head = data;
+	skb->end  = data + size;
+
+	/* Set up new pointers */
+	skb->h.raw   += offset;
+	skb->nh.raw  += offset;
+	skb->mac.raw += offset;
+	skb->tail    += offset;
+	skb->data    += offset;
+
+	/* We are no longer a clone, even if we were. */
+	skb->cloned    = 0;
+
+	skb->tail     += skb->data_len;	/* formerly paged bytes now follow the head */
+	skb->data_len  = 0;
+	return 0;
+}
+
+#define HARD_TX_LOCK(dev, cpu) {			\
+	if ((dev->features & NETIF_F_LLTX) == 0) {	\
+		spin_lock(&dev->xmit_lock);		\
+		dev->xmit_lock_owner = cpu;		\
+	}						\
+}
+
+#define HARD_TX_UNLOCK(dev) {				\
+	if ((dev->features & NETIF_F_LLTX) == 0) {	\
+		dev->xmit_lock_owner = -1;		\
+		spin_unlock(&dev->xmit_lock);		\
+	}						\
+}
+
+/**
+ *	dev_queue_xmit - transmit a buffer
+ *	@skb: buffer to transmit
+ *
+ *	Queue a buffer for transmission to a network device. The caller must
+ *	have set the device and priority and built the buffer before calling
+ *	this function. The function can be called from an interrupt.
+ *
+ *	A negative errno code is returned on a failure, in which case the
+ *	buffer has been freed here. A success does not guarantee the frame
+ *	will be transmitted as it may be dropped due to congestion or shaping.
+ */
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct Qdisc *q;
+	int rc = -ENOMEM;
+
+	if (skb_shinfo(skb)->frag_list &&
+	    !(dev->features & NETIF_F_FRAGLIST) &&
+	    __skb_linearize(skb, GFP_ATOMIC))
+		goto out_kfree_skb;
+
+	/* Fragmented skb is linearized if device does not support SG,
+	 * or if at least one of fragments is in highmem and device
+	 * does not support DMA from it.
+	 */
+	if (skb_shinfo(skb)->nr_frags &&
+	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
+	    __skb_linearize(skb, GFP_ATOMIC))
+		goto out_kfree_skb;
+
+	/* If packet is not checksummed and device does not support
+	 * checksumming for this protocol, complete checksumming here.
+	 */
+	if (skb->ip_summed == CHECKSUM_HW &&
+	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+	     (!(dev->features & NETIF_F_IP_CSUM) ||
+	      skb->protocol != htons(ETH_P_IP))))
+	      	if (skb_checksum_help(skb, 0))
+	      		goto out_kfree_skb;
+
+	/* Disable soft irqs for various locks below. Also 
+	 * stops preemption for RCU. 
+	 */
+	local_bh_disable(); 
+
+	/* Updates of qdisc are serialized by queue_lock. 
+	 * The struct Qdisc which is pointed to by qdisc is now a 
+	 * rcu structure - it may be accessed without acquiring 
+	 * a lock (but the structure may be stale.) The freeing of the
+	 * qdisc will be deferred until it's known that there are no 
+	 * more references to it.
+	 * 
+	 * If the qdisc has an enqueue function, we still need to 
+	 * hold the queue_lock before calling it, since queue_lock
+	 * also serializes access to the device queue.
+	 */
+
+	q = rcu_dereference(dev->qdisc);
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+#endif
+	if (q->enqueue) {
+		/* Grab device queue */
+		spin_lock(&dev->queue_lock);
+
+		rc = q->enqueue(skb, q);
+
+		qdisc_run(dev);
+
+		spin_unlock(&dev->queue_lock);
+		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;	/* BYPASS is reported as success */
+		goto out;
+	}
+
+	/* The device has no queue. Common case for software devices:
+	   loopback, all the sorts of tunnels...
+
+	   Really, it is unlikely that xmit_lock protection is necessary here.
+	   (f.e. loopback and IP tunnels are clean ignoring statistics
+	   counters.)
+	   However, it is possible, that they rely on protection
+	   made by us here.
+
+	   Check this and shot the lock. It is not prone from deadlocks.
+	   Either shot noqueue qdisc, it is even simpler 8)
+	 */
+	if (dev->flags & IFF_UP) {
+		int cpu = smp_processor_id(); /* ok because BHs are off */
+
+		if (dev->xmit_lock_owner != cpu) {
+
+			HARD_TX_LOCK(dev, cpu);
+
+			if (!netif_queue_stopped(dev)) {
+				if (netdev_nit)
+					dev_queue_xmit_nit(skb, dev);
+
+				rc = 0;
+				if (!dev->hard_start_xmit(skb, dev)) {
+					HARD_TX_UNLOCK(dev);
+					goto out;	/* skb is not freed on this path */
+				}
+			}
+			HARD_TX_UNLOCK(dev);
+			if (net_ratelimit())
+				printk(KERN_CRIT "Virtual device %s asks to "
+				       "queue packet!\n", dev->name);
+		} else {
+			/* Recursion is detected! It is possible,
+			 * unfortunately */
+			if (net_ratelimit())
+				printk(KERN_CRIT "Dead loop on virtual device "
+				       "%s, fix it urgently!\n", dev->name);
+		}
+	}
+
+	rc = -ENETDOWN;	/* also reached after the two warnings above */
+	local_bh_enable();
+
+out_kfree_skb:
+	kfree_skb(skb);
+	return rc;
+out:
+	local_bh_enable();
+	return rc;
+}
+
+
+/*=======================================================================
+			Receiver routines
+  =======================================================================*/
+
+int netdev_max_backlog = 300;	/* netif_rx() drops once a queue is longer */
+int weight_p = 64;            /* old backlog weight */
+/* These numbers are selected based on intuition and some
+ * experimentation, if you have more scientific way of doing this
+ * please go ahead and fix things.
+ */
+int no_cong_thresh = 10;
+int no_cong = 20;	/* avg backlog above this: NET_RX_CN_LOW */
+int lo_cong = 100;	/* avg backlog above this: NET_RX_CN_MOD */
+int mod_cong = 290;	/* avg backlog above this: NET_RX_CN_HIGH */
+
+DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+
+/* Fold the current backlog length of @cpu into avg_blog and set cng_level. */
+static void get_sample_stats(int cpu)
+{
+#ifdef RAND_LIE
+	unsigned long rd;
+	int rq;
+#endif
+	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
+	int blog = sd->input_pkt_queue.qlen;
+	int avg_blog = sd->avg_blog;
+
+	avg_blog = (avg_blog >> 1) + (blog >> 1);	/* half old average, half new sample */
+
+	if (avg_blog > mod_cong) {
+		/* Above moderate congestion levels. */
+		sd->cng_level = NET_RX_CN_HIGH;
+#ifdef RAND_LIE
+		rd = net_random();
+		rq = rd % netdev_max_backlog;
+		if (rq < avg_blog) /* unlucky bastard */
+			sd->cng_level = NET_RX_DROP;
+#endif
+	} else if (avg_blog > lo_cong) {
+		sd->cng_level = NET_RX_CN_MOD;
+#ifdef RAND_LIE
+		rd = net_random();
+		rq = rd % netdev_max_backlog;
+			if (rq < avg_blog) /* unlucky bastard */
+				sd->cng_level = NET_RX_CN_HIGH;
+#endif
+	} else if (avg_blog > no_cong)
+		sd->cng_level = NET_RX_CN_LOW;
+	else  /* no congestion */
+		sd->cng_level = NET_RX_SUCCESS;
+
+	sd->avg_blog = avg_blog;
+}
+
+#ifdef OFFLINE_SAMPLE
+static void sample_queue(unsigned long dummy)
+{
+/* 10 ms or 1ms -- i don't care -- JHS */
+	int next_tick = 1;
+	int cpu = smp_processor_id();
+
+	get_sample_stats(cpu);
+	next_tick += jiffies;
+	mod_timer(&samp_timer, next_tick);	/* re-arm samp_timer one jiffy from now */
+}
+#endif
+
+
+/**
+ *	netif_rx	-	post buffer to the network code
+ *	@skb: buffer to post
+ *
+ *	This function receives a packet from a device driver and queues it
+ *	on this CPU's input_pkt_queue for the upper (protocol) levels to
+ *	process. The buffer is freed here when it is dropped; it may also
+ *	be dropped later by the protocol layers.
+ *
+ *	return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_CN_LOW   (low congestion)
+ *	NET_RX_CN_MOD   (moderate congestion)
+ *	NET_RX_CN_HIGH  (high congestion)
+ *	NET_RX_DROP     (packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+	int this_cpu;
+	struct softnet_data *queue;
+	unsigned long flags;
+
+	/* if netpoll wants it, pretend we never saw it */
+	if (netpoll_rx(skb))
+		return NET_RX_DROP;
+
+	if (!skb->stamp.tv_sec)
+		net_timestamp(&skb->stamp);
+
+	/*
+	 * The code is rearranged so that the path is the most
+	 * short when CPU is congested, but is still operating.
+	 */
+	local_irq_save(flags);
+	this_cpu = smp_processor_id();
+	queue = &__get_cpu_var(softnet_data);
+
+	__get_cpu_var(netdev_rx_stat).total++;
+	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (queue->input_pkt_queue.qlen) {
+			if (queue->throttle)
+				goto drop;
+
+enqueue:
+			dev_hold(skb->dev);	/* dropped again in process_backlog() */
+			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifndef OFFLINE_SAMPLE
+			get_sample_stats(this_cpu);
+#endif
+			local_irq_restore(flags);
+			return queue->cng_level;
+		}
+
+		/* Queue was empty: leave throttling and schedule the backlog device. */
+		if (queue->throttle)
+			queue->throttle = 0;
+
+		netif_rx_schedule(&queue->backlog_dev);
+		goto enqueue;
+	}
+
+	if (!queue->throttle) {
+		queue->throttle = 1;
+		__get_cpu_var(netdev_rx_stat).throttled++;
+	}
+
+drop:
+	__get_cpu_var(netdev_rx_stat).dropped++;
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+int netif_rx_ni(struct sk_buff *skb)
+{
+	int verdict;
+
+	/* Preemption stays off from queueing until pending softirqs ran. */
+	preempt_disable();
+	verdict = netif_rx(skb);
+	if (local_softirq_pending())
+		do_softirq();
+	preempt_enable();
+	return verdict;
+}
+
+EXPORT_SYMBOL(netif_rx_ni);
+
+static __inline__ void skb_bond(struct sk_buff *skb)
+{
+	struct net_device *master = skb->dev->master;
+
+	if (!master)
+		return;
+	skb->real_dev = skb->dev;	/* remember the receiving device */
+	skb->dev = master;
+}
+
+static void net_tx_action(struct softirq_action *h)
+{
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+
+	if (sd->completion_queue) {	/* skbs waiting to be freed */
+		struct sk_buff *clist;
+
+		local_irq_disable();
+		clist = sd->completion_queue;	/* detach the whole list at once */
+		sd->completion_queue = NULL;
+		local_irq_enable();
+
+		while (clist) {
+			struct sk_buff *skb = clist;
+			clist = clist->next;
+
+			BUG_TRAP(!atomic_read(&skb->users));
+			__kfree_skb(skb);
+		}
+	}
+
+	if (sd->output_queue) {	/* devices scheduled for a qdisc run */
+		struct net_device *head;
+
+		local_irq_disable();
+		head = sd->output_queue;
+		sd->output_queue = NULL;
+		local_irq_enable();
+
+		while (head) {
+			struct net_device *dev = head;
+			head = head->next_sched;
+
+			smp_mb__before_clear_bit();
+			clear_bit(__LINK_STATE_SCHED, &dev->state);
+
+			if (spin_trylock(&dev->queue_lock)) {
+				qdisc_run(dev);
+				spin_unlock(&dev->queue_lock);
+			} else {
+				netif_schedule(dev);	/* lock is busy: reschedule the device */
+			}
+		}
+	}
+}
+
+static __inline__ int deliver_skb(struct sk_buff *skb,
+				  struct packet_type *pt_prev)
+{
+	atomic_inc(&skb->users);	/* extra reference for this handler call */
+	return pt_prev->func(skb, skb->dev, pt_prev);
+}
+
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+struct net_bridge;
+struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
+						unsigned char *addr);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+
+static __inline__ int handle_bridge(struct sk_buff **pskb,
+				    struct packet_type **pt_prev, int *ret)
+{
+	struct net_bridge_port *port;
+
+	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
+	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
+		return 0;	/* loopback packet or device has no br_port */
+
+	if (*pt_prev) {
+		*ret = deliver_skb(*pskb, *pt_prev);
+		*pt_prev = NULL;
+	} 
+	/* Pending handler (if any) has run; the hook's result is returned. */
+	return br_handle_frame_hook(port, pskb);
+}
+#else
+#define handle_bridge(skb, pt_prev, ret)	(0)
+#endif
+
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
+ * a compare and 2 stores extra right now if we dont have it on
+ * but have CONFIG_NET_CLS_ACT
+ * NOTE: This doesnt stop any functionality; if you dont have 
+ * the ingress scheduler, you just cant add policies on ingress.
+ *
+ */
+static int ing_filter(struct sk_buff *skb) 
+{
+	struct Qdisc *q;
+	struct net_device *dev = skb->dev;
+	int result = TC_ACT_OK;
+	
+	if (dev->qdisc_ingress) {
+		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
+		if (MAX_RED_LOOP < ttl++) {	/* NOTE(review): printk below has no log level or rate limit */
+			printk("Redir loop detected Dropping packet (%s->%s)\n",
+				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+			return TC_ACT_SHOT;
+		}
+
+		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
+
+		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
+		if (NULL == skb->input_dev) {	/* NOTE(review): also an unleveled, unlimited printk */
+			skb->input_dev = skb->dev;
+			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
+		}
+		spin_lock(&dev->ingress_lock);
+		if ((q = dev->qdisc_ingress) != NULL)	/* re-read under ingress_lock */
+			result = q->enqueue(skb, q);
+		spin_unlock(&dev->ingress_lock);
+
+	}
+
+	return result;
+}
+#endif
+
+int netif_receive_skb(struct sk_buff *skb)
+{
+	struct packet_type *ptype, *pt_prev;
+	int ret = NET_RX_DROP;
+	unsigned short type;
+
+	/* if we've gotten here through NAPI, check netpoll */
+	if (skb->dev->poll && netpoll_rx(skb))
+		return NET_RX_DROP;
+
+	if (!skb->stamp.tv_sec)
+		net_timestamp(&skb->stamp);
+
+	skb_bond(skb);
+
+	__get_cpu_var(netdev_rx_stat).total++;
+
+	skb->h.raw = skb->nh.raw = skb->data;
+	skb->mac_len = skb->nh.raw - skb->mac.raw;
+
+	pt_prev = NULL;	/* delivery runs one handler behind; the last gets skb itself */
+
+	rcu_read_lock();
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (skb->tc_verd & TC_NCLS) {
+		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+		goto ncls;
+	}
+#endif
+
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		if (!ptype->dev || ptype->dev == skb->dev) {
+			if (pt_prev) 
+				ret = deliver_skb(skb, pt_prev);
+			pt_prev = ptype;
+		}
+	}
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (pt_prev) {
+		ret = deliver_skb(skb, pt_prev);
+		pt_prev = NULL; /* no one else should process this after */
+	} else {
+		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+	}
+
+	ret = ing_filter(skb);
+
+	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	skb->tc_verd = 0;
+ncls:
+#endif
+
+	handle_diverter(skb);
+
+	if (handle_bridge(&skb, &pt_prev, &ret))
+		goto out;
+
+	type = skb->protocol;
+	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
+		if (ptype->type == type &&
+		    (!ptype->dev || ptype->dev == skb->dev)) {
+			if (pt_prev) 
+				ret = deliver_skb(skb, pt_prev);
+			pt_prev = ptype;
+		}
+	}
+
+	if (pt_prev) {
+		ret = pt_prev->func(skb, skb->dev, pt_prev);	/* last handler: no extra reference */
+	} else {
+		kfree_skb(skb);
+		/* Jamal, now you will not able to escape explaining
+		 * me how you were going to use this. :-)
+		 */
+		ret = NET_RX_DROP;
+	}
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static int process_backlog(struct net_device *backlog_dev, int *budget)
+{
+	int work = 0;
+	int quota = min(backlog_dev->quota, *budget);
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	unsigned long start_time = jiffies;
+
+	for (;;) {
+		struct sk_buff *skb;
+		struct net_device *dev;
+
+		local_irq_disable();
+		skb = __skb_dequeue(&queue->input_pkt_queue);
+		if (!skb)
+			goto job_done;	/* queue drained; irqs are still off here */
+		local_irq_enable();
+
+		dev = skb->dev;
+
+		netif_receive_skb(skb);
+
+		dev_put(dev);	/* pairs with dev_hold() in netif_rx() */
+
+		work++;
+
+		if (work >= quota || jiffies - start_time > 1)
+			break;
+
+	}
+
+	backlog_dev->quota -= work;
+	*budget -= work;
+	return -1;	/* stopped on quota or time; packets may remain queued */
+
+job_done:
+	backlog_dev->quota -= work;
+	*budget -= work;
+
+	list_del(&backlog_dev->poll_list);
+	smp_mb__before_clear_bit();
+	netif_poll_enable(backlog_dev);
+
+	if (queue->throttle)
+		queue->throttle = 0;
+	local_irq_enable();
+	return 0;
+}
+
+static void net_rx_action(struct softirq_action *h)
+{
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	unsigned long start_time = jiffies;
+	int budget = netdev_max_backlog;
+
+	/* Poll devices on this CPU's poll_list until budget or time runs out. */
+	local_irq_disable();
+
+	while (!list_empty(&queue->poll_list)) {
+		struct net_device *dev;
+
+		if (budget <= 0 || jiffies - start_time > 1)
+			goto softnet_break;
+
+		local_irq_enable();
+
+		dev = list_entry(queue->poll_list.next,
+				 struct net_device, poll_list);
+		netpoll_poll_lock(dev);
+
+		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+			netpoll_poll_unlock(dev);
+			local_irq_disable();
+			list_del(&dev->poll_list);	/* move the device to the list tail */
+			list_add_tail(&dev->poll_list, &queue->poll_list);
+			if (dev->quota < 0)
+				dev->quota += dev->weight;
+			else
+				dev->quota = dev->weight;
+		} else {
+			netpoll_poll_unlock(dev);
+			dev_put(dev);
+			local_irq_disable();
+		}
+	}
+out:
+	local_irq_enable();
+	return;
+
+softnet_break:
+	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);	/* re-raise so the rest is handled later */
+	goto out;
+}
+
+static gifconf_func_t * gifconf_list [NPROTO];
+
+/**
+ *	register_gifconf	-	register a SIOCGIF handler
+ *	@family: Address family, used as the index into gifconf_list
+ *	@gifconf: Function handler
+ *
+ *	Stores the address dumping routine for @family. Returns 0, or -EINVAL
+ *	if @family >= NPROTO. It must stay valid until another replaces it.
+ */
+int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
+{
+	if (family < NPROTO) {
+		gifconf_list[family] = gifconf;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+
+/*
+ *	Map an interface index to its name (SIOCGIFNAME): read a struct
+ *	ifreq from user space, look the device up by ifr_ifindex and copy
+ *	the block back with ifr_name filled in. Returns 0, -EFAULT on a
+ *	failed user copy, or -ENODEV if no device has that index.
+ *
+ *	We need this ioctl for efficient implementation of the
+ *	if_indextoname() function required by the IPv6 API.  Without
+ *	it, we would have to search all the interfaces to find a
+ *	match.  --pb
+ */
+static int dev_ifname(struct ifreq __user *arg)
+{
+	struct net_device *dev;
+	struct ifreq ifr;
+	int err = -ENODEV;
+
+	/* Pull in the request; only ifr_ifindex is consumed. */
+	if (copy_from_user(&ifr, arg, sizeof(ifr)))
+		return -EFAULT;
+
+	/* The name is copied under the lock; no reference is taken. */
+	read_lock(&dev_base_lock);
+	dev = __dev_get_by_index(ifr.ifr_ifindex);
+	if (dev) {
+		strcpy(ifr.ifr_name, dev->name);
+		err = 0;
+	}
+	read_unlock(&dev_base_lock);
+	if (err)
+		return err;
+
+	/* Hand the filled-in block back to user space. */
+	if (copy_to_user(arg, &ifr, sizeof(ifr)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	Perform a SIOCGIFCONF call. This structure will change
+ *	size eventually, and there is nothing I can do about it.
+ *	Thus we will need a 'compatibility mode'.
+ */
+
+static int dev_ifconf(char __user *arg)
+{
+	struct ifconf ifc;
+	struct net_device *dev;
+	char __user *pos;
+	int len;
+	int total;
+	int i;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
+		return -EFAULT;
+
+	pos = ifc.ifc_buf;
+	len = ifc.ifc_len;
+
+	/*
+	 *	Loop over the interfaces, and write an info block for each.
+	 */
+
+	total = 0;
+	for (dev = dev_base; dev; dev = dev->next) {	/* NOTE(review): no lock taken here - confirm the caller holds one */
+		for (i = 0; i < NPROTO; i++) {
+			if (gifconf_list[i]) {
+				int done;
+				if (!pos)	/* no buffer given: handler is called with NULL, 0 */
+					done = gifconf_list[i](dev, NULL, 0);
+				else
+					done = gifconf_list[i](dev, pos + total,
+							       len - total);
+				if (done < 0)
+					return -EFAULT;
+				total += done;
+			}
+		}
+  	}
+
+	/*
+	 *	All done.  Write the updated control block back to the caller.
+	 */
+	ifc.ifc_len = total;
+
+	/*
+	 * 	Both BSD and Solaris return 0 here, so we do too.
+	 */
+	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/* Used by the /proc seq_file code below: returns the device that is
+ * @pos entries into the dev_base list, or NULL if the list is shorter. */
+static __inline__ struct net_device *dev_get_idx(loff_t pos)
+{
+	struct net_device *dev = dev_base;
+	loff_t skipped = 0;
+
+	while (dev && skipped < pos) {
+		dev = dev->next;
+		++skipped;
+	}
+	return skipped == pos ? dev : NULL;
+}
+
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);	/* released in dev_seq_stop() */
+	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;	/* position 0 is the header */
+}
+
+void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net_device *cur;
+
+	++*pos;
+
+	/* After the header token comes the first device on the list. */
+	if (v == SEQ_START_TOKEN)
+		return dev_base;
+
+	cur = v;
+	return cur->next;
+}
+
+/* End of a /proc/net/dev pass: drop the lock taken in dev_seq_start(). */
+void dev_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&dev_base_lock);
+}
+
+/*
+ *	Emit one /proc/net/dev line for @dev: either its counters or a
+ *	"No statistics available." notice.
+ */
+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
+{
+	struct net_device_stats *stats = NULL;
+
+	if (dev->get_stats)
+		stats = dev->get_stats(dev);
+
+	/*
+	 * A device may lack a get_stats hook, or the hook may hand back
+	 * NULL; in both cases print the placeholder line instead of
+	 * dereferencing a NULL pointer.
+	 */
+	if (!stats) {
+		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+		return;
+	}
+
+	/*
+	 * Several columns are sums of related error counters: rx "drop",
+	 * rx "frame" and tx "carrier" each fold multiple fields together.
+	 */
+	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+			"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+		   dev->name, stats->rx_bytes, stats->rx_packets,
+		   stats->rx_errors,
+		   stats->rx_dropped + stats->rx_missed_errors,
+		   stats->rx_fifo_errors,
+		   stats->rx_length_errors + stats->rx_over_errors +
+		     stats->rx_crc_errors + stats->rx_frame_errors,
+		   stats->rx_compressed, stats->multicast,
+		   stats->tx_bytes, stats->tx_packets,
+		   stats->tx_errors, stats->tx_dropped,
+		   stats->tx_fifo_errors, stats->collisions,
+		   stats->tx_carrier_errors +
+		     stats->tx_aborted_errors +
+		     stats->tx_window_errors +
+		     stats->tx_heartbeat_errors,
+		   stats->tx_compressed);
+}
+
+/*
+ *	Called from the PROCfs module. This now uses the new arbitrary sized
+ *	/proc/net interface to create /proc/net/dev
+ */
+static int dev_seq_show(struct seq_file *seq, void *v)
+{
+	/* Anything other than the start token is a device: print its row. */
+	if (v != SEQ_START_TOKEN) {
+		dev_seq_printf_stats(seq, v);
+		return 0;
+	}
+
+	/* The start token stands for the two-line column header. */
+	seq_puts(seq, "Inter-|   Receive                            "
+		      "                    |  Transmit\n"
+		      " face |bytes    packets errs drop fifo frame "
+		      "compressed multicast|bytes    packets errs "
+		      "drop fifo colls carrier compressed\n");
+	return 0;
+}
+
+static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+{
+	/*
+	 * Advance *pos past offline CPUs.  On a hit *pos is left on that
+	 * CPU; if none remain, *pos ends up at NR_CPUS and NULL is returned.
+	 */
+	for (; *pos < NR_CPUS; ++*pos) {
+		if (cpu_online(*pos))
+			return &per_cpu(netdev_rx_stat, *pos);
+	}
+	return NULL;
+}
+
+/* Begin a softnet_stat pass at the first online CPU at or after *pos. */
+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return softnet_get_online(pos);
+}
+
+/* Step past the current CPU and return the stats of the next online one. */
+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return softnet_get_online(pos);
+}
+
+/* Nothing to release: softnet_seq_start() takes no lock. */
+static void softnet_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ *	Print one line of nine hexadecimal counters for the per-CPU
+ *	netif_rx_stats block passed in @v.
+ */
+static int softnet_seq_show(struct seq_file *seq, void *v)
+{
+	struct netif_rx_stats *s = v;
+
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   s->total, s->dropped, s->time_squeeze, s->throttled,
+		   s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
+		   s->fastroute_deferred_out,
+		   /* The ninth column is cpu_collision; the alternative is compiled out. */
+#if 0
+		   s->fastroute_latency_reduction
+#else
+		   s->cpu_collision
+#endif
+		  );
+	return 0;
+}
+
+/* seq_file iterator callbacks backing the "dev" proc entry. */
+static struct seq_operations dev_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_seq_show,
+};
+
+/* open() handler for the "dev" proc entry: attach the dev_seq_ops iterator. */
+static int dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &dev_seq_ops);
+}
+
+/* File operations for the "dev" proc entry; read/seek/release are the generic seq_file ones. */
+static struct file_operations dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+/* seq_file iterator callbacks backing the "softnet_stat" proc entry. */
+static struct seq_operations softnet_seq_ops = {
+	.start = softnet_seq_start,
+	.next  = softnet_seq_next,
+	.stop  = softnet_seq_stop,
+	.show  = softnet_seq_show,
+};
+
+/* open() handler for "softnet_stat": attach the softnet_seq_ops iterator. */
+static int softnet_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &softnet_seq_ops);
+}
+
+/* File operations for "softnet_stat"; read/seek/release are the generic seq_file ones. */
+static struct file_operations softnet_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = softnet_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+/*
+ * wireless_proc_init() is defined elsewhere when WIRELESS_EXT is set;
+ * otherwise it collapses to a constant 0 so dev_proc_init() can call it
+ * unconditionally.
+ */
+#ifdef WIRELESS_EXT
+extern int wireless_proc_init(void);
+#else
+#define wireless_proc_init() 0
+#endif
+
+/*
+ *	Create the "dev" and "softnet_stat" proc entries and run the wireless
+ *	proc setup.  Returns 0 on success; on any failure the entries created
+ *	so far are removed again and -ENOMEM is returned.
+ */
+static int __init dev_proc_init(void)
+{
+	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+		return -ENOMEM;
+
+	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+		goto undo_dev;
+
+	if (wireless_proc_init())
+		goto undo_softnet;
+
+	return 0;
+
+	/* Unwind in reverse order of creation. */
+undo_softnet:
+	proc_net_remove("softnet_stat");
+undo_dev:
+	proc_net_remove("dev");
+	return -ENOMEM;
+}
+#else
+#define dev_proc_init() 0
+#endif	/* CONFIG_PROC_FS */
+
+
+/**
+ *	netdev_set_master	-	set up master/slave pair
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass %NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success the reference counts
+ *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
+ *	function returns zero.
+ */
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+	struct net_device *prev = slave->master;
+
+	ASSERT_RTNL();
+
+	/* A slave that already has a master cannot be handed a new one. */
+	if (master && prev)
+		return -EBUSY;
+
+	/* Pin the new master before publishing the pointer. */
+	if (master)
+		dev_hold(master);
+
+	slave->master = master;
+
+	synchronize_net();
+
+	/* Only after synchronize_net() is the old master's reference dropped. */
+	if (prev)
+		dev_put(prev);
+
+	/* IFF_SLAVE mirrors whether a master is currently attached. */
+	if (!master)
+		slave->flags &= ~IFF_SLAVE;
+	else
+		slave->flags |= IFF_SLAVE;
+
+	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+	return 0;
+}
+
+/**
+ *	dev_set_promiscuity	- update promiscuity count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove promiscuity from a device. While the count in the device
+ *	remains above zero the interface remains promiscuous. Once it hits zero
+ *	the device reverts back to normal filtering operation. A negative inc
+ *	value is used to drop promiscuity on the device.
+ */
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+	unsigned short flags_before = dev->flags;
+
+	/* Assume promiscuous, then retract if the counter lands on zero. */
+	dev->flags |= IFF_PROMISC;
+	dev->promiscuity += inc;
+	if (!dev->promiscuity)
+		dev->flags &= ~IFF_PROMISC;
+
+	/* No visible flag change: nothing to reload or log. */
+	if (dev->flags == flags_before)
+		return;
+
+	dev_mc_upload(dev);
+	if (dev->flags & IFF_PROMISC)
+		printk(KERN_INFO "device %s %s promiscuous mode\n",
+		       dev->name, "entered");
+	else
+		printk(KERN_INFO "device %s %s promiscuous mode\n",
+		       dev->name, "left");
+}
+
+/**
+ *	dev_set_allmulti	- update allmulti count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove reception of all multicast frames to a device. While the
+ *	count in the device remains above zero the interface keeps receiving
+ *	all multicast frames. Once it hits zero the device reverts back to normal
+ *	filtering operation. A negative @inc value is used to drop the counter
+ *	when releasing a resource needing all multicasts.
+ */
+
+void dev_set_allmulti(struct net_device *dev, int inc)
+{
+	unsigned short flags_before = dev->flags;
+
+	/* Assume all-multicast, then retract if the counter lands on zero. */
+	dev->flags |= IFF_ALLMULTI;
+	dev->allmulti += inc;
+	if (!dev->allmulti)
+		dev->flags &= ~IFF_ALLMULTI;
+
+	/* Reload the multicast list only when the flags really changed. */
+	if (dev->flags != flags_before)
+		dev_mc_upload(dev);
+}
+
+/*
+ *	Build the flag word reported for @dev: PROMISC and ALLMULTI are taken
+ *	from dev->gflags rather than dev->flags, and RUNNING is recomputed
+ *	from the running and carrier state.
+ */
+unsigned dev_get_flags(const struct net_device *dev)
+{
+	unsigned flags;
+
+	/* Start from dev->flags with the three synthesized bits removed. */
+	flags = dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI | IFF_RUNNING);
+
+	/* PROMISC/ALLMULTI come from the gflags copy. */
+	flags |= dev->gflags & (IFF_PROMISC | IFF_ALLMULTI);
+
+	if (netif_running(dev) && netif_carrier_ok(dev))
+		flags |= IFF_RUNNING;
+
+	return flags;
+}
+
+/*
+ *	Apply a new flag word to @dev.  Directly settable bits are copied
+ *	from @flags, IFF_UP changes are turned into dev_open()/dev_close()
+ *	calls, and IFF_PROMISC/IFF_ALLMULTI requests are tracked in
+ *	dev->gflags and applied through the counting helpers.  Returns the
+ *	result of dev_open()/dev_close() when IFF_UP changed, otherwise 0.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+	int ret;
+	int old_flags = dev->flags;
+
+	/*
+	 *	Set the flags on our device.
+	 */
+
+	/* First group: bits taken from the request.  Second: bits kept as-is. */
+	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
+			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
+			       IFF_AUTOMEDIA)) |
+		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
+				    IFF_ALLMULTI));
+
+	/*
+	 *	Load in the correct multicast list now the flags have changed.
+	 */
+
+	dev_mc_upload(dev);
+
+	/*
+	 *	Have we downed the interface. We handle IFF_UP ourselves
+	 *	according to user attempts to set it, rather than blindly
+	 *	setting it.
+	 */
+
+	ret = 0;
+	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
+		/* Was up -> close it; was down -> open it. */
+		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+		if (!ret)
+			dev_mc_upload(dev);
+	}
+
+	/*
+	 * Send NETDEV_CHANGE only for an up device, and only when a bit
+	 * outside UP/PROMISC/ALLMULTI/VOLATILE differs from before.
+	 */
+	if (dev->flags & IFF_UP &&
+	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
+					  IFF_VOLATILE)))
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+	/* gflags records the requested PROMISC state; toggle it and adjust the count. */
+	if ((flags ^ dev->gflags) & IFF_PROMISC) {
+		int inc = (flags & IFF_PROMISC) ? +1 : -1;
+		dev->gflags ^= IFF_PROMISC;
+		dev_set_promiscuity(dev, inc);
+	}
+
+	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+	   is important. Some (broken) drivers set IFF_PROMISC, when
+	   IFF_ALLMULTI is requested not asking us and not reporting.
+	 */
+	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
+		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
+		dev->gflags ^= IFF_ALLMULTI;
+		dev_set_allmulti(dev, inc);
+	}
+
+	/* Announce the set of changed bits over rtnetlink, if any. */
+	if (old_flags ^ dev->flags)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
+
+	return ret;
+}
+
+/*
+ *	Change the MTU of @dev to @new_mtu.  Returns 0 on success (including
+ *	the no-change case), -EINVAL for a negative MTU, -ENODEV if the
+ *	device is not present, or whatever the driver's change_mtu returns.
+ */
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	int err = 0;
+
+	/* Nothing to do if the value is unchanged. */
+	if (new_mtu == dev->mtu)
+		return 0;
+
+	/*	MTU must not be negative.	 */
+	if (new_mtu < 0)
+		return -EINVAL;
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	/* Let the driver decide if it has a hook, else just store it. */
+	if (!dev->change_mtu)
+		dev->mtu = new_mtu;
+	else
+		err = dev->change_mtu(dev, new_mtu);
+
+	if (err)
+		return err;
+
+	/* Notifier listeners are only told about an up device. */
+	if (dev->flags & IFF_UP)
+		notifier_call_chain(&netdev_chain,
+				    NETDEV_CHANGEMTU, dev);
+	return err;
+}
+
+/*
+ *	Hand a new hardware address to the driver.  Returns -EOPNOTSUPP when
+ *	the driver has no set_mac_address hook, -EINVAL when the address
+ *	family does not match dev->type, -ENODEV when the device is absent,
+ *	otherwise the driver's result.  NETDEV_CHANGEADDR is sent on success.
+ */
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+{
+	int rc;
+
+	if (!dev->set_mac_address)
+		return -EOPNOTSUPP;
+
+	if (sa->sa_family != dev->type)
+		return -EINVAL;
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	rc = dev->set_mac_address(dev, sa);
+	if (rc)
+		return rc;
+
+	notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+	return rc;
+}
+
+/*
+ *	Perform the SIOCxIFxxx calls.
+ */
+/*
+ *	@ifr is a kernel copy of the request: results are written into it and
+ *	the caller copies it back to user space where needed.  The device is
+ *	looked up by ifr->ifr_name; -ENODEV is returned if it does not exist.
+ *	dev_ioctl() calls this with dev_base_lock read-held for the read-only
+ *	commands and under rtnl_lock() for everything else.
+ */
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+
+	if (!dev)
+		return -ENODEV;
+
+	/* Every named case returns directly; only the default case sets err. */
+	switch (cmd) {
+		case SIOCGIFFLAGS:	/* Get interface flags */
+			ifr->ifr_flags = dev_get_flags(dev);
+			return 0;
+
+		case SIOCSIFFLAGS:	/* Set interface flags */
+			return dev_change_flags(dev, ifr->ifr_flags);
+
+		case SIOCGIFMETRIC:	/* Get the metric on the interface
+					   (currently unused) */
+			ifr->ifr_metric = 0;
+			return 0;
+
+		case SIOCSIFMETRIC:	/* Set the metric on the interface
+					   (currently unused) */
+			return -EOPNOTSUPP;
+
+		case SIOCGIFMTU:	/* Get the MTU of a device */
+			ifr->ifr_mtu = dev->mtu;
+			return 0;
+
+		case SIOCSIFMTU:	/* Set the MTU of a device */
+			return dev_set_mtu(dev, ifr->ifr_mtu);
+
+		case SIOCGIFHWADDR:
+			/* Copy at most sizeof(sa_data) bytes; zero-fill if the device has no address. */
+			if (!dev->addr_len)
+				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+			else
+				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+			ifr->ifr_hwaddr.sa_family = dev->type;
+			return 0;
+
+		case SIOCSIFHWADDR:
+			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+		case SIOCSIFHWBROADCAST:
+			/* The address family must match the device type. */
+			if (ifr->ifr_hwaddr.sa_family != dev->type)
+				return -EINVAL;
+			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+			notifier_call_chain(&netdev_chain,
+					    NETDEV_CHANGEADDR, dev);
+			return 0;
+
+		case SIOCGIFMAP:
+			ifr->ifr_map.mem_start = dev->mem_start;
+			ifr->ifr_map.mem_end   = dev->mem_end;
+			ifr->ifr_map.base_addr = dev->base_addr;
+			ifr->ifr_map.irq       = dev->irq;
+			ifr->ifr_map.dma       = dev->dma;
+			ifr->ifr_map.port      = dev->if_port;
+			return 0;
+
+		case SIOCSIFMAP:
+			/* Only supported when the driver provides set_config. */
+			if (dev->set_config) {
+				if (!netif_device_present(dev))
+					return -ENODEV;
+				return dev->set_config(dev, &ifr->ifr_map);
+			}
+			return -EOPNOTSUPP;
+
+		case SIOCADDMULTI:
+			/* Needs a multicast-capable driver and an AF_UNSPEC address. */
+			if (!dev->set_multicast_list ||
+			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
+					  dev->addr_len, 1);
+
+		case SIOCDELMULTI:
+			/* Same preconditions as SIOCADDMULTI. */
+			if (!dev->set_multicast_list ||
+			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
+					     dev->addr_len, 1);
+
+		case SIOCGIFINDEX:
+			ifr->ifr_ifindex = dev->ifindex;
+			return 0;
+
+		case SIOCGIFTXQLEN:
+			ifr->ifr_qlen = dev->tx_queue_len;
+			return 0;
+
+		case SIOCSIFTXQLEN:
+			if (ifr->ifr_qlen < 0)
+				return -EINVAL;
+			dev->tx_queue_len = ifr->ifr_qlen;
+			return 0;
+
+		case SIOCSIFNAME:
+			/* Force NUL termination of the user-supplied new name. */
+			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+			return dev_change_name(dev, ifr->ifr_newname);
+
+		/*
+		 *	Unknown or private ioctl
+		 */
+
+		default:
+			/*
+			 * Listed commands are passed to the driver's do_ioctl:
+			 * -EOPNOTSUPP if it has none, -ENODEV if the device is
+			 * not present.  Anything else is -EINVAL.
+			 */
+			if ((cmd >= SIOCDEVPRIVATE &&
+			    cmd <= SIOCDEVPRIVATE + 15) ||
+			    cmd == SIOCBONDENSLAVE ||
+			    cmd == SIOCBONDRELEASE ||
+			    cmd == SIOCBONDSETHWADDR ||
+			    cmd == SIOCBONDSLAVEINFOQUERY ||
+			    cmd == SIOCBONDINFOQUERY ||
+			    cmd == SIOCBONDCHANGEACTIVE ||
+			    cmd == SIOCGMIIPHY ||
+			    cmd == SIOCGMIIREG ||
+			    cmd == SIOCSMIIREG ||
+			    cmd == SIOCBRADDIF ||
+			    cmd == SIOCBRDELIF ||
+			    cmd == SIOCWANDEV) {
+				err = -EOPNOTSUPP;
+				if (dev->do_ioctl) {
+					if (netif_device_present(dev))
+						err = dev->do_ioctl(dev, ifr,
+								    cmd);
+					else
+						err = -ENODEV;
+				}
+			} else
+				err = -EINVAL;
+
+	}
+	return err;
+}
+
+/*
+ *	This function handles all "interface"-type I/O control requests. The actual
+ *	'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ *	dev_ioctl	-	network device ioctl
+ *	@cmd: command to issue
+ *	@arg: pointer to a struct ifreq in user space
+ *
+ *	Issue ioctl functions to devices. This is normally called by the
+ *	user space syscall interfaces but can sometimes be useful for
+ *	other purposes. The return value is the return from the syscall if
+ *	positive or a negative errno code on error.
+ */
+
+int dev_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	int ret;
+	char *colon;
+
+	/* One special case: SIOCGIFCONF takes ifconf argument
+	   and requires shared lock, because it sleeps writing
+	   to user space.
+	 */
+
+	if (cmd == SIOCGIFCONF) {
+		rtnl_shlock();
+		ret = dev_ifconf((char __user *) arg);
+		rtnl_shunlock();
+		return ret;
+	}
+	/* SIOCGIFNAME looks up by index and does its own user copies. */
+	if (cmd == SIOCGIFNAME)
+		return dev_ifname((struct ifreq __user *)arg);
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	/* Never trust user space to have terminated the name. */
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+	/*
+	 * Cut the name at the first ':' for the lookup; most paths put the
+	 * colon back before copying the request out again.
+	 * NOTE(review): presumably this handles alias-style names such as
+	 * "eth0:1" -- confirm.
+	 */
+	colon = strchr(ifr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+	/*
+	 *	See which interface the caller is talking about.
+	 */
+
+	switch (cmd) {
+		/*
+		 *	These ioctl calls:
+		 *	- can be done by all.
+		 *	- atomic and do not require locking.
+		 *	- return a value
+		 */
+		case SIOCGIFFLAGS:
+		case SIOCGIFMETRIC:
+		case SIOCGIFMTU:
+		case SIOCGIFHWADDR:
+		case SIOCGIFSLAVE:
+		case SIOCGIFMAP:
+		case SIOCGIFINDEX:
+		case SIOCGIFTXQLEN:
+			dev_load(ifr.ifr_name);
+			/* Read-only queries run under the dev_base_lock read lock. */
+			read_lock(&dev_base_lock);
+			ret = dev_ifsioc(&ifr, cmd);
+			read_unlock(&dev_base_lock);
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		case SIOCETHTOOL:
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ethtool(&ifr);
+			rtnl_unlock();
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		/*
+		 *	These ioctl calls:
+		 *	- require superuser power.
+		 *	- require strict serialization.
+		 *	- return a value
+		 */
+		case SIOCGMIIPHY:
+		case SIOCGMIIREG:
+		case SIOCSIFNAME:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		/*
+		 *	These ioctl calls:
+		 *	- require superuser power.
+		 *	- require strict serialization.
+		 *	- do not return a value
+		 */
+		case SIOCSIFFLAGS:
+		case SIOCSIFMETRIC:
+		case SIOCSIFMTU:
+		case SIOCSIFMAP:
+		case SIOCSIFHWADDR:
+		case SIOCSIFSLAVE:
+		case SIOCADDMULTI:
+		case SIOCDELMULTI:
+		case SIOCSIFHWBROADCAST:
+		case SIOCSIFTXQLEN:
+		case SIOCSMIIREG:
+		case SIOCBONDENSLAVE:
+		case SIOCBONDRELEASE:
+		case SIOCBONDSETHWADDR:
+		case SIOCBONDSLAVEINFOQUERY:
+		case SIOCBONDINFOQUERY:
+		case SIOCBONDCHANGEACTIVE:
+		case SIOCBRADDIF:
+		case SIOCBRDELIF:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			/* Nothing is copied back to user space on this path. */
+			return ret;
+
+		case SIOCGIFMEM:
+			/* Get the per device memory space. We can add this but
+			 * currently do not support it */
+		case SIOCSIFMEM:
+			/* Set the per device memory buffer space.
+			 * Not applicable in our case */
+		case SIOCSIFLINK:
+			return -EINVAL;
+
+		/*
+		 *	Unknown or private ioctl.
+		 */
+		default:
+			/*
+			 * Device-private and WAN ioctls: no capability check
+			 * here, and the colon is not restored before the
+			 * request is copied back.
+			 */
+			if (cmd == SIOCWANDEV ||
+			    (cmd >= SIOCDEVPRIVATE &&
+			     cmd <= SIOCDEVPRIVATE + 15)) {
+				dev_load(ifr.ifr_name);
+				rtnl_lock();
+				ret = dev_ifsioc(&ifr, cmd);
+				rtnl_unlock();
+				if (!ret && copy_to_user(arg, &ifr,
+							 sizeof(struct ifreq)))
+					ret = -EFAULT;
+				return ret;
+			}
+#ifdef WIRELESS_EXT
+			/* Take care of Wireless Extensions */
+			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+				/* If command is `set a parameter', or
+				 * `get the encoding parameters', check if
+				 * the user has the right to do it */
+				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
+					if (!capable(CAP_NET_ADMIN))
+						return -EPERM;
+				}
+				dev_load(ifr.ifr_name);
+				rtnl_lock();
+				/* Follow me in net/core/wireless.c */
+				ret = wireless_process_ioctl(&ifr, cmd);
+				rtnl_unlock();
+				/* GET commands copy the request back whatever ret is. */
+				if (IW_IS_GET(cmd) &&
+				    copy_to_user(arg, &ifr,
+					    	 sizeof(struct ifreq)))
+					ret = -EFAULT;
+				return ret;
+			}
+#endif	/* WIRELESS_EXT */
+			return -EINVAL;
+	}
+}
+
+
+/**
+ *	dev_new_index	-	allocate an ifindex
+ *
+ *	Returns a suitable unique value for a new device interface
+ *	number.  The caller must hold the rtnl semaphore or the
+ *	dev_base_lock to be sure it remains unique.
+ */
+static int dev_new_index(void)
+{
+	/* Last index handed out; persists across calls. */
+	static int ifindex;
+
+	do {
+		/* Wrap back to 1 if the counter overflows to zero or below. */
+		if (++ifindex <= 0)
+			ifindex = 1;
+	} while (__dev_get_by_index(ifindex));	/* skip indices in use */
+
+	return ifindex;
+}
+
+static int dev_boot_phase = 1;
+
+/* Delayed registration/unregistration */
+static DEFINE_SPINLOCK(net_todo_list_lock);
+static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+
+/* Queue @dev at the tail of net_todo_list, under net_todo_list_lock. */
+static inline void net_set_todo(struct net_device *dev)
+{
+	spin_lock(&net_todo_list_lock);
+	list_add_tail(&dev->todo_list, &net_todo_list);
+	spin_unlock(&net_todo_list_lock);
+}
+
+/**
+ *	register_netdevice	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	Callers must hold the rtnl semaphore. You may want
+ *	register_netdev() instead of this.
+ *
+ *	BUGS:
+ *	The locking appears insufficient to guarantee two parallel registers
+ *	will not get the same name.
+ */
+
+int register_netdevice(struct net_device *dev)
+{
+	struct hlist_head *head;
+	struct hlist_node *p;
+	int ret;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	/* When net_device's are persistent, this will be fatal. */
+	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+
+	spin_lock_init(&dev->queue_lock);
+	spin_lock_init(&dev->xmit_lock);
+	dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_CLS_ACT
+	spin_lock_init(&dev->ingress_lock);
+#endif
+
+	ret = alloc_divert_blk(dev);
+	if (ret)
+		goto out;
+
+	/* -1 marks iflink as "not set by the driver's init"; see below. */
+	dev->iflink = -1;
+
+	/* Init, if this function is available */
+	if (dev->init) {
+		ret = dev->init(dev);
+		if (ret) {
+			/* Normalise a positive failure code to -EIO. */
+			if (ret > 0)
+				ret = -EIO;
+			goto out_err;
+		}
+	}
+ 
+	/*
+	 * NOTE(review): the out_err paths taken after a successful
+	 * dev->init() do not call dev->uninit() -- confirm whether any
+	 * driver relies on that for cleanup.
+	 */
+	if (!dev_valid_name(dev->name)) {
+		ret = -EINVAL;
+		goto out_err;
+	}
+
+	dev->ifindex = dev_new_index();
+	/* If init left iflink untouched, default it to our own index. */
+	if (dev->iflink == -1)
+		dev->iflink = dev->ifindex;
+
+	/* Check for existence of name */
+	head = dev_name_hash(dev->name);
+	hlist_for_each(p, head) {
+		struct net_device *d
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+			ret = -EEXIST;
+ 			goto out_err;
+		}
+ 	}
+
+	/* Fix illegal SG+CSUM combinations. */
+	if ((dev->features & NETIF_F_SG) &&
+	    !(dev->features & (NETIF_F_IP_CSUM |
+			       NETIF_F_NO_CSUM |
+			       NETIF_F_HW_CSUM))) {
+		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
+		       dev->name);
+		dev->features &= ~NETIF_F_SG;
+	}
+
+	/* TSO requires that SG is present as well. */
+	if ((dev->features & NETIF_F_TSO) &&
+	    !(dev->features & NETIF_F_SG)) {
+		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
+		       dev->name);
+		dev->features &= ~NETIF_F_TSO;
+	}
+
+	/*
+	 *	nil rebuild_header routine,
+	 *	that should be never called and used as just bug trap.
+	 */
+
+	if (!dev->rebuild_header)
+		dev->rebuild_header = default_rebuild_header;
+
+	/*
+	 *	Default initial state at registry is that the
+	 *	device is present.
+	 */
+
+	set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+	dev->next = NULL;
+	dev_init_scheduler(dev);
+	/*
+	 * Publish the device: append it to the dev_base list and add it to
+	 * the name and index hashes, all under the dev_base_lock write lock.
+	 */
+	write_lock_bh(&dev_base_lock);
+	*dev_tail = dev;
+	dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, head);
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	dev_hold(dev);
+	dev->reg_state = NETREG_REGISTERING;
+	write_unlock_bh(&dev_base_lock);
+
+	/* Notify protocols, that a new device appeared. */
+	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+	/* Finish registration after unlock */
+	net_set_todo(dev);
+	ret = 0;
+
+out:
+	return ret;
+out_err:
+	/* Undo alloc_divert_blk() before reporting the failure. */
+	free_divert_blk(dev);
+	goto out;
+}
+
+/**
+ *	register_netdev	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	This is a wrapper around register_netdevice that takes the rtnl semaphore
+ *	and expands the device name if you passed a format string to
+ *	alloc_netdev.
+ */
+int register_netdev(struct net_device *dev)
+{
+	int rc;
+
+	rtnl_lock();
+
+	/*
+	 * A '%' in the name means it is a format string: the caller wants
+	 * a concrete name allocated from it.
+	 */
+	if (strchr(dev->name, '%')) {
+		rc = dev_alloc_name(dev, dev->name);
+		if (rc < 0)
+			goto unlock;
+	}
+
+	/*
+	 * Back compatibility hook. Kill this one in 2.5
+	 *
+	 * An empty name, or one starting with a space, gets an "eth%d"
+	 * name allocated for it.
+	 */
+	if (dev->name[0] == 0 || dev->name[0] == ' ') {
+		rc = dev_alloc_name(dev, "eth%d");
+		if (rc < 0)
+			goto unlock;
+	}
+
+	rc = register_netdevice(dev);
+
+unlock:
+	rtnl_unlock();
+	return rc;
+}
+EXPORT_SYMBOL(register_netdev);
+
+/*
+ * netdev_wait_allrefs - wait until all references are gone.
+ *
+ * This is called when unregistering network devices.
+ *
+ * Any protocol or device that holds a reference should register
+ * for netdevice notification, and cleanup and put back the
+ * reference if they receive an UNREGISTER event.
+ * We can get stuck here if buggy protocols don't correctly
+ * call dev_put. 
+ */
+static void netdev_wait_allrefs(struct net_device *dev)
+{
+	unsigned long rebroadcast_time, warning_time;
+
+	rebroadcast_time = warning_time = jiffies;
+	/* Poll every 250ms until the reference count reaches zero. */
+	while (atomic_read(&dev->refcnt) != 0) {
+		/* Roughly once a second, remind holders to let go. */
+		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
+			rtnl_shlock();
+
+			/* Rebroadcast unregister notification */
+			notifier_call_chain(&netdev_chain,
+					    NETDEV_UNREGISTER, dev);
+
+			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+				     &dev->state)) {
+				/* We must not have linkwatch events
+				 * pending on unregister. If this
+				 * happens, we simply run the queue
+				 * unscheduled, resulting in a noop
+				 * for this device.
+				 */
+				linkwatch_run_queue();
+			}
+
+			rtnl_shunlock();
+
+			rebroadcast_time = jiffies;
+		}
+
+		msleep(250);
+
+		/* Roughly every ten seconds, complain about the stuck refcount. */
+		if (time_after(jiffies, warning_time + 10 * HZ)) {
+			printk(KERN_EMERG "unregister_netdevice: "
+			       "waiting for %s to become free. Usage "
+			       "count = %d\n",
+			       dev->name, atomic_read(&dev->refcnt));
+			warning_time = jiffies;
+		}
+	}
+}
+
+/* The sequence is:
+ *
+ *	rtnl_lock();
+ *	...
+ *	register_netdevice(x1);
+ *	register_netdevice(x2);
+ *	...
+ *	unregister_netdevice(y1);
+ *	unregister_netdevice(y2);
+ *      ...
+ *	rtnl_unlock();
+ *	free_netdev(y1);
+ *	free_netdev(y2);
+ *
+ * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * This allows us to deal with problems:
+ * 1) We can create/delete sysfs objects which invoke hotplug
+ *    without deadlocking with linkwatch via keventd.
+ * 2) Since we run with the RTNL semaphore not held, we can sleep
+ *    safely in order to wait for the netdev refcnt to drop to zero.
+ */
+/* Serialises whole runs of netdev_run_todo() against each other. */
+static DECLARE_MUTEX(net_todo_run_mutex);
+void netdev_run_todo(void)
+{
+	struct list_head list = LIST_HEAD_INIT(list);
+	int err;
+
+
+	/* Need to guard against multiple cpu's getting out of order. */
+	down(&net_todo_run_mutex);
+
+	/* Not safe to do outside the semaphore.  We must not return
+	 * until all unregister events invoked by the local processor
+	 * have been completed (either by this todo run, or one on
+	 * another cpu).
+	 */
+	if (list_empty(&net_todo_list))
+		goto out;
+
+	/* Snapshot list, allow later requests */
+	spin_lock(&net_todo_list_lock);
+	list_splice_init(&net_todo_list, &list);
+	spin_unlock(&net_todo_list_lock);
+		
+	/* Drain the private snapshot one device at a time. */
+	while (!list_empty(&list)) {
+		struct net_device *dev
+			= list_entry(list.next, struct net_device, todo_list);
+		list_del(&dev->todo_list);
+
+		switch(dev->reg_state) {
+		case NETREG_REGISTERING:
+			/* A sysfs failure is logged but the device still becomes REGISTERED. */
+			err = netdev_register_sysfs(dev);
+			if (err)
+				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
+				       dev->name, err);
+			dev->reg_state = NETREG_REGISTERED;
+			break;
+
+		case NETREG_UNREGISTERING:
+			netdev_unregister_sysfs(dev);
+			dev->reg_state = NETREG_UNREGISTERED;
+
+			/* Sleeps until every reference on the device is gone. */
+			netdev_wait_allrefs(dev);
+
+			/* paranoia */
+			BUG_ON(atomic_read(&dev->refcnt));
+			BUG_TRAP(!dev->ip_ptr);
+			BUG_TRAP(!dev->ip6_ptr);
+			BUG_TRAP(!dev->dn_ptr);
+
+
+			/* It must be the very last action, 
+			 * after this 'dev' may point to freed up memory.
+			 */
+			if (dev->destructor)
+				dev->destructor(dev);
+			break;
+
+		default:
+			printk(KERN_ERR "network todo '%s' but state %d\n",
+			       dev->name, dev->reg_state);
+			break;
+		}
+	}
+
+out:
+	up(&net_todo_run_mutex);
+}
+
+/**
+ *	alloc_netdev - allocate network device
+ *	@sizeof_priv:	size of private data to allocate space for
+ *	@name:		device name format string
+ *	@setup:		callback to initialize device
+ *
+ *	Allocates a struct net_device with private data area for driver use
+ *	and performs basic initialization.
+ */
+struct net_device *alloc_netdev(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *))
+{
+	void *p;
+	struct net_device *dev;
+	int alloc_size;
+
+	/*
+	 * The name is copied verbatim into the fixed-size dev->name array
+	 * below; an over-long name would overrun it, so treat that as a
+	 * caller bug before anything is allocated.
+	 */
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
+	/* ensure 32-byte alignment of both the device and private area */
+	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+
+	p = kmalloc(alloc_size, GFP_KERNEL);
+	if (!p) {
+		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		return NULL;
+	}
+	memset(p, 0, alloc_size);
+
+	/*
+	 * Round the raw allocation up to the alignment boundary and remember
+	 * the offset in dev->padded so free_netdev() can recover the
+	 * original pointer for kfree().
+	 */
+	dev = (struct net_device *)
+		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	dev->padded = (char *)dev - (char *)p;
+
+	if (sizeof_priv)
+		dev->priv = netdev_priv(dev);
+
+	/* The name is set after setup() has run. */
+	setup(dev);
+	strcpy(dev->name, name);
+	return dev;
+}
+EXPORT_SYMBOL(alloc_netdev);
+
+/**
+ *	free_netdev - free network device
+ *	@dev: device
+ *
+ *	This function does the last stage of destroying an allocated device 
+ * 	interface. The reference to the device object is released.  
+ *	If this is the last reference then it will be freed.
+ */
+void free_netdev(struct net_device *dev)
+{
+#ifdef CONFIG_SYSFS
+	/*  Compatibility with error handling in drivers */
+	/* A device that never left UNINITIALIZED is freed directly. */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		/* dev->padded is the offset back to the kmalloc'ed pointer. */
+		kfree((char *)dev - dev->padded);
+		return;
+	}
+
+	/* Any other device must already be fully unregistered. */
+	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
+	dev->reg_state = NETREG_RELEASED;
+
+	/* will free via class release */
+	class_device_put(&dev->class_dev);
+#else
+	/* Without sysfs the memory is always released immediately. */
+	kfree((char *)dev - dev->padded);
+#endif
+}
+ 
+/* Synchronize with packet receive processing. */
+/* May sleep (see might_sleep()); the wait itself is synchronize_kernel(). */
+void synchronize_net(void) 
+{
+	might_sleep();
+	synchronize_kernel();
+}
+
+/**
+ *	unregister_netdevice - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables. On success 0 is returned, on a failure
+ *	a negative errno code is returned.
+ *
+ *	Callers must hold the rtnl semaphore.  You may want
+ *	unregister_netdev() instead of this.
+ */
+
+int unregister_netdevice(struct net_device *dev)
+{
+	struct net_device *d, **dp;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	/* Some devices call without registering for initialization unwind. */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
+				  "was registered\n", dev->name, dev);
+		return -ENODEV;
+	}
+
+	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+
+	/* If device is running, close it first. */
+	if (dev->flags & IFF_UP)
+		dev_close(dev);
+
+	/* And unlink it from device chain. */
+	/* dp tracks the link that points at d, so *dp = d->next unlinks it. */
+	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
+		if (d == dev) {
+			write_lock_bh(&dev_base_lock);
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			/* Removing the last entry: pull the tail pointer back. */
+			if (dev_tail == &dev->next)
+				dev_tail = dp;
+			*dp = d->next;
+			write_unlock_bh(&dev_base_lock);
+			break;
+		}
+	}
+	/* d is NULL only if the loop ran off the end without a match. */
+	if (!d) {
+		printk(KERN_ERR "unregister net_device: '%s' not found\n",
+		       dev->name);
+		return -ENODEV;
+	}
+
+	dev->reg_state = NETREG_UNREGISTERING;
+
+	synchronize_net();
+
+	/* Shutdown queueing discipline. */
+	dev_shutdown(dev);
+
+	
+	/* Notify protocols, that we are about to destroy
+	   this device. They should clean all the things.
+	*/
+	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+	
+	/*
+	 *	Flush the multicast chain
+	 */
+	dev_mc_discard(dev);
+
+	if (dev->uninit)
+		dev->uninit(dev);
+
+	/* Notifier chain MUST detach us from master device. */
+	BUG_TRAP(!dev->master);
+
+	free_divert_blk(dev);
+
+	/* Finish processing unregister after unlock */
+	net_set_todo(dev);
+
+	synchronize_net();
+
+	/* Drop the reference taken by dev_hold() in register_netdevice(). */
+	dev_put(dev);
+	return 0;
+}
+
+/**
+ *	unregister_netdev - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables. It returns nothing; the status of the
+ *	underlying unregister_netdevice() call is discarded.
+ *
+ *	This is just a wrapper for unregister_netdevice that takes
+ *	the rtnl semaphore.  In general you want to use this and not
+ *	unregister_netdevice.
+ */
+void unregister_netdev(struct net_device *dev)
+{
+	rtnl_lock();
+	/* The return value of unregister_netdevice() is not propagated. */
+	unregister_netdevice(dev);
+	rtnl_unlock();
+}
+
+EXPORT_SYMBOL(unregister_netdev);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ *	CPU notifier callback.  For CPU_DEAD it moves the dead CPU's
+ *	completion queue, output queue and pending input packets over to the
+ *	CPU running this callback; every other action is ignored.  @ocpu
+ *	carries the number of the CPU the event is about.
+ */
+static int dev_cpu_callback(struct notifier_block *nfb,
+			    unsigned long action,
+			    void *ocpu)
+{
+	struct sk_buff **list_skb;
+	struct net_device **list_net;
+	struct sk_buff *skb;
+	unsigned int cpu, oldcpu = (unsigned long)ocpu;
+	struct softnet_data *sd, *oldsd;
+
+	if (action != CPU_DEAD)
+		return NOTIFY_OK;
+
+	/* Interrupts stay off while the two per-CPU lists are spliced. */
+	local_irq_disable();
+	cpu = smp_processor_id();
+	sd = &per_cpu(softnet_data, cpu);
+	oldsd = &per_cpu(softnet_data, oldcpu);
+
+	/* Find end of our completion_queue. */
+	list_skb = &sd->completion_queue;
+	while (*list_skb)
+		list_skb = &(*list_skb)->next;
+	/* Append completion queue from offline CPU. */
+	*list_skb = oldsd->completion_queue;
+	oldsd->completion_queue = NULL;
+
+	/* Find end of our output_queue. */
+	list_net = &sd->output_queue;
+	while (*list_net)
+		list_net = &(*list_net)->next_sched;
+	/* Append output queue from offline CPU. */
+	*list_net = oldsd->output_queue;
+	oldsd->output_queue = NULL;
+
+	/* Kick the TX softirq so the adopted queues get processed. */
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_enable();
+
+	/* Process offline CPU's input_pkt_queue */
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+		netif_rx(skb);
+
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+
+/*
+ *	Initialize the DEV module. At boot time this walks the device list and
+ *	unhooks any devices that fail to initialise (normally hardware not
+ *	present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+/*
+ *	net_dev_init - boot-time initialisation of the device layer.
+ *
+ *	Calls dev_proc_init() and netdev_sysfs_init(), initialises the
+ *	packet-type lists and the device name/index hash heads, sets up the
+ *	softnet_data receive queue of every possible CPU, clears
+ *	dev_boot_phase, opens the NET_TX/NET_RX softirqs, registers the CPU
+ *	hotplug callback and finally runs dst_init() and dev_mcast_init().
+ *
+ *	This is called single threaded during boot, so no need
+ *	to take the rtnl semaphore.
+ *
+ *	Returns 0 on success, or -ENOMEM when dev_proc_init() or
+ *	netdev_sysfs_init() reports a failure (nothing done before the
+ *	failing call is undone).
+ */
+static int __init net_dev_init(void)
+{
+	int i, rc = -ENOMEM;
+
+	BUG_ON(!dev_boot_phase);
+
+	net_random_init();
+
+	if (dev_proc_init())
+		goto out;
+
+	if (netdev_sysfs_init())
+		goto out;
+
+	INIT_LIST_HEAD(&ptype_all);
+	/* Size the loop from the table itself, like the hash heads below. */
+	for (i = 0; i < ARRAY_SIZE(ptype_base); i++)
+		INIT_LIST_HEAD(&ptype_base[i]);
+
+	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
+		INIT_HLIST_HEAD(&dev_name_head[i]);
+
+	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
+		INIT_HLIST_HEAD(&dev_index_head[i]);
+
+	/*
+	 *	Initialise the packet receive queues.
+	 */
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct softnet_data *queue;
+
+		queue = &per_cpu(softnet_data, i);
+		skb_queue_head_init(&queue->input_pkt_queue);
+		queue->throttle = 0;
+		queue->cng_level = 0;
+		queue->avg_blog = 10; /* arbitrary non-zero */
+		queue->completion_queue = NULL;
+		INIT_LIST_HEAD(&queue->poll_list);
+		/* The per-CPU backlog pseudo-device polls via process_backlog. */
+		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
+		queue->backlog_dev.weight = weight_p;
+		queue->backlog_dev.poll = process_backlog;
+		atomic_set(&queue->backlog_dev.refcnt, 1);
+	}
+
+#ifdef OFFLINE_SAMPLE
+	samp_timer.expires = jiffies + (10 * HZ);
+	add_timer(&samp_timer);
+#endif
+
+	dev_boot_phase = 0;
+
+	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
+	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+
+	hotcpu_notifier(dev_cpu_callback, 0);
+	dst_init();
+	dev_mcast_init();
+	rc = 0;
+out:
+	return rc;
+}
+
+subsys_initcall(net_dev_init);
+
+EXPORT_SYMBOL(__dev_get_by_index);
+EXPORT_SYMBOL(__dev_get_by_name);
+EXPORT_SYMBOL(__dev_remove_pack);
+EXPORT_SYMBOL(__skb_linearize);
+EXPORT_SYMBOL(dev_add_pack);
+EXPORT_SYMBOL(dev_alloc_name);
+EXPORT_SYMBOL(dev_close);
+EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_index);
+EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(dev_ioctl);
+EXPORT_SYMBOL(dev_open);
+EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(dev_remove_pack);
+EXPORT_SYMBOL(dev_set_allmulti);
+EXPORT_SYMBOL(dev_set_promiscuity);
+EXPORT_SYMBOL(dev_change_flags);
+EXPORT_SYMBOL(dev_set_mtu);
+EXPORT_SYMBOL(dev_set_mac_address);
+EXPORT_SYMBOL(free_netdev);
+EXPORT_SYMBOL(netdev_boot_setup_check);
+EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_state_change);
+EXPORT_SYMBOL(netif_receive_skb);
+EXPORT_SYMBOL(netif_rx);
+EXPORT_SYMBOL(register_gifconf);
+EXPORT_SYMBOL(register_netdevice);
+EXPORT_SYMBOL(register_netdevice_notifier);
+EXPORT_SYMBOL(skb_checksum_help);
+EXPORT_SYMBOL(synchronize_net);
+EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_notifier);
+EXPORT_SYMBOL(net_enable_timestamp);
+EXPORT_SYMBOL(net_disable_timestamp);
+EXPORT_SYMBOL(dev_get_flags);
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL(br_fdb_get_hook);
+EXPORT_SYMBOL(br_fdb_put_hook);
+#endif
+
+#ifdef CONFIG_KMOD
+EXPORT_SYMBOL(dev_load);
+#endif
+
+EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
new file mode 100644
index 00000000000..db098ff3cd6
--- /dev/null
+++ b/net/core/dev_mcast.c
@@ -0,0 +1,299 @@
+/*
+ *	Linux NET3:	Multicast List maintenance. 
+ *
+ *	Authors:
+ *		Tim Kordas <tjk@nostromo.eeap.cwru.edu> 
+ *		Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ *	Stir fried together from the IP multicast and CAP patches above
+ *		Alan Cox <Alan.Cox@linux.org>	
+ *
+ *	Fixes:
+ *		Alan Cox	:	Update the device on a real delete
+ *					rather than any time but...
+ *		Alan Cox	:	IFF_ALLMULTI support.
+ *		Alan Cox	: 	New format set_multicast_list() calls.
+ *		Gleb Natapov    :       Remove dev_mc_lock.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h> 
+#include <linux/module.h> 
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+
+
+/*
+ *	Device multicast list maintenance. 
+ *
+ *	This is used both by IP and by the user level maintenance functions. 
+ *	Unlike BSD we maintain a usage count on a given multicast address so 
+ *	that a casual user application can add/delete multicasts used by 
+ *	protocols without doing damage to the protocols when it deletes the
+ *	entries. It also helps IP as it tracks overlapping maps.
+ *
+ *	Device mc lists are changed by bh at least if IPv6 is enabled,
+ *	so that it must be bh protected.
+ *
+ *	We block accesses to device mc filters with dev->xmit_lock.
+ */
+
+/*
+ *	Hand the device's multicast list to the driver.
+ *
+ *	The driver hook is invoked only when all of the following hold:
+ *	the interface is up (dev_open will call this function once it is,
+ *	so the list stays sane), the driver provides set_multicast_list,
+ *	and the device has not been detached.
+ */
+
+static void __dev_mc_upload(struct net_device *dev)
+{
+	if ((dev->flags & IFF_UP) &&
+	    dev->set_multicast_list != NULL &&
+	    netif_device_present(dev))
+		dev->set_multicast_list(dev);
+}
+
+/*
+ *	Locked variant of __dev_mc_upload(): holds dev->xmit_lock with
+ *	bottom halves disabled around the upload.
+ */
+void dev_mc_upload(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	__dev_mc_upload(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+/*
+ *	Delete a device level multicast
+ *
+ *	@dev:  device whose multicast list is edited
+ *	@addr: address to remove, @alen bytes long
+ *	@alen: length of @addr
+ *	@glbl: non-zero to drop the "global" user of the entry
+ *
+ *	Drops one user of the first entry matching @addr/@alen. When the
+ *	last user goes away the entry is unlinked and freed and the list is
+ *	uploaded to the device again.
+ *
+ *	Returns 0 on success and -ENOENT when no entry matches, or when
+ *	@glbl is set but the matching entry has no global user.
+ */
+
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
+{
+	int err = -ENOENT;
+	struct dev_mc_list *dmi, **dmip;
+
+	spin_lock_bh(&dev->xmit_lock);
+
+	for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+		/*
+		 *	Find the entry we want to delete. The device could
+		 *	have variable length entries, so compare the lengths
+		 *	first: memcmp() must never read more than @alen bytes
+		 *	from the caller's buffer.
+		 */
+		if (alen != dmi->dmi_addrlen ||
+		    memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) != 0)
+			continue;
+
+		if (glbl) {
+			int old_glbl = dmi->dmi_gusers;
+			dmi->dmi_gusers = 0;
+			/* No global user to drop: report -ENOENT. */
+			if (old_glbl == 0)
+				break;
+		}
+
+		err = 0;
+		if (--dmi->dmi_users)
+			break;
+
+		/*
+		 *	Last user. So delete the entry.
+		 */
+		*dmip = dmi->next;
+		dev->mc_count--;
+
+		kfree(dmi);
+
+		/*
+		 *	We have altered the list, so the card
+		 *	loaded filter is now wrong. Fix it
+		 */
+		__dev_mc_upload(dev);
+		break;
+	}
+
+	spin_unlock_bh(&dev->xmit_lock);
+	return err;
+}
+
+/*
+ *	Add a device level multicast
+ *
+ *	@dev:  device whose multicast list is edited
+ *	@addr: address to add, @alen bytes long
+ *	@alen: length of @addr
+ *	@glbl: non-zero to register the "global" user of the entry
+ *
+ *	If the address is already listed its user count is raised (for a
+ *	global add only when no global user was registered yet). Otherwise
+ *	a new entry is put at the head of the list and the list is uploaded
+ *	to the device.
+ *
+ *	Returns 0 on success, -EINVAL if @alen does not fit the address
+ *	field of a list entry, and -ENOMEM if a new entry was needed but
+ *	could not be allocated.
+ */
+
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
+{
+	int err = 0;
+	struct dev_mc_list *dmi, *dmi1;
+
+	/* The address is copied into the fixed-size dmi_addr array below. */
+	if (alen < 0 || alen > (int)sizeof(dmi->dmi_addr))
+		return -EINVAL;
+
+	/*
+	 * The candidate entry is allocated before the lock is taken; it is
+	 * released again at "done" if it turns out not to be needed.
+	 */
+	dmi1 = kmalloc(sizeof(*dmi1), GFP_ATOMIC);
+
+	spin_lock_bh(&dev->xmit_lock);
+	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+		/* Length first, so memcmp() never overruns the caller's buffer. */
+		if (dmi->dmi_addrlen == alen &&
+		    memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0) {
+			if (glbl) {
+				int old_glbl = dmi->dmi_gusers;
+				dmi->dmi_gusers = 1;
+				if (old_glbl)
+					goto done;
+			}
+			dmi->dmi_users++;
+			goto done;
+		}
+	}
+
+	if ((dmi = dmi1) == NULL) {
+		err = -ENOMEM;
+		goto done;
+	}
+	memcpy(dmi->dmi_addr, addr, alen);
+	dmi->dmi_addrlen = alen;
+	dmi->next = dev->mc_list;
+	dmi->dmi_users = 1;
+	dmi->dmi_gusers = glbl ? 1 : 0;
+	dev->mc_list = dmi;
+	dev->mc_count++;
+
+	__dev_mc_upload(dev);
+
+	/* The entry now belongs to the list; do not free it below. */
+	dmi1 = NULL;
+
+done:
+	spin_unlock_bh(&dev->xmit_lock);
+	kfree(dmi1);
+	return err;
+}
+
+/*
+ *	Throw away the whole multicast list of a device (used when the
+ *	device is downed). Entries that still have more users than global
+ *	users are reported as leaked before being freed.
+ */
+
+void dev_mc_discard(struct net_device *dev)
+{
+	struct dev_mc_list *entry;
+
+	spin_lock_bh(&dev->xmit_lock);
+
+	/* Pop entries off the head until the list is empty. */
+	while ((entry = dev->mc_list) != NULL) {
+		dev->mc_list = entry->next;
+		if (entry->dmi_users > entry->dmi_gusers)
+			printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", entry->dmi_users);
+		kfree(entry);
+	}
+	dev->mc_count = 0;
+
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	seq_file ->start: take dev_base_lock for reading and return the
+ *	device at position *pos in the dev_base list, or NULL when the list
+ *	has fewer entries. The lock is still held on return; it is dropped
+ *	in dev_mc_seq_stop().
+ */
+static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	loff_t idx;
+
+	read_lock(&dev_base_lock);
+	for (idx = 0, dev = dev_base; dev != NULL; dev = dev->next, idx++) {
+		if (idx == *pos)
+			break;
+	}
+	return dev;
+}
+
+/*
+ *	seq_file ->next: advance *pos and return the device following @v
+ *	in the list (NULL at the end).
+ */
+static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net_device *dev = v;
+	++*pos;
+	return dev->next;
+}
+
+/*
+ *	seq_file ->stop: release the read lock taken in dev_mc_seq_start().
+ */
+static void dev_mc_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&dev_base_lock);
+}
+
+
+/*
+ *	seq_file ->show: emit one line per multicast entry of the device
+ *	passed in @v: ifindex, name, user count, global user count and the
+ *	address as hex bytes. The list is walked under dev->xmit_lock.
+ *	Always returns 0.
+ */
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct net_device *dev = v;
+	struct dev_mc_list *entry;
+	int byte;
+
+	spin_lock_bh(&dev->xmit_lock);
+	entry = dev->mc_list;
+	while (entry != NULL) {
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, entry->dmi_users, entry->dmi_gusers);
+
+		for (byte = 0; byte < entry->dmi_addrlen; byte++)
+			seq_printf(seq, "%02x", entry->dmi_addr[byte]);
+
+		seq_putc(seq, '\n');
+		entry = entry->next;
+	}
+	spin_unlock_bh(&dev->xmit_lock);
+	return 0;
+}
+
+/* Iterator callbacks used by dev_mc_seq_open(): one record per net device. */
+static struct seq_operations dev_mc_seq_ops = {
+	.start = dev_mc_seq_start,
+	.next  = dev_mc_seq_next,
+	.stop  = dev_mc_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+/* open() handler: attach the dev_mc_seq_ops iterator to @file. */
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &dev_mc_seq_ops);
+}
+
+/*
+ * File operations registered for the "dev_mcast" entry by
+ * dev_mcast_init(); everything except open is the generic seq_file code.
+ */
+static struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+/*
+ * Boot-time setup: registers the "dev_mcast" entry through
+ * proc_net_fops_create(). The result of that call is not checked.
+ * NOTE(review): dev_mc_seq_fops is only defined under CONFIG_PROC_FS;
+ * presumably proc_net_fops_create() ignores its arguments otherwise --
+ * confirm against its definition.
+ */
+void __init dev_mcast_init(void)
+{
+	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+}
+
+EXPORT_SYMBOL(dev_mc_add);
+EXPORT_SYMBOL(dev_mc_delete);
+EXPORT_SYMBOL(dev_mc_upload);
diff --git a/net/core/dst.c b/net/core/dst.c
new file mode 100644
index 00000000000..3bf6cc43481
--- /dev/null
+++ b/net/core/dst.c
@@ -0,0 +1,276 @@
+/*
+ * net/core/dst.c	Protocol independent destination cache.
+ *
+ * Authors:		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <net/dst.h>
+
+/* Locking strategy:
+ * 1) Garbage collection state of dead destination cache
+ *    entries is protected by dst_lock.
+ * 2) GC is run only from BH context, and is the only remover
+ *    of entries.
+ * 3) Entries are added to the garbage list from both BH
+ *    and non-BH context, so local BH disabling is needed.
+ * 4) All operations modify state, so a spinlock is used.
+ */
+/* Head of the list (linked through ->next) of entries waiting to be destroyed. */
+static struct dst_entry 	*dst_garbage_list;
+#if RT_CACHE_DEBUG >= 2 
+/* Debug-only entry count: raised in dst_alloc(), lowered in dst_destroy(). */
+static atomic_t			 dst_total = ATOMIC_INIT(0);
+#endif
+/* Held while dst_garbage_list and the GC timer variables below are changed. */
+static DEFINE_SPINLOCK(dst_lock);
+
+/* Delay, in jiffies, used the next time dst_gc_timer is armed. */
+static unsigned long dst_gc_timer_expires;
+/* Amount added to dst_gc_timer_expires by each GC run that leaves garbage. */
+static unsigned long dst_gc_timer_inc = DST_GC_MAX;
+static void dst_run_gc(unsigned long);
+static void ___dst_free(struct dst_entry * dst);
+
+/* Timer whose handler is dst_run_gc(). */
+static struct timer_list dst_gc_timer =
+	TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0);
+
+/*
+ * Timer handler that walks dst_garbage_list and destroys every entry
+ * whose reference count has dropped to zero.
+ *
+ * If dst_lock cannot be taken immediately the run is postponed by HZ/10
+ * jiffies. Entries that are still referenced stay on the list. When
+ * garbage remains after the walk the timer is re-armed with a delay that
+ * grows on every run, capped at DST_GC_MAX; when the list is empty the
+ * timer is left stopped.
+ *
+ * @dummy: timer argument, unused.
+ */
+static void dst_run_gc(unsigned long dummy)
+{
+	int    delayed = 0;
+	struct dst_entry * dst, **dstp;
+
+	if (!spin_trylock(&dst_lock)) {
+		mod_timer(&dst_gc_timer, jiffies + HZ/10);
+		return;
+	}
+
+
+	del_timer(&dst_gc_timer);
+	dstp = &dst_garbage_list;
+	while ((dst = *dstp) != NULL) {
+		/* Still in use: keep it listed and count it as delayed. */
+		if (atomic_read(&dst->__refcnt)) {
+			dstp = &dst->next;
+			delayed++;
+			continue;
+		}
+		/* Unlink before destroying. */
+		*dstp = dst->next;
+
+		/* dst_destroy() may hand back a child that is still referenced. */
+		dst = dst_destroy(dst);
+		if (dst) {
+			/* NOHASH and still referenced. Unless it is already
+			 * on gc list, invalidate it and add to gc list.
+			 *
+			 * Note: this is temporary. Actually, NOHASH dst's
+			 * must be obsoleted when parent is obsoleted.
+			 * But we do not have state "obsoleted, but
+			 * referenced by parent", so it is right.
+			 */
+			if (dst->obsolete > 1)
+				continue;
+
+			/* Insert the child at the current position and step past it. */
+			___dst_free(dst);
+			dst->next = *dstp;
+			*dstp = dst;
+			dstp = &dst->next;
+		}
+	}
+	if (!dst_garbage_list) {
+		/* Nothing left: reset the increment and leave the timer stopped. */
+		dst_gc_timer_inc = DST_GC_MAX;
+		goto out;
+	}
+	/* Back off: lengthen the delay (capped) and grow the next increment. */
+	if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+		dst_gc_timer_expires = DST_GC_MAX;
+	dst_gc_timer_inc += DST_GC_INC;
+	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
+#if RT_CACHE_DEBUG >= 2
+	printk("dst_total: %d/%d %ld\n",
+	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
+#endif
+	add_timer(&dst_gc_timer);
+
+out:
+	spin_unlock(&dst_lock);
+}
+
+/*
+ * Input handler that simply drops the packet: frees @skb and returns 0.
+ * Installed as ->input by dst_alloc(), ___dst_free() and dst_ifdown().
+ */
+static int dst_discard_in(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Output handler that simply drops the packet: frees @skb and returns 0.
+ * Installed as ->output by dst_alloc(), ___dst_free() and dst_ifdown().
+ */
+static int dst_discard_out(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Allocate and initialise a destination cache entry for @ops.
+ *
+ * When @ops provides a gc hook and its entry count is above gc_thresh,
+ * the hook is run first; a non-zero result from it makes the allocation
+ * fail. The new entry is zeroed over ops->entry_size bytes, starts with
+ * a reference count of 0, points ->path at itself and discards all
+ * traffic until the caller installs real input/output handlers.
+ *
+ * Returns the new entry, or NULL if gc refused or the slab allocation
+ * failed.
+ */
+void * dst_alloc(struct dst_ops * ops)
+{
+	struct dst_entry *entry;
+
+	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh &&
+	    ops->gc())
+		return NULL;
+
+	entry = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
+	if (entry == NULL)
+		return NULL;
+
+	memset(entry, 0, ops->entry_size);
+	atomic_set(&entry->__refcnt, 0);
+	entry->ops = ops;
+	entry->lastuse = jiffies;
+	entry->path = entry;
+	entry->input = dst_discard_in;
+	entry->output = dst_discard_out;
+#if RT_CACHE_DEBUG >= 2
+	atomic_inc(&dst_total);
+#endif
+	atomic_inc(&ops->entries);
+	return entry;
+}
+
+/*
+ * Mark @dst as obsolete (obsolete = 2). If it has no device, or its
+ * device is not up, its input and output handlers are switched to the
+ * discard routines as well.
+ */
+static void ___dst_free(struct dst_entry * dst)
+{
+	struct net_device *dev = dst->dev;
+
+	/* The dev == NULL case is required when a protocol module
+	   is unloaded.
+	 */
+	if (!dev || !(dev->flags & IFF_UP)) {
+		dst->input = dst_discard_in;
+		dst->output = dst_discard_out;
+	}
+	dst->obsolete = 2;
+}
+
+/*
+ * Queue @dst for destruction by the garbage collector.
+ *
+ * Under dst_lock (bottom halves disabled) the entry is marked obsolete
+ * via ___dst_free() and pushed on the head of dst_garbage_list. If the
+ * GC back-off increment is above DST_GC_INC, the back-off is reset and
+ * the timer is re-armed to fire DST_GC_MIN jiffies from now.
+ */
+void __dst_free(struct dst_entry * dst)
+{
+	spin_lock_bh(&dst_lock);
+	___dst_free(dst);
+	dst->next = dst_garbage_list;
+	dst_garbage_list = dst;
+	if (dst_gc_timer_inc > DST_GC_INC) {
+		dst_gc_timer_inc = DST_GC_INC;
+		dst_gc_timer_expires = DST_GC_MIN;
+		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+	}
+	spin_unlock_bh(&dst_lock);
+}
+
+/*
+ * Release everything @dst holds and free it, then try to do the same
+ * for its child chain.
+ *
+ * For each entry: the hh cache reference is dropped (and the hh freed
+ * when that was the last one), the neighbour is released, the per-ops
+ * entry count is decremented, ops->destroy is called if present, the
+ * device reference is dropped and the entry is returned to its slab.
+ *
+ * After freeing an entry its child loses one reference. A DST_NOHASH
+ * child whose count reaches zero is destroyed in turn; a DST_NOHASH
+ * child that is still referenced is returned to the caller.
+ *
+ * Returns that still-referenced DST_NOHASH child, or NULL.
+ */
+struct dst_entry *dst_destroy(struct dst_entry * dst)
+{
+	struct dst_entry *child;
+	struct neighbour *neigh;
+	struct hh_cache *hh;
+
+	smp_rmb();
+
+again:
+	/* Snapshot the links before the entry is torn down and freed. */
+	neigh = dst->neighbour;
+	hh = dst->hh;
+	child = dst->child;
+
+	dst->hh = NULL;
+	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+		kfree(hh);
+
+	if (neigh) {
+		dst->neighbour = NULL;
+		neigh_release(neigh);
+	}
+
+	atomic_dec(&dst->ops->entries);
+
+	if (dst->ops->destroy)
+		dst->ops->destroy(dst);
+	if (dst->dev)
+		dev_put(dst->dev);
+#if RT_CACHE_DEBUG >= 2 
+	atomic_dec(&dst_total);
+#endif
+	kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+	/* dst is gone; continue with the child saved above. */
+	dst = child;
+	if (dst) {
+		if (atomic_dec_and_test(&dst->__refcnt)) {
+			/* We were real parent of this dst, so kill child. */
+			if (dst->flags&DST_NOHASH)
+				goto again;
+		} else {
+			/* Child is still referenced, return it for freeing. */
+			if (dst->flags&DST_NOHASH)
+				return dst;
+			/* Child is still in his hash table */
+		}
+	}
+	return NULL;
+}
+
+/* Dirty hack. We did it in 2.2 (in __dst_free),
+ * we have _very_ good reasons not to repeat
+ * this mistake in 2.3, but we have no choice
+ * now. _It_ _is_ _explicit_ _deliberate_
+ * _race_ _condition_.
+ *
+ * Commented and originally written by Alexey.
+ */
+/*
+ * React to @dev going down (@unregister == 0) or being unregistered
+ * (@unregister != 0) for one garbage-list entry.
+ *
+ * The per-ops ifdown hook, if any, always runs. The rest applies only
+ * when @dst is bound to @dev: on a plain down the entry starts
+ * discarding traffic; on unregister the entry, and its neighbour if
+ * that also points at @dev, are re-pointed at loopback_dev with the
+ * device references moved accordingly.
+ */
+static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			      int unregister)
+{
+	if (dst->ops->ifdown)
+		dst->ops->ifdown(dst, dev, unregister);
+
+	if (dev != dst->dev)
+		return;
+
+	if (!unregister) {
+		dst->input = dst_discard_in;
+		dst->output = dst_discard_out;
+	} else {
+		/* Swap the entry's device reference over to loopback. */
+		dst->dev = &loopback_dev;
+		dev_hold(&loopback_dev);
+		dev_put(dev);
+		if (dst->neighbour && dst->neighbour->dev == dev) {
+			/* Same swap for the reference held via the neighbour. */
+			dst->neighbour->dev = &loopback_dev;
+			dev_put(dev);
+			dev_hold(&loopback_dev);
+		}
+	}
+}
+
+/*
+ * Netdevice notifier callback. On NETDEV_DOWN and NETDEV_UNREGISTER
+ * every entry on the garbage list is passed to dst_ifdown() under
+ * dst_lock; the last argument tells it whether this is an unregister.
+ * All other events are ignored. Always returns NOTIFY_DONE.
+ */
+static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct dst_entry *entry;
+
+	if (event == NETDEV_UNREGISTER || event == NETDEV_DOWN) {
+		spin_lock_bh(&dst_lock);
+		entry = dst_garbage_list;
+		while (entry) {
+			dst_ifdown(entry, dev, event != NETDEV_DOWN);
+			entry = entry->next;
+		}
+		spin_unlock_bh(&dst_lock);
+	}
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that routes netdevice events to dst_dev_event(). */
+static struct notifier_block dst_dev_notifier = {
+	.notifier_call	= dst_dev_event,
+};
+
+/*
+ * Boot-time setup: registers dst_dev_notifier so that dst_dev_event()
+ * sees netdevice events. The registration result is not checked.
+ */
+void __init dst_init(void)
+{
+	register_netdevice_notifier(&dst_dev_notifier);
+}
+
+EXPORT_SYMBOL(__dst_free);
+EXPORT_SYMBOL(dst_alloc);
+EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/dv.c b/net/core/dv.c
new file mode 100644
index 00000000000..3f25f4aa4e6
--- /dev/null
+++ b/net/core/dv.c
@@ -0,0 +1,548 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Generic frame diversion
+ *
+ * Authors:	
+ * 		Benoit LOCHER:	initial integration within the kernel with support for ethernet
+ * 		Dave Miller:	improvement on the code (correctness, performance and source files)
+ *
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <net/dst.h>
+#include <net/arp.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/checksum.h>
+#include <linux/divert.h>
+#include <linux/sockios.h>
+
+const char sysctl_divert_version[32]="0.46";	/* Current version */
+
+/* Init hook registered via module_init(); does nothing and returns 0. */
+static int __init dv_init(void)
+{
+	return 0;
+}
+module_init(dv_init);
+
+/*
+ * Attach a zeroed divert_blk to an ethernet device.
+ *
+ * dev->divert is always reset to NULL first. Devices whose type is not
+ * ARPHRD_ETHER get no block and the call succeeds. For ethernet
+ * devices a block is allocated (size rounded up to a multiple of 4),
+ * cleared, and a reference on the device is taken.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int alloc_divert_blk(struct net_device *dev)
+{
+	int alloc_size = (sizeof(struct divert_blk) + 3) & ~3;
+
+	dev->divert = NULL;
+	if (dev->type != ARPHRD_ETHER)
+		return 0;
+
+	dev->divert = kmalloc(alloc_size, GFP_KERNEL);
+	if (!dev->divert) {
+		printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n",
+		       dev->name);
+		return -ENOMEM;
+	}
+
+	memset(dev->divert, 0, sizeof(struct divert_blk));
+	dev_hold(dev);
+	return 0;
+}
+
+/*
+ * Undo alloc_divert_blk(): if the device carries a divert_blk, free
+ * it, clear the pointer and drop the device reference. Devices without
+ * a block are left untouched.
+ */
+void free_divert_blk(struct net_device *dev)
+{
+	if (dev->divert == NULL)
+		return;
+
+	kfree(dev->divert);
+	dev->divert = NULL;
+	dev_put(dev);
+}
+
+/*
+ * Record a tcp/udp (source or dest) port in a port table.
+ *
+ * @ports: table of MAX_DIVERT_PORTS entries; 0 marks a free slot
+ * @port:  port number in host byte order
+ *
+ * The port is stored in network byte order, in the first free slot.
+ * Returns 0 on success, -EINVAL for port 0, -EALREADY if the port is
+ * already present and -ENOBUFS if the table has no free slot.
+ */
+static int add_port(u16 ports[], u16 port)
+{
+	int slot;
+	int free_slot = -1;
+
+	if (!port)
+		return -EINVAL;
+
+	/* Kept in network format so lookups need no conversion. */
+	port = htons(port);
+
+	/* One pass: look for a duplicate and remember the first free slot. */
+	for (slot = 0; slot < MAX_DIVERT_PORTS; slot++) {
+		if (ports[slot] == port)
+			return -EALREADY;
+		if (free_slot < 0 && ports[slot] == 0)
+			free_slot = slot;
+	}
+
+	if (free_slot < 0)
+		return -ENOBUFS;
+
+	ports[free_slot] = port;
+	return 0;
+}
+
+/*
+ * Remove a tcp/udp (source or dest) port from a port table.
+ *
+ * @ports: table of MAX_DIVERT_PORTS entries
+ * @port:  port number in host byte order
+ *
+ * Clears the first slot holding the port. Returns 0 on success and
+ * -EINVAL if @port is 0 or is not in the table.
+ */
+static int remove_port(u16 ports[], u16 port)
+{
+	int slot = 0;
+
+	if (!port)
+		return -EINVAL;
+
+	/* Table entries are stored in network format. */
+	port = htons(port);
+
+	while (slot < MAX_DIVERT_PORTS && ports[slot] != port)
+		slot++;
+
+	if (slot == MAX_DIVERT_PORTS)
+		return -EINVAL;
+
+	ports[slot] = 0;
+	return 0;
+}
+
+/*
+ * Some basic sanity checks on the arguments passed to divert_ioctl().
+ *
+ * @div_cf: request already copied in from user space
+ * @dev:    out parameter; receives the device named "eth<dev_index>"
+ *
+ * *dev is always written: it is NULL unless a device was found, so the
+ * caller never sees an indeterminate pointer. For DIVCMD_GETVERSION no
+ * device is looked up, no permission check is made and *dev stays NULL.
+ *
+ * Returns 0 if the request may proceed, -EFAULT if @dev is NULL,
+ * -EINVAL for an out-of-range index, an unknown device or a device
+ * without a divert_blk, and -EPERM without CAP_SYS_ADMIN.
+ *
+ * NOTE(review): the reference taken by dev_get_by_name() is dropped
+ * before returning, so on success the caller uses *dev without holding
+ * a reference of its own.
+ */
+static int check_args(struct divert_cf *div_cf, struct net_device **dev)
+{
+	char devname[32];
+	int ret;
+
+	if (dev == NULL)
+		return -EFAULT;
+
+	/* Give the caller a defined value on every return path. */
+	*dev = NULL;
+
+	/* GETVERSION: all other args are unused */
+	if (div_cf->cmd == DIVCMD_GETVERSION)
+		return 0;
+
+	/* Network device index should reasonably be between 0 and 1000 :) */
+	if (div_cf->dev_index < 0 || div_cf->dev_index > 1000)
+		return -EINVAL;
+
+	/* Let's try to find the ifname */
+	sprintf(devname, "eth%d", div_cf->dev_index);
+	*dev = dev_get_by_name(devname);
+
+	/* dev should NOT be null */
+	if (*dev == NULL)
+		return -EINVAL;
+
+	ret = 0;
+
+	/* user issuing the ioctl must be a super one :) */
+	if (!capable(CAP_SYS_ADMIN)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/* Device must have a divert_blk member NOT null */
+	if ((*dev)->divert == NULL)
+		ret = -EINVAL;
+out:
+	dev_put(*dev);
+	return ret;
+}
+
+/*
+ * control function of the diverter
+ */
+#if 0
+#define	DVDBG(a)	\
+	printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a))
+#else
+#define	DVDBG(a)
+#endif
+
+/*
+ * Switch one DIVERT_PROTO_* bit of @div_blk->protos on or off.
+ * Returns 0 when the bit changed, -EALREADY when it already had the
+ * requested state and -EINVAL when @arg is neither DIVARG1_ENABLE nor
+ * DIVARG1_DISABLE.
+ */
+static int divert_set_proto(struct divert_blk *div_blk, int proto, int arg)
+{
+	switch (arg) {
+	case DIVARG1_ENABLE:
+		if (div_blk->protos & proto)
+			return -EALREADY;
+		div_blk->protos |= proto;
+		return 0;
+
+	case DIVARG1_DISABLE:
+		if (!(div_blk->protos & proto))
+			return -EALREADY;
+		div_blk->protos &= ~proto;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Add @port to, or remove it from, one of the port tables according to
+ * @arg (DIVARG1_ADD / DIVARG1_REMOVE). Returns what add_port() or
+ * remove_port() returns, or -EINVAL for any other @arg.
+ */
+static int divert_edit_ports(u16 ports[], int arg, u16 port)
+{
+	switch (arg) {
+	case DIVARG1_ADD:
+		return add_port(ports, port);
+
+	case DIVARG1_REMOVE:
+		return remove_port(ports, port);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * divert_ioctl - handle the SIOCGIFDIVERT and SIOCSIFDIVERT ioctls
+ * @cmd: ioctl number
+ * @arg: user pointer to the struct divert_cf describing the request
+ *
+ * SIOCGIFDIVERT copies either the raw divert_blk of the device
+ * (DIVCMD_GETSTATUS) or the version string (DIVCMD_GETVERSION) to the
+ * user buffer named in the request. SIOCSIFDIVERT resets the block,
+ * toggles diversion as a whole or per protocol, or edits one of the
+ * four port tables.
+ *
+ * Returns 0 on success, -EFAULT when a user copy fails, -EALREADY when
+ * a toggle is already in the requested state, -EINVAL for unknown
+ * commands or arguments, or whatever check_args(), add_port() or
+ * remove_port() reports.
+ */
+int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg)
+{
+	struct divert_cf	div_cf;
+	struct divert_blk	*div_blk;
+	struct net_device	*dev = NULL;
+	int			ret;
+
+	switch (cmd) {
+	case SIOCGIFDIVERT:
+		DVDBG("SIOCGIFDIVERT, copy_from_user");
+		if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
+			return -EFAULT;
+		DVDBG("before check_args");
+		ret = check_args(&div_cf, &dev);
+		if (ret)
+			return ret;
+		DVDBG("after checkargs");
+
+		/*
+		 * dev is only valid for commands other than GETVERSION
+		 * (check_args() looks up no device for that one), so it
+		 * is dereferenced inside the GETSTATUS case only.
+		 */
+		DVDBG("befre switch()");
+		switch (div_cf.cmd) {
+		case DIVCMD_GETSTATUS:
+			/* Now, just give the user the raw divert block
+			 * for him to play with :)
+			 */
+			if (copy_to_user(div_cf.arg1.ptr, dev->divert,
+					 sizeof(struct divert_blk)))
+				return -EFAULT;
+			break;
+
+		case DIVCMD_GETVERSION:
+			DVDBG("GETVERSION: checking ptr");
+			if (div_cf.arg1.ptr == NULL)
+				return -EINVAL;
+			DVDBG("GETVERSION: copying data to userland");
+			if (copy_to_user(div_cf.arg1.ptr,
+					 sysctl_divert_version, 32))
+				return -EFAULT;
+			DVDBG("GETVERSION: data copied");
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		break;
+
+	case SIOCSIFDIVERT:
+		if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
+			return -EFAULT;
+
+		ret = check_args(&div_cf, &dev);
+		if (ret)
+			return ret;
+
+		/*
+		 * check_args() accepts DIVCMD_GETVERSION without looking
+		 * up a device or checking permissions. It is not a valid
+		 * "set" command, so reject it before dev is touched.
+		 */
+		if (div_cf.cmd == DIVCMD_GETVERSION)
+			return -EINVAL;
+
+		div_blk = dev->divert;
+
+		switch (div_cf.cmd) {
+		case DIVCMD_RESET:
+			div_blk->divert = 0;
+			div_blk->protos = DIVERT_PROTO_NONE;
+			memset(div_blk->tcp_dst, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			memset(div_blk->tcp_src, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			memset(div_blk->udp_dst, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			memset(div_blk->udp_src, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			return 0;
+
+		case DIVCMD_DIVERT:
+			switch (div_cf.arg1.int32) {
+			case DIVARG1_ENABLE:
+				if (div_blk->divert)
+					return -EALREADY;
+				div_blk->divert = 1;
+				return 0;
+
+			case DIVARG1_DISABLE:
+				if (!div_blk->divert)
+					return -EALREADY;
+				div_blk->divert = 0;
+				return 0;
+
+			default:
+				return -EINVAL;
+			}
+
+		case DIVCMD_IP:
+			return divert_set_proto(div_blk, DIVERT_PROTO_IP,
+						div_cf.arg1.int32);
+
+		case DIVCMD_TCP:
+			return divert_set_proto(div_blk, DIVERT_PROTO_TCP,
+						div_cf.arg1.int32);
+
+		case DIVCMD_UDP:
+			return divert_set_proto(div_blk, DIVERT_PROTO_UDP,
+						div_cf.arg1.int32);
+
+		case DIVCMD_ICMP:
+			return divert_set_proto(div_blk, DIVERT_PROTO_ICMP,
+						div_cf.arg1.int32);
+
+		case DIVCMD_TCPDST:
+			return divert_edit_ports(div_blk->tcp_dst,
+						 div_cf.arg1.int32,
+						 div_cf.arg2.uint16);
+
+		case DIVCMD_TCPSRC:
+			return divert_edit_ports(div_blk->tcp_src,
+						 div_cf.arg1.int32,
+						 div_cf.arg2.uint16);
+
+		case DIVCMD_UDPDST:
+			return divert_edit_ports(div_blk->udp_dst,
+						 div_cf.arg1.int32,
+						 div_cf.arg2.uint16);
+
+		case DIVCMD_UDPSRC:
+			return divert_edit_ports(div_blk->udp_src,
+						 div_cf.arg1.int32,
+						 div_cf.arg2.uint16);
+
+		default:
+			return -EINVAL;
+		}
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Check if packet should have its dest mac address set to the box itself
+ * for diversion
+ */
+
+/*
+ * Rewrite the first ETH_ALEN bytes of the frame's ethernet header with
+ * the receiving device's own address and mark the packet as
+ * PACKET_HOST. Wrapped in do { } while (0) so that the macro behaves
+ * as a single statement wherever it is used.
+ */
+#define	ETH_DIVERT_FRAME(skb) \
+	do { \
+		memcpy(eth_hdr(skb), (skb)->dev->dev_addr, ETH_ALEN); \
+		(skb)->pkt_type = PACKET_HOST; \
+	} while (0)
+		
+/*
+ * Return non-zero if @dest matches a non-zero entry of @dst_ports or
+ * @source matches a non-zero entry of @src_ports. Both tables hold
+ * MAX_DIVERT_PORTS entries; ports are compared as stored, without byte
+ * swapping.
+ */
+static int divert_port_match(const u16 *dst_ports, const u16 *src_ports,
+			     u16 dest, u16 source)
+{
+	int i;
+
+	for (i = 0; i < MAX_DIVERT_PORTS; i++) {
+		if ((dst_ports[i] && dst_ports[i] == dest) ||
+		    (src_ports[i] && src_ports[i] == source))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * divert_frame - divert a received frame to this host if the rules match
+ * @skb: received frame; skb->dev->divert must be non-NULL
+ *
+ * Frames already addressed to the device and non-IP frames are left
+ * alone. Otherwise the frame is diverted (see ETH_DIVERT_FRAME) when
+ * the device diverts all IP traffic, all traffic of the packet's
+ * protocol (ICMP, TCP, UDP), or when a TCP/UDP source or destination
+ * port is listed in the matching port table.
+ *
+ * Packets whose headers do not fit in the buffer are reported as
+ * malformed and left untouched.
+ */
+void divert_frame(struct sk_buff *skb)
+{
+	struct ethhdr			*eth = eth_hdr(skb);
+	struct iphdr			*iph;
+	struct tcphdr			*tcph;
+	struct udphdr			*udph;
+	struct divert_blk		*divert = skb->dev->divert;
+	unsigned char			*skb_data_end = skb->data + skb->len;
+
+	/* Packet is already aimed at us, return */
+	if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN))
+		return;
+
+	/* proto is not IP, do nothing */
+	if (eth->h_proto != htons(ETH_P_IP))
+		return;
+
+	/* Divert all IP frames ? */
+	if (divert->protos & DIVERT_PROTO_IP) {
+		ETH_DIVERT_FRAME(skb);
+		return;
+	}
+
+	/* Check for possible (maliciously) malformed IP frame (thanks Dave).
+	 * The fixed part of the header must be present before ihl is read.
+	 */
+	iph = (struct iphdr *) skb->data;
+	if ((unsigned char *)(iph + 1) > skb_data_end ||
+	    ((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) {
+		printk(KERN_INFO "divert: malformed IP packet !\n");
+		return;
+	}
+
+	switch (iph->protocol) {
+	/* Divert all ICMP frames ? */
+	case IPPROTO_ICMP:
+		if (divert->protos & DIVERT_PROTO_ICMP) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+		break;
+
+	/* Divert all TCP frames ? */
+	case IPPROTO_TCP:
+		if (divert->protos & DIVERT_PROTO_TCP) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+
+		/* Check for possible (maliciously) malformed IP
+		 * frame (thanx Dave). A header that ends exactly at the
+		 * end of the data is complete, hence '>' and not '>='.
+		 */
+		tcph = (struct tcphdr *)
+			(((unsigned char *)iph) + (iph->ihl<<2));
+		if (((unsigned char *)(tcph+1)) > skb_data_end) {
+			printk(KERN_INFO "divert: malformed TCP packet !\n");
+			return;
+		}
+
+		/* Divert some tcp dst/src ports only ?*/
+		if (divert_port_match(divert->tcp_dst, divert->tcp_src,
+				      tcph->dest, tcph->source)) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+		break;
+
+	/* Divert all UDP frames ? */
+	case IPPROTO_UDP:
+		if (divert->protos & DIVERT_PROTO_UDP) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+
+		/* Check for possible (maliciously) malformed IP
+		 * packet (thanks Dave). As for TCP, a header ending
+		 * exactly at the end of the data is complete.
+		 */
+		udph = (struct udphdr *)
+			(((unsigned char *)iph) + (iph->ihl<<2));
+		if (((unsigned char *)(udph+1)) > skb_data_end) {
+			printk(KERN_INFO
+			       "divert: malformed UDP packet !\n");
+			return;
+		}
+
+		/* Divert some udp dst/src ports only ? */
+		if (divert_port_match(divert->udp_dst, divert->udp_src,
+				      udph->dest, udph->source)) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+		break;
+	}
+}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
new file mode 100644
index 00000000000..f05fde97c43
--- /dev/null
+++ b/net/core/ethtool.c
@@ -0,0 +1,819 @@
+/*
+ * net/core/ethtool.c - Ethtool ioctl handler
+ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
+ *
+ * This file is where we call all the ethtool_ops commands to get
+ * the information ethtool needs.  We fall back to calling do_ioctl()
+ * for drivers which haven't been converted to ethtool_ops yet.
+ *
+ * It's GPL, stupid.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <asm/uaccess.h>
+
+/* 
+ * Some useful ethtool_ops methods that're device independent.
+ * If we find that all drivers want to do the same thing here,
+ * we can turn these into dev_() function calls.
+ */
+
+/* Generic get_link helper: 1 while the carrier is up, 0 otherwise. */
+u32 ethtool_op_get_link(struct net_device *dev)
+{
+	if (netif_carrier_ok(dev))
+		return 1;
+	return 0;
+}
+
+/* Generic get_tx_csum helper: 1 if NETIF_F_IP_CSUM is set on the device. */
+u32 ethtool_op_get_tx_csum(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_IP_CSUM) ? 1 : 0;
+}
+
+/*
+ * Generic set_tx_csum helper: set NETIF_F_IP_CSUM when @data is non-zero,
+ * clear it otherwise.  Always returns 0.
+ */
+int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+	if (!data) {
+		dev->features &= ~NETIF_F_IP_CSUM;
+		return 0;
+	}
+
+	dev->features |= NETIF_F_IP_CSUM;
+	return 0;
+}
+
+/* Generic get_sg helper: 1 if NETIF_F_SG is set on the device. */
+u32 ethtool_op_get_sg(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_SG) ? 1 : 0;
+}
+
+/*
+ * Generic set_sg helper: set NETIF_F_SG when @data is non-zero, clear it
+ * otherwise.  Always returns 0.
+ */
+int ethtool_op_set_sg(struct net_device *dev, u32 data)
+{
+	if (!data) {
+		dev->features &= ~NETIF_F_SG;
+		return 0;
+	}
+
+	dev->features |= NETIF_F_SG;
+	return 0;
+}
+
+/* Generic get_tso helper: 1 if NETIF_F_TSO is set on the device. */
+u32 ethtool_op_get_tso(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_TSO) ? 1 : 0;
+}
+
+/*
+ * Generic set_tso helper: set NETIF_F_TSO when @data is non-zero, clear it
+ * otherwise.  Always returns 0.
+ */
+int ethtool_op_set_tso(struct net_device *dev, u32 data)
+{
+	if (!data) {
+		dev->features &= ~NETIF_F_TSO;
+		return 0;
+	}
+
+	dev->features |= NETIF_F_TSO;
+	return 0;
+}
+
+/* Handlers for each ethtool command */
+
+/*
+ * ETHTOOL_GSET: ask the driver for its settings and copy them to user
+ * space.  Returns -EOPNOTSUPP without a get_settings op, a negative driver
+ * error as-is, -EFAULT if the copy-out fails, 0 otherwise.
+ */
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
+	int rc;
+
+	if (!ops->get_settings)
+		return -EOPNOTSUPP;
+
+	rc = ops->get_settings(dev, &cmd);
+	if (rc < 0)
+		return rc;
+
+	return copy_to_user(useraddr, &cmd, sizeof(cmd)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_SSET: copy the requested settings in from user space and pass
+ * them to the driver; the driver's return value is passed through.
+ */
+static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_cmd cmd;
+
+	if (!ops->set_settings)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return ops->set_settings(dev, &cmd);
+}
+
+/*
+ * ETHTOOL_GDRVINFO: let the driver fill in a zeroed ethtool_drvinfo, then
+ * overwrite the count/length fields from whichever of the optional
+ * self_test_count/get_stats_count/get_regs_len/get_eeprom_len ops exist.
+ */
+static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_drvinfo drvinfo;
+
+	if (!ops->get_drvinfo)
+		return -EOPNOTSUPP;
+
+	memset(&drvinfo, 0, sizeof(drvinfo));
+	drvinfo.cmd = ETHTOOL_GDRVINFO;
+	ops->get_drvinfo(dev, &drvinfo);
+
+	if (ops->self_test_count)
+		drvinfo.testinfo_len = ops->self_test_count(dev);
+	if (ops->get_stats_count)
+		drvinfo.n_stats = ops->get_stats_count(dev);
+	if (ops->get_regs_len)
+		drvinfo.regdump_len = ops->get_regs_len(dev);
+	if (ops->get_eeprom_len)
+		drvinfo.eedump_len = ops->get_eeprom_len(dev);
+
+	return copy_to_user(useraddr, &drvinfo, sizeof(drvinfo)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_GREGS: dump the device registers to user space.
+ *
+ * The user-supplied header is read first; its len is clamped to the size
+ * reported by ops->get_regs_len().  A buffer of the full register length
+ * is handed to the driver, then the (possibly updated) header followed by
+ * regs.len bytes of register data are copied back.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the driver lacks get_regs or
+ * get_regs_len, -ENOMEM on allocation failure, -EFAULT on a failed copy.
+ */
+static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_regs regs;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	void *regbuf;
+	int reglen, ret;
+
+	if (!ops->get_regs || !ops->get_regs_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&regs, useraddr, sizeof(regs)))
+		return -EFAULT;
+
+	reglen = ops->get_regs_len(dev);
+	if (regs.len > reglen)
+		regs.len = reglen;
+
+	regbuf = kmalloc(reglen, GFP_USER);
+	if (!regbuf)
+		return -ENOMEM;
+
+	/*
+	 * The buffer goes straight to user space below; clear it so that a
+	 * driver which fills fewer than reglen bytes cannot leak stale
+	 * kernel heap contents.
+	 */
+	memset(regbuf, 0, reglen);
+
+	ops->get_regs(dev, &regs, regbuf);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &regs, sizeof(regs)))
+		goto out;
+	useraddr += offsetof(struct ethtool_regs, data);
+	if (copy_to_user(useraddr, regbuf, regs.len))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(regbuf);
+	return ret;
+}
+
+/* ETHTOOL_GWOL: fetch the driver's wolinfo and copy it to user space. */
+static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_wolinfo wolinfo = { .cmd = ETHTOOL_GWOL };
+
+	if (!ops->get_wol)
+		return -EOPNOTSUPP;
+
+	ops->get_wol(dev, &wolinfo);
+
+	return copy_to_user(useraddr, &wolinfo, sizeof(wolinfo)) ? -EFAULT : 0;
+}
+
+/* ETHTOOL_SWOL: copy wolinfo in from user space and hand it to the driver. */
+static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_wolinfo wolinfo;
+
+	if (!ops->set_wol)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&wolinfo, useraddr, sizeof(wolinfo)))
+		return -EFAULT;
+
+	return ops->set_wol(dev, &wolinfo);
+}
+
+/* ETHTOOL_GMSGLVL: return the driver's message level in an ethtool_value. */
+static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val = { .cmd = ETHTOOL_GMSGLVL };
+
+	if (!ops->get_msglevel)
+		return -EOPNOTSUPP;
+
+	val.data = ops->get_msglevel(dev);
+
+	return copy_to_user(useraddr, &val, sizeof(val)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_SMSGLVL: pass the user-supplied message level to the driver.
+ * The driver op returns nothing, so success is always reported as 0.
+ */
+static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val;
+
+	if (!ops->set_msglevel)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&val, useraddr, sizeof(val)))
+		return -EFAULT;
+
+	ops->set_msglevel(dev, val.data);
+	return 0;
+}
+
+/* ETHTOOL_NWAY_RST: forward to the driver's nway_reset op, if it has one. */
+static int ethtool_nway_reset(struct net_device *dev)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (ops->nway_reset)
+		return ops->nway_reset(dev);
+
+	return -EOPNOTSUPP;
+}
+
+/* ETHTOOL_GLINK: return the driver's link state in an ethtool_value. */
+static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val = { .cmd = ETHTOOL_GLINK };
+
+	if (!ops->get_link)
+		return -EOPNOTSUPP;
+
+	val.data = ops->get_link(dev);
+
+	return copy_to_user(useraddr, &val, sizeof(val)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_GEEPROM: read a window of the device EEPROM.
+ *
+ * The request header supplies offset/len.  The range must be non-empty,
+ * must not wrap and must lie inside ops->get_eeprom_len().  The data area
+ * following the header in user memory is copied in, handed to the driver,
+ * and then header and data are copied back out.
+ */
+static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	void __user *userdata = useraddr + sizeof(struct ethtool_eeprom);
+	struct ethtool_eeprom eeprom;
+	u8 *buf;
+	int rc;
+
+	if (!ops->get_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Reject zero-length requests and offset+len wrap-around */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Reject ranges that run past the end of the EEPROM */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	buf = kmalloc(eeprom.len, GFP_USER);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, userdata, eeprom.len)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = ops->get_eeprom(dev, &eeprom, buf);
+	if (rc)
+		goto out;
+
+	/* Header first, then the data area that follows it */
+	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)) ||
+	    copy_to_user(userdata, buf, eeprom.len))
+		rc = -EFAULT;
+
+ out:
+	kfree(buf);
+	return rc;
+}
+
+/*
+ * ETHTOOL_SEEPROM: write a window of the device EEPROM.
+ *
+ * Same range validation as the read path (non-empty, no wrap, within
+ * ops->get_eeprom_len()).  The data following the header is copied in,
+ * given to ops->set_eeprom(), and on success copied back to user space.
+ */
+static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	void __user *userdata = useraddr + sizeof(struct ethtool_eeprom);
+	struct ethtool_eeprom eeprom;
+	u8 *buf;
+	int rc;
+
+	if (!ops->set_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Reject zero-length requests and offset+len wrap-around */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Reject ranges that run past the end of the EEPROM */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	buf = kmalloc(eeprom.len, GFP_USER);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, userdata, eeprom.len)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = ops->set_eeprom(dev, &eeprom, buf);
+	if (rc == 0 && copy_to_user(userdata, buf, eeprom.len))
+		rc = -EFAULT;
+
+ out:
+	kfree(buf);
+	return rc;
+}
+
+/* ETHTOOL_GCOALESCE: fetch coalescing parameters and copy them out. */
+static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_coalesce params = { .cmd = ETHTOOL_GCOALESCE };
+
+	if (!ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	ops->get_coalesce(dev, &params);
+
+	return copy_to_user(useraddr, &params, sizeof(params)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_SCOALESCE: copy coalescing parameters in from user space and
+ * hand them to the driver.
+ *
+ * Returns -EOPNOTSUPP if the driver has no set_coalesce op, -EFAULT if the
+ * copy-in fails, otherwise whatever the driver returns.
+ */
+static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce;
+
+	/*
+	 * Test the op that is actually called below; checking get_coalesce
+	 * here would call through a NULL set_coalesce on get-only drivers.
+	 */
+	if (!dev->ethtool_ops->set_coalesce)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_coalesce(dev, &coalesce);
+}
+
+/* ETHTOOL_GRINGPARAM: fetch ring parameters and copy them out. */
+static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_ringparam ring = { .cmd = ETHTOOL_GRINGPARAM };
+
+	if (!ops->get_ringparam)
+		return -EOPNOTSUPP;
+
+	ops->get_ringparam(dev, &ring);
+
+	return copy_to_user(useraddr, &ring, sizeof(ring)) ? -EFAULT : 0;
+}
+
+/* ETHTOOL_SRINGPARAM: copy ring parameters in and pass them to the driver. */
+static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_ringparam ring;
+
+	if (!ops->set_ringparam)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&ring, useraddr, sizeof(ring)))
+		return -EFAULT;
+
+	return ops->set_ringparam(dev, &ring);
+}
+
+/* ETHTOOL_GPAUSEPARAM: fetch pause parameters and copy them out. */
+static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_pauseparam pause = { .cmd = ETHTOOL_GPAUSEPARAM };
+
+	if (!ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	ops->get_pauseparam(dev, &pause);
+
+	return copy_to_user(useraddr, &pause, sizeof(pause)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_SPAUSEPARAM: copy pause parameters in from user space and hand
+ * them to the driver.
+ *
+ * Returns -EOPNOTSUPP if the driver has no set_pauseparam op, -EFAULT if
+ * the copy-in fails, otherwise whatever the driver returns.
+ */
+static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam;
+
+	/*
+	 * Test the op that is actually called below; checking get_pauseparam
+	 * here would call through a NULL set_pauseparam on get-only drivers.
+	 */
+	if (!dev->ethtool_ops->set_pauseparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
+}
+
+/* ETHTOOL_GRXCSUM: return the driver's rx checksum setting. */
+static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val = { .cmd = ETHTOOL_GRXCSUM };
+
+	if (!ops->get_rx_csum)
+		return -EOPNOTSUPP;
+
+	val.data = ops->get_rx_csum(dev);
+
+	return copy_to_user(useraddr, &val, sizeof(val)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_SRXCSUM: pass the requested rx checksum setting to the driver.
+ * The driver's return value is not propagated; success is reported as 0.
+ */
+static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val;
+
+	if (!ops->set_rx_csum)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&val, useraddr, sizeof(val)))
+		return -EFAULT;
+
+	ops->set_rx_csum(dev, val.data);
+	return 0;
+}
+
+/* ETHTOOL_GTXCSUM: return the driver's tx checksum setting. */
+static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val = { .cmd = ETHTOOL_GTXCSUM };
+
+	if (!ops->get_tx_csum)
+		return -EOPNOTSUPP;
+
+	val.data = ops->get_tx_csum(dev);
+
+	return copy_to_user(useraddr, &val, sizeof(val)) ? -EFAULT : 0;
+}
+
+/*
+ * Change the scatter-gather setting.  When SG is being switched off and
+ * the driver has a set_tso op, TSO is switched off first; a failure there
+ * is returned without touching SG.  Callers must have checked set_sg.
+ */
+static int __ethtool_set_sg(struct net_device *dev, u32 data)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!data && ops->set_tso) {
+		int rc = ops->set_tso(dev, 0);
+
+		if (rc)
+			return rc;
+	}
+
+	return ops->set_sg(dev, data);
+}
+
+/*
+ * ETHTOOL_STXCSUM: change the tx checksum setting.  Turning it off also
+ * turns off scatter-gather first (when the driver has a set_sg op); an
+ * error from that step is returned before tx checksumming is changed.
+ */
+static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val;
+
+	if (!ops->set_tx_csum)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&val, useraddr, sizeof(val)))
+		return -EFAULT;
+
+	if (!val.data && ops->set_sg) {
+		int rc = __ethtool_set_sg(dev, 0);
+
+		if (rc)
+			return rc;
+	}
+
+	return ops->set_tx_csum(dev, val.data);
+}
+
+/* ETHTOOL_GSG: return the driver's scatter-gather setting. */
+static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val = { .cmd = ETHTOOL_GSG };
+
+	if (!ops->get_sg)
+		return -EOPNOTSUPP;
+
+	val.data = ops->get_sg(dev);
+
+	return copy_to_user(useraddr, &val, sizeof(val)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_SSG: change the scatter-gather setting.  Enabling it is refused
+ * with -EINVAL unless one of NETIF_F_IP_CSUM, NETIF_F_NO_CSUM or
+ * NETIF_F_HW_CSUM is set in dev->features.
+ */
+static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
+{
+	const unsigned long csum_features = NETIF_F_IP_CSUM |
+					    NETIF_F_NO_CSUM |
+					    NETIF_F_HW_CSUM;
+	struct ethtool_value val;
+
+	if (!dev->ethtool_ops->set_sg)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&val, useraddr, sizeof(val)))
+		return -EFAULT;
+
+	if (val.data && !(dev->features & csum_features))
+		return -EINVAL;
+
+	return __ethtool_set_sg(dev, val.data);
+}
+
+/* ETHTOOL_GTSO: return the driver's TSO setting. */
+static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val = { .cmd = ETHTOOL_GTSO };
+
+	if (!ops->get_tso)
+		return -EOPNOTSUPP;
+
+	val.data = ops->get_tso(dev);
+
+	return copy_to_user(useraddr, &val, sizeof(val)) ? -EFAULT : 0;
+}
+
+/*
+ * ETHTOOL_STSO: change the TSO setting.  Enabling it is refused with
+ * -EINVAL unless NETIF_F_SG is already set in dev->features.
+ */
+static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val;
+
+	if (!ops->set_tso)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&val, useraddr, sizeof(val)))
+		return -EFAULT;
+
+	if (val.data && !(dev->features & NETIF_F_SG))
+		return -EINVAL;
+
+	return ops->set_tso(dev, val.data);
+}
+
+/*
+ * ETHTOOL_TEST: run the driver self-test.
+ *
+ * The user header is read, test.len is overwritten with the driver's
+ * self_test_count(), and a u64 result array of that many entries is
+ * passed to ops->self_test().  Header and results are then copied back.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the driver lacks self_test or
+ * self_test_count, -ENOMEM on allocation failure, -EFAULT on a failed copy.
+ */
+static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_test test;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->self_test || !ops->self_test_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&test, useraddr, sizeof(test)))
+		return -EFAULT;
+
+	test.len = ops->self_test_count(dev);
+	data = kmalloc(test.len * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	/*
+	 * The whole array is copied to user space below; clear it so that
+	 * entries the driver leaves untouched do not expose kernel heap.
+	 */
+	memset(data, 0, test.len * sizeof(u64));
+
+	ops->self_test(dev, &test, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &test, sizeof(test)))
+		goto out;
+	useraddr += sizeof(test);
+	if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+/*
+ * ETHTOOL_GSTRINGS: return a string set (self-test names or statistics
+ * names) to user space.
+ *
+ * gstrings.string_set selects the set; gstrings.len is overwritten with
+ * the matching driver count (self_test_count() for ETH_SS_TEST,
+ * get_stats_count() for ETH_SS_STATS).  Each string occupies
+ * ETH_GSTRING_LEN bytes in the buffer that follows the header.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if a needed op is missing, -EINVAL for
+ * an unknown string set, -ENOMEM on allocation failure, -EFAULT on a
+ * failed copy.
+ */
+static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_strings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	switch (gstrings.string_set) {
+	case ETH_SS_TEST:
+		if (!ops->self_test_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->self_test_count(dev);
+		break;
+	case ETH_SS_STATS:
+		if (!ops->get_stats_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->get_stats_count(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	/*
+	 * The whole buffer is copied to user space below; clear it so that
+	 * padding after short strings does not expose kernel heap.
+	 */
+	memset(data, 0, gstrings.len * ETH_GSTRING_LEN);
+
+	ops->get_strings(dev, gstrings.string_set, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+/*
+ * ETHTOOL_PHYS_ID: pass the user-supplied value to the driver's phys_id
+ * op and return its result.
+ */
+static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value val;
+
+	if (!ops->phys_id)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&val, useraddr, sizeof(val)))
+		return -EFAULT;
+
+	return ops->phys_id(dev, val.data);
+}
+
+/*
+ * ETHTOOL_GSTATS: return driver statistics.
+ *
+ * The user header is read, stats.n_stats is overwritten with the driver's
+ * get_stats_count(), and a u64 array of that many entries is passed to
+ * ops->get_ethtool_stats().  Header and values are then copied back.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the driver lacks get_ethtool_stats
+ * or get_stats_count, -ENOMEM on allocation failure, -EFAULT on a failed
+ * copy.
+ */
+static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_stats stats;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->get_ethtool_stats || !ops->get_stats_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&stats, useraddr, sizeof(stats)))
+		return -EFAULT;
+
+	stats.n_stats = ops->get_stats_count(dev);
+	data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	/*
+	 * The whole array is copied to user space below; clear it so that
+	 * counters the driver leaves untouched do not expose kernel heap.
+	 */
+	memset(data, 0, stats.n_stats * sizeof(u64));
+
+	ops->get_ethtool_stats(dev, &stats, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &stats, sizeof(stats)))
+		goto out;
+	useraddr += sizeof(stats);
+	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+/* The main entry point in this file.  Called from net/core/dev.c */
+
+/*
+ * Dispatch an ethtool request.
+ *
+ * The device is looked up by ifr->ifr_name and the command word is read
+ * from the start of the user buffer at ifr->ifr_data.  Devices without
+ * ethtool_ops are handed to their do_ioctl() with SIOCETHTOOL instead.
+ * The optional begin()/complete() ops bracket the per-command handler.
+ *
+ * Returns the handler's result, or -EPERM without CAP_NET_ADMIN, -ENODEV
+ * if the device is missing or not present, -EFAULT if the command word
+ * cannot be read, a negative value from begin(), or -EOPNOTSUPP for an
+ * unknown command or a device with neither ethtool_ops nor do_ioctl.
+ */
+int dev_ethtool(struct ifreq *ifr)
+{
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+	void __user *useraddr = ifr->ifr_data;
+	u32 ethcmd;
+	int rc;
+
+	/*
+	 * XXX: This can be pushed down into the ethtool_* handlers that
+	 * need it.  Keep existing behaviour for the moment.
+	 */
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!dev || !netif_device_present(dev))
+		return -ENODEV;
+
+	/* No ethtool_ops: fall back to the driver's own ioctl handler */
+	if (!dev->ethtool_ops)
+		goto ioctl;
+
+	/* Every ethtool request starts with a u32 command code */
+	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+		return -EFAULT;
+
+	/* A failing begin() aborts the request; complete() is not called */
+	if(dev->ethtool_ops->begin)
+		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+			return rc;
+
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
+		rc = ethtool_get_settings(dev, useraddr);
+		break;
+	case ETHTOOL_SSET:
+		rc = ethtool_set_settings(dev, useraddr);
+		break;
+	case ETHTOOL_GDRVINFO:
+		rc = ethtool_get_drvinfo(dev, useraddr);
+
+		break;
+	case ETHTOOL_GREGS:
+		rc = ethtool_get_regs(dev, useraddr);
+		break;
+	case ETHTOOL_GWOL:
+		rc = ethtool_get_wol(dev, useraddr);
+		break;
+	case ETHTOOL_SWOL:
+		rc = ethtool_set_wol(dev, useraddr);
+		break;
+	case ETHTOOL_GMSGLVL:
+		rc = ethtool_get_msglevel(dev, useraddr);
+		break;
+	case ETHTOOL_SMSGLVL:
+		rc = ethtool_set_msglevel(dev, useraddr);
+		break;
+	case ETHTOOL_NWAY_RST:
+		rc = ethtool_nway_reset(dev);
+		break;
+	case ETHTOOL_GLINK:
+		rc = ethtool_get_link(dev, useraddr);
+		break;
+	case ETHTOOL_GEEPROM:
+		rc = ethtool_get_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_SEEPROM:
+		rc = ethtool_set_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_GCOALESCE:
+		rc = ethtool_get_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_SCOALESCE:
+		rc = ethtool_set_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_GRINGPARAM:
+		rc = ethtool_get_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_SRINGPARAM:
+		rc = ethtool_set_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_GPAUSEPARAM:
+		rc = ethtool_get_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_SPAUSEPARAM:
+		rc = ethtool_set_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_GRXCSUM:
+		rc = ethtool_get_rx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_SRXCSUM:
+		rc = ethtool_set_rx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_GTXCSUM:
+		rc = ethtool_get_tx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_STXCSUM:
+		rc = ethtool_set_tx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_GSG:
+		rc = ethtool_get_sg(dev, useraddr);
+		break;
+	case ETHTOOL_SSG:
+		rc = ethtool_set_sg(dev, useraddr);
+		break;
+	case ETHTOOL_GTSO:
+		rc = ethtool_get_tso(dev, useraddr);
+		break;
+	case ETHTOOL_STSO:
+		rc = ethtool_set_tso(dev, useraddr);
+		break;
+	case ETHTOOL_TEST:
+		rc = ethtool_self_test(dev, useraddr);
+		break;
+	case ETHTOOL_GSTRINGS:
+		rc = ethtool_get_strings(dev, useraddr);
+		break;
+	case ETHTOOL_PHYS_ID:
+		rc = ethtool_phys_id(dev, useraddr);
+		break;
+	case ETHTOOL_GSTATS:
+		rc = ethtool_get_stats(dev, useraddr);
+		break;
+	default:
+		rc =  -EOPNOTSUPP;
+	}
+	
+	/* complete() runs whatever the handler returned */
+	if(dev->ethtool_ops->complete)
+		dev->ethtool_ops->complete(dev);
+	return rc;
+
+ ioctl:
+	if (dev->do_ioctl)
+		return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
+	return -EOPNOTSUPP;
+}
+
+EXPORT_SYMBOL(dev_ethtool);
+EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL(ethtool_op_get_sg);
+EXPORT_SYMBOL(ethtool_op_get_tso);
+EXPORT_SYMBOL(ethtool_op_get_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_sg);
+EXPORT_SYMBOL(ethtool_op_set_tso);
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
diff --git a/net/core/filter.c b/net/core/filter.c
new file mode 100644
index 00000000000..f3b88205ace
--- /dev/null
+++ b/net/core/filter.c
@@ -0,0 +1,432 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ *     Jay Schulist <jschlst@samba.org>
+ *
+ * Based on the design of:
+ *     - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Andi Kleen - Fix a few bad bugs and races.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/filter.h>
+
+/*
+ * Slow path for negative load offsets: translate @k into a pointer
+ * relative to the network header (k >= SKF_NET_OFF) or the link-layer
+ * header (k >= SKF_LL_OFF).  Returns NULL for any other @k, or when the
+ * resulting address is not inside [skb->head, skb->tail).
+ *
+ * NOTE(review): only the first byte is range-checked here, while callers
+ * in this file also read 16- and 32-bit values through the result.
+ */
+static u8 *load_pointer(struct sk_buff *skb, int k)
+{
+	u8 *addr;
+
+	if (k >= SKF_NET_OFF)
+		addr = skb->nh.raw + k - SKF_NET_OFF;
+	else if (k >= SKF_LL_OFF)
+		addr = skb->mac.raw + k - SKF_LL_OFF;
+	else
+		return NULL;
+
+	return (addr >= skb->head && addr < skb->tail) ? addr : NULL;
+}
+
+/**
+ *	sk_run_filter	- 	run a filter on a socket
+ *	@skb: buffer to run the filter on
+ *	@filter: filter to apply
+ *	@flen: length of filter
+ *
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. skb is the data we are
+ * filtering, filter is the array of filter instructions, and
+ * flen is the number of filter blocks in the array.
+ *
+ * Loads with a non-negative offset read from the linear data and fall
+ * back to skb_header_pointer(); negative offsets below SKF_AD_OFF go
+ * through load_pointer(); offsets at or above SKF_AD_OFF select the
+ * ancillary values handled at the bottom of the loop.  Any failed load,
+ * division by zero or unknown instruction ends the run with 0.
+ */
+int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+{
+	unsigned char *data = skb->data;
+	/* len is UNSIGNED. Byte wide insns relies only on implicit
+	   type casts to prevent reading arbitrary memory locations.
+	 */
+	unsigned int len = skb->len-skb->data_len;
+	struct sock_filter *fentry;	/* We walk down these */
+	u32 A = 0;	   		/* Accumulator */
+	u32 X = 0;   			/* Index Register */
+	/*
+	 * Scratch Memory Store.  Zeroed because a filter may load from a
+	 * slot it never stored to and return that value to its owner;
+	 * uninitialised slots would disclose kernel stack contents.
+	 */
+	u32 mem[BPF_MEMWORDS] = { 0 };
+	int k;
+	int pc;
+
+	/*
+	 * Process array of filter instructions.
+	 */
+	for (pc = 0; pc < flen; pc++) {
+		fentry = &filter[pc];
+
+		switch (fentry->code) {
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += fentry->k;
+			continue;
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= fentry->k;
+			continue;
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= fentry->k;
+			continue;
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+		case BPF_ALU|BPF_DIV|BPF_K:
+			if (fentry->k == 0)
+				return 0;
+			A /= fentry->k;
+			continue;
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= fentry->k;
+			continue;
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= fentry->k;
+			continue;
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= fentry->k;
+			continue;
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= fentry->k;
+			continue;
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+		case BPF_JMP|BPF_JA:
+			pc += fentry->k;
+			continue;
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = fentry->k;
+ load_w:
+			/* Fast path: the word lies in the linear data */
+			if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
+				A = ntohl(*(u32*)&data[k]);
+				continue;
+			}
+			if (k < 0) {
+				u8 *ptr;
+
+				/* Ancillary offsets are handled after the switch */
+				if (k >= SKF_AD_OFF)
+					break;
+				ptr = load_pointer(skb, k);
+				if (ptr) {
+					A = ntohl(*(u32*)ptr);
+					continue;
+				}
+			} else {
+				u32 _tmp, *p;
+				p = skb_header_pointer(skb, k, 4, &_tmp);
+				if (p != NULL) {
+					A = ntohl(*p);
+					continue;
+				}
+			}
+			return 0;
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = fentry->k;
+ load_h:
+			if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) {
+				A = ntohs(*(u16*)&data[k]);
+				continue;
+			}
+			if (k < 0) {
+				u8 *ptr;
+
+				if (k >= SKF_AD_OFF)
+					break;
+				ptr = load_pointer(skb, k);
+				if (ptr) {
+					A = ntohs(*(u16*)ptr);
+					continue;
+				}
+			} else {
+				u16 _tmp, *p;
+				p = skb_header_pointer(skb, k, 2, &_tmp);
+				if (p != NULL) {
+					A = ntohs(*p);
+					continue;
+				}
+			}
+			return 0;
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = fentry->k;
+load_b:
+			if (k >= 0 && (unsigned int)k < len) {
+				A = data[k];
+				continue;
+			}
+			if (k < 0) {
+				u8 *ptr;
+
+				if (k >= SKF_AD_OFF)
+					break;
+				ptr = load_pointer(skb, k);
+				if (ptr) {
+					A = *ptr;
+					continue;
+				}
+			} else {
+				u8 _tmp, *p;
+				p = skb_header_pointer(skb, k, 1, &_tmp);
+				if (p != NULL) {
+					A = *p;
+					continue;
+				}
+			}
+			return 0;
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = len;
+			continue;
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = len;
+			continue;
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + fentry->k;
+			goto load_w;
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + fentry->k;
+			goto load_h;
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + fentry->k;
+			goto load_b;
+		case BPF_LDX|BPF_B|BPF_MSH:
+			if (fentry->k >= len)
+				return 0;
+			X = (data[fentry->k] & 0xf) << 2;
+			continue;
+		case BPF_LD|BPF_IMM:
+			A = fentry->k;
+			continue;
+		case BPF_LDX|BPF_IMM:
+			X = fentry->k;
+			continue;
+		case BPF_LD|BPF_MEM:
+			A = mem[fentry->k];
+			continue;
+		case BPF_LDX|BPF_MEM:
+			X = mem[fentry->k];
+			continue;
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		case BPF_RET|BPF_K:
+			return ((unsigned int)fentry->k);
+		case BPF_RET|BPF_A:
+			return ((unsigned int)A);
+		case BPF_ST:
+			mem[fentry->k] = A;
+			continue;
+		case BPF_STX:
+			mem[fentry->k] = X;
+			continue;
+		default:
+			/* Invalid instruction counts as RET */
+			return 0;
+		}
+
+		/*
+		 * Handle ancillary data, which are impossible
+		 * (or very difficult) to get parsing packet contents.
+		 */
+		switch (k-SKF_AD_OFF) {
+		case SKF_AD_PROTOCOL:
+			A = htons(skb->protocol);
+			continue;
+		case SKF_AD_PKTTYPE:
+			A = skb->pkt_type;
+			continue;
+		case SKF_AD_IFINDEX:
+			/* No device attached to this skb: nothing to report */
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->ifindex;
+			continue;
+		default:
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	sk_chk_filter - verify socket filter code
+ *	@filter: filter to verify
+ *	@flen: length of filter
+ *
+ * Sanity-check a user supplied filter before it is ever run: every
+ * jump target must lie inside the program, scratch memory accesses
+ * must use a valid slot, and the last instruction must be a RET.
+ * Jump offsets are unsigned, so jumps can only go forwards.
+ *
+ * Returns 0 if the rule set is legal or a negative errno code if not.
+ */
+int sk_chk_filter(struct sock_filter *filter, int flen)
+{
+	int pc;
+
+	if (flen == 0 ||
+	    (unsigned int)flen >= (~0U / sizeof(struct sock_filter)))
+		return -EINVAL;
+
+	for (pc = 0; pc < flen; pc++) {
+		struct sock_filter *insn = &filter[pc];
+
+		if (BPF_CLASS(insn->code) == BPF_JMP) {
+			if (BPF_OP(insn->code) == BPF_JA) {
+				/*
+				 * Unconditional jumps carry a full 32-bit
+				 * offset in k, unlike the 8-bit jt/jf of
+				 * conditional jumps. --ANK (981016)
+				 */
+				if (insn->k >= (unsigned)(flen-pc-1))
+					return -EINVAL;
+			} else if (pc + insn->jt + 1 >= flen ||
+				   pc + insn->jf + 1 >= flen) {
+				/* both branches of a conditional must land inside */
+				return -EINVAL;
+			}
+		}
+
+		/* k is only a scratch slot index for the four opcodes below */
+		if (insn->k < BPF_MEMWORDS)
+			continue;
+
+		switch (insn->code) {
+		case BPF_ST:
+		case BPF_STX:
+		case BPF_LD|BPF_MEM:
+		case BPF_LDX|BPF_MEM:
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Whatever forward path the program takes, it cannot run past
+	 * the final instruction, so that one has to be a return.
+	 */
+	if (BPF_CLASS(filter[flen - 1].code) != BPF_RET)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ *	sk_attach_filter - attach a socket filter
+ *	@fprog: the filter program
+ *	@sk: the socket to use
+ *
+ * Copy the user's filter program into socket-accounted memory, verify
+ * it with sk_chk_filter() and, if it passes, install it on the socket
+ * in place of any previous filter. Returns zero on success or a
+ * negative errno code if the program is missing, too long, cannot be
+ * copied, fails verification or memory is short.
+ */
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+	struct sk_filter *new_fp, *old_fp;
+	int err;
+
+	/* Make sure new filter is there and in the right amounts. */
+	if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+		return -EINVAL;
+
+	new_fp = sock_kmalloc(sk, fsize+sizeof(*new_fp), GFP_KERNEL);
+	if (!new_fp)
+		return -ENOMEM;
+
+	if (copy_from_user(new_fp->insns, fprog->filter, fsize)) {
+		sock_kfree_s(sk, new_fp, fsize+sizeof(*new_fp));
+		return -EFAULT;
+	}
+
+	atomic_set(&new_fp->refcnt, 1);
+	new_fp->len = fprog->len;
+
+	err = sk_chk_filter(new_fp->insns, new_fp->len);
+	if (err) {
+		/* Rejected: drop the reference taken just above */
+		sk_filter_release(sk, new_fp);
+		return err;
+	}
+
+	/* Swap the filter in under the socket spinlock */
+	spin_lock_bh(&sk->sk_lock.slock);
+	old_fp = sk->sk_filter;
+	sk->sk_filter = new_fp;
+	spin_unlock_bh(&sk->sk_lock.slock);
+
+	if (old_fp)
+		sk_filter_release(sk, old_fp);
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_chk_filter);
+EXPORT_SYMBOL(sk_run_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
new file mode 100644
index 00000000000..f289570b15a
--- /dev/null
+++ b/net/core/flow.c
@@ -0,0 +1,371 @@
+/* flow.c: Generic flow cache.
+ *
+ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <net/flow.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+struct flow_cache_entry {	/* one cached flow resolution, chained per hash bucket */
+	struct flow_cache_entry	*next;		/* next entry in the same bucket chain */
+	u16			family;		/* family given to flow_cache_lookup(); part of the match */
+	u8			dir;		/* direction given to flow_cache_lookup(); part of the match */
+	struct flowi		key;		/* copy of the flow key this entry was created for */
+	u32			genid;		/* flow_cache_genid value when object was last resolved */
+	void			*object;	/* resolver result, or NULL when nothing is cached */
+	atomic_t		*object_ref;	/* counter adjusted whenever object is cached, handed out or dropped */
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);	/* exported; entries whose genid differs from this are stale */
+
+static u32 flow_hash_shift;	/* log2 of the per-cpu bucket count; set in flow_cache_init() */
+#define flow_hash_size	(1 << flow_hash_shift)
+static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };	/* per-cpu bucket array */
+
+#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+
+static kmem_cache_t *flow_cachep;	/* slab cache backing struct flow_cache_entry */
+
+static int flow_lwm, flow_hwm;	/* per-cpu entry counts: shrink target / shrink trigger */
+
+struct flow_percpu_info {	/* per-cpu hashing state and entry accounting */
+	int hash_rnd_recalc;	/* non-zero: choose a new hash_rnd before the next lookup */
+	u32 hash_rnd;		/* seed handed to jhash2() by flow_hash_code() */
+	int count;		/* number of entries currently held in this cpu's table */
+} ____cacheline_aligned;
+static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
+
+#define flow_hash_rnd_recalc(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+#define flow_hash_rnd(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd)
+#define flow_count(cpu) \
+	(per_cpu(flow_hash_info, cpu).count)
+
+static struct timer_list flow_hash_rnd_timer;	/* runs flow_cache_new_hashrnd() periodically */
+
+#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)	/* ten minutes, expressed in jiffies */
+
+struct flow_flush_info {	/* shared state for one flow_cache_flush() run */
+	atomic_t cpuleft;		/* cpus that have not finished their flush yet */
+	struct completion completion;	/* completed by the cpu that brings cpuleft to zero */
+};
+static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+
+#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+
+/*
+ * Timer handler for flow_hash_rnd_timer: ask every cpu to pick a new
+ * hash seed on its next lookup, then re-arm the timer for another
+ * FLOW_HASH_RND_PERIOD.  @arg is unused.
+ */
+static void flow_cache_new_hashrnd(unsigned long arg)
+{
+	int cpu;
+
+	for_each_cpu(cpu) {
+		flow_hash_rnd_recalc(cpu) = 1;
+	}
+
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+}
+
+/*
+ * Trim every bucket of @cpu's table down to at most @shrink_to entries.
+ * Entries beyond that are unlinked and freed; for each freed entry that
+ * still caches an object the object's reference counter is decremented.
+ */
+static void __flow_cache_shrink(int cpu, int shrink_to)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < flow_hash_size; bucket++) {
+		struct flow_cache_entry **link = &flow_table(cpu)[bucket];
+		struct flow_cache_entry *victim;
+		int kept;
+
+		/* Step over the leading entries that are allowed to stay. */
+		for (kept = 0; *link != NULL && kept < shrink_to; kept++)
+			link = &(*link)->next;
+
+		/* Everything still reachable from *link goes away. */
+		for (victim = *link; victim != NULL; victim = *link) {
+			*link = victim->next;
+			if (victim->object)
+				atomic_dec(victim->object_ref);
+			kmem_cache_free(flow_cachep, victim);
+			flow_count(cpu)--;
+		}
+	}
+}
+
+/*
+ * Shrink @cpu's table so that each bucket keeps at most
+ * flow_lwm / flow_hash_size entries.
+ */
+static void flow_cache_shrink(int cpu)
+{
+	__flow_cache_shrink(cpu, flow_lwm / flow_hash_size);
+}
+
+/*
+ * Give @cpu a fresh random hash seed and clear its recalc flag.  The
+ * whole table is emptied afterwards, since existing entries were
+ * placed using the previous seed.
+ */
+static void flow_new_hash_rnd(int cpu)
+{
+	u32 *seed = &flow_hash_rnd(cpu);
+
+	get_random_bytes(seed, sizeof(u32));
+	flow_hash_rnd_recalc(cpu) = 0;
+
+	__flow_cache_shrink(cpu, 0);
+}
+
+/*
+ * Hash @key, viewed as an array of u32 words, with @cpu's seed and
+ * reduce the result to a bucket index below flow_hash_size.
+ */
+static u32 flow_hash_code(struct flowi *key, int cpu)
+{
+	u32 *words = (u32 *) key;
+	u32 hash;
+
+	hash = jhash2(words, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu));
+
+	return hash & (flow_hash_size - 1);
+}
+
+/* Keys are compared one machine word at a time. */
+#if (BITS_PER_LONG == 64)
+typedef u64 flow_compare_t;
+#else
+typedef u32 flow_compare_t;
+#endif
+
+/*
+ * Only declared here; it is called when sizeof(struct flowi) is not a
+ * multiple of the compare word (presumably left undefined so that such
+ * a build fails at link time -- confirm).
+ */
+extern void flowi_is_missized(void);
+
+/*
+ * Word-wise equality test for two flow keys: returns 0 when they are
+ * identical and 1 at the first differing word.
+ *
+ * memcmp would do the same job, but it cannot rely on the alignment
+ * and the constant size that are known here.
+ */
+static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+{
+	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
+	flow_compare_t *a = (flow_compare_t *) key1;
+	flow_compare_t *b = (flow_compare_t *) key2;
+	flow_compare_t *a_end = a + n_elem;
+
+	if (sizeof(struct flowi) % sizeof(flow_compare_t))
+		flowi_is_missized();
+
+	do {
+		if (*a != *b)
+			return 1;
+		a++;
+		b++;
+	} while (a < a_end);
+
+	return 0;
+}
+/*
+ * flow_cache_lookup - look a flow up in this cpu's cache, resolving on a miss
+ * @key:      flow key to look up
+ * @family:   family the entry must match
+ * @dir:      direction the entry must match
+ * @resolver: callback that produces the object and its reference counter
+ *
+ * Runs with bottom halves disabled on the current cpu.  A matching
+ * entry whose genid equals flow_cache_genid is returned directly (its
+ * reference counter is incremented when the object is non-NULL).  A
+ * matching but stale entry is re-resolved in place; when no entry
+ * matches, a new one is allocated if possible.  If this cpu's table
+ * has not been set up yet, or the allocation fails, the resolver
+ * result is returned without being cached.
+ *
+ * Returns the object produced by the cache or by @resolver.
+ */
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+			flow_resolve_t resolver)
+{
+	struct flow_cache_entry *fle, **head;
+	unsigned int hash;
+	int cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+
+	fle = NULL;
+	/* Packet really early in init?  Making flow_cache_init a
+	 * pre-smp initcall would solve this.  --RR */
+	if (!flow_table(cpu))
+		goto nocache;
+
+	if (flow_hash_rnd_recalc(cpu))
+		flow_new_hash_rnd(cpu);	/* also empties this cpu's table */
+	hash = flow_hash_code(key, cpu);
+
+	head = &flow_table(cpu)[hash];
+	for (fle = *head; fle; fle = fle->next) {
+		if (fle->family == family &&
+		    fle->dir == dir &&
+		    flow_key_compare(key, &fle->key) == 0) {
+			if (fle->genid == atomic_read(&flow_cache_genid)) {
+				void *ret = fle->object;	/* fresh hit */
+
+				if (ret)
+					atomic_inc(fle->object_ref);
+				local_bh_enable();
+
+				return ret;
+			}
+			break;	/* stale match: keep fle and re-resolve below */
+		}
+	}
+
+	if (!fle) {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+		if (fle) {
+			fle->next = *head;	/* new entry goes to the front of the chain */
+			*head = fle;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, sizeof(*key));
+			fle->object = NULL;
+			flow_count(cpu)++;
+		}
+	}
+
+nocache:
+	{
+		void *obj;
+		atomic_t *obj_ref;
+
+		resolver(key, family, dir, &obj, &obj_ref);
+
+		if (fle) {
+			fle->genid = atomic_read(&flow_cache_genid);
+
+			if (fle->object)
+				atomic_dec(fle->object_ref);	/* drop the count held for the old object */
+
+			fle->object = obj;
+			fle->object_ref = obj_ref;
+			if (obj)
+				atomic_inc(fle->object_ref);	/* count held on behalf of the cache entry */
+		}
+		local_bh_enable();
+
+		return obj;
+	}
+}
+
+/*
+ * Per-cpu flush worker.  @data carries a struct flow_flush_info
+ * pointer.  Every entry of the current cpu's table that still caches
+ * an object under an outdated genid has that object dropped and its
+ * reference counter decremented.  The cpu that brings info->cpuleft
+ * to zero signals the completion.
+ */
+static void flow_cache_flush_tasklet(unsigned long data)
+{
+	struct flow_flush_info *info = (void *)data;
+	int cpu = smp_processor_id();
+	int bucket;
+
+	for (bucket = 0; bucket < flow_hash_size; bucket++) {
+		struct flow_cache_entry *fle = flow_table(cpu)[bucket];
+
+		while (fle != NULL) {
+			unsigned genid = atomic_read(&flow_cache_genid);
+
+			if (fle->object && fle->genid != genid) {
+				fle->object = NULL;
+				atomic_dec(fle->object_ref);
+			}
+			fle = fle->next;
+		}
+	}
+
+	if (atomic_dec_and_test(&info->cpuleft))
+		complete(&info->completion);
+}
+
+/*
+ * Cross-call target used by flow_cache_flush(): stores @data (the
+ * flush info) in the current cpu's flush tasklet and schedules it.
+ */
+static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+static void flow_cache_flush_per_cpu(void *data)
+{
+	struct flow_flush_info *info = data;
+	struct tasklet_struct *t = flow_flush_tasklet(smp_processor_id());
+
+	t->data = (unsigned long)info;
+	tasklet_schedule(t);
+}
+
+/*
+ * Drop outdated cached objects on every online cpu and wait until all
+ * of them have finished.  Remote cpus are reached through
+ * smp_call_function(); the calling cpu runs the flush routine itself.
+ * Concurrent callers are serialised by a local semaphore.
+ */
+void flow_cache_flush(void)
+{
+	static DECLARE_MUTEX(flow_flush_sem);
+	struct flow_flush_info flush;
+
+	/* The set of online cpus must not change while we count them. */
+	lock_cpu_hotplug();
+	down(&flow_flush_sem);
+
+	init_completion(&flush.completion);
+	atomic_set(&flush.cpuleft, num_online_cpus());
+
+	local_bh_disable();
+	smp_call_function(flow_cache_flush_per_cpu, &flush, 1, 0);
+	flow_cache_flush_tasklet((unsigned long)&flush);
+	local_bh_enable();
+
+	wait_for_completion(&flush.completion);
+
+	up(&flow_flush_sem);
+	unlock_cpu_hotplug();
+}
+
+/*
+ * Set up the flow cache for @cpu: allocate and zero a bucket array
+ * large enough for flow_hash_size pointers, reset the per-cpu
+ * bookkeeping and initialise the flush tasklet.  Panics if the pages
+ * cannot be allocated.
+ */
+static void __devinit flow_cache_cpu_prepare(int cpu)
+{
+	unsigned long order = 0;
+
+	/* Smallest page order that holds the whole bucket array. */
+	while ((PAGE_SIZE << order) <
+	       (sizeof(struct flow_cache_entry *)*flow_hash_size))
+		order++;
+
+	flow_table(cpu) = (struct flow_cache_entry **)
+		__get_free_pages(GFP_KERNEL, order);
+	if (flow_table(cpu) == NULL)
+		panic("NET: failed to allocate flow cache order %lu\n", order);
+
+	memset(flow_table(cpu), 0, PAGE_SIZE << order);
+
+	flow_count(cpu) = 0;
+	flow_hash_rnd_recalc(cpu) = 1;
+
+	tasklet_init(flow_flush_tasklet(cpu), flow_cache_flush_tasklet, 0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Cpu hotplug callback: once a cpu is dead, free every entry in its
+ * flow table.  Always returns NOTIFY_OK.
+ */
+static int flow_cache_cpu(struct notifier_block *nfb,
+			  unsigned long action,
+			  void *hcpu)
+{
+	switch (action) {
+	case CPU_DEAD:
+		__flow_cache_shrink((unsigned long)hcpu, 0);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Boot-time setup of the flow cache: create the entry slab, fix the
+ * table size (2^10 buckets) and the low/high watermarks, start the
+ * periodic hash reseed timer, prepare every cpu's table and register
+ * the cpu hotplug callback.  Panics if the slab cannot be created.
+ */
+static int __init flow_cache_init(void)
+{
+	int cpu;
+
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_HWCACHE_ALIGN,
+					NULL, NULL);
+	if (flow_cachep == NULL)
+		panic("NET: failed to allocate flow cache slab\n");
+
+	flow_hash_shift = 10;
+	flow_lwm = 2 * flow_hash_size;
+	flow_hwm = 4 * flow_hash_size;
+
+	init_timer(&flow_hash_rnd_timer);
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	add_timer(&flow_hash_rnd_timer);
+
+	for_each_cpu(cpu) {
+		flow_cache_cpu_prepare(cpu);
+	}
+
+	hotcpu_notifier(flow_cache_cpu, 0);
+	return 0;
+}
+
+module_init(flow_cache_init);
+
+EXPORT_SYMBOL(flow_cache_genid);
+EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
new file mode 100644
index 00000000000..b07c029e821
--- /dev/null
+++ b/net/core/gen_estimator.c
@@ -0,0 +1,250 @@
+/*
+ * net/core/gen_estimator.c	Simple rate estimator.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *              Jamal Hadi Salim - moved it to net/core and reshuffled
+ *              names to make it usable in general net subsystem.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/gen_stats.h>
+
+/*
+   This code is NOT intended to be used for statistics collection,
+   its purpose is to provide a base for statistical multiplexing
+   for controlled load service.
+   If you need only statistics, run a user level daemon which
+   periodically reads byte counters.
+
+   Unfortunately, rate estimation is not a very easy task.
+   F.e. I did not find a simple way to estimate the current peak rate
+   and even failed to formulate the problem 8)8)
+
+   So I preferred not to build an estimator into the scheduler,
+   but run this task separately.
+   Ideally, it should be kernel thread(s), but for now it runs
+   from timers, which puts apparent top bounds on the number of rated
+   flows, has minimal overhead on small, but is enough
+   to handle controlled load service, sets of aggregates.
+
+   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
+
+   avrate = avrate*(1-W) + rate*W
+
+   where W is chosen as negative power of 2: W = 2^(-ewma_log)
+
+   The resulting time constant is:
+
+   T = A/(-ln(1-W))
+
+
+   NOTES.
+
+   * The stored value for avbps is scaled by 2^5, so that maximal
+     rate is ~1Gbit, avpps is scaled by 2^10.
+
+   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
+     for HZ=100 and HZ=1024 8)), maximal interval
+     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
+     are too expensive, longer ones can be implemented
+     at user level painlessly.
+ */
+
+#define EST_MAX_INTERVAL	5
+
+struct gen_estimator	/* one rate estimator, linked into elist[interval] */
+{
+	struct gen_estimator	*next;		/* next estimator with the same interval */
+	struct gnet_stats_basic	*bstats;	/* source byte/packet counters */
+	struct gnet_stats_rate_est	*rate_est;	/* destination for the estimated bps/pps */
+	spinlock_t		*stats_lock;	/* held by est_timer() while sampling and updating */
+	unsigned		interval;	/* index into elist[]; configured interval + 2 */
+	int			ewma_log;	/* right shift applied to each EWMA correction */
+	u64			last_bytes;	/* byte counter seen at the previous sample */
+	u32			last_packets;	/* packet counter seen at the previous sample */
+	u32			avpps;		/* averaged packet rate, scaled by 2^10 */
+	u32			avbps;		/* averaged byte rate, scaled by 2^5 */
+};
+
+struct gen_estimator_head	/* all estimators sharing one sampling interval */
+{
+	struct timer_list	timer;	/* fires est_timer() for this interval */
+	struct gen_estimator	*list;	/* singly linked list of estimators */
+};
+
+static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];	/* indexed by gen_estimator.interval */
+
+/* Estimator array lock */
+static DEFINE_RWLOCK(est_lock);	/* read-held by est_timer(), write-held while lists are relinked */
+/*
+ * Timer handler for elist[arg].  For every estimator on that list it
+ * samples the byte and packet counters under the estimator's stats
+ * lock, folds the rate since the previous sample into the scaled
+ * moving averages and publishes the descaled values in rate_est.
+ * The timer is then re-armed (HZ << idx) / 4 jiffies ahead.
+ */
+static void est_timer(unsigned long arg)
+{
+	int idx = (int)arg;
+	struct gen_estimator *e;
+
+	read_lock(&est_lock);
+	for (e = elist[idx].list; e; e = e->next) {
+		u64 nbytes;
+		u32 npackets;
+		u32 rate;
+
+		spin_lock(e->stats_lock);
+		nbytes = e->bstats->bytes;
+		npackets = e->bstats->packets;
+		rate = (nbytes - e->last_bytes)<<(7 - idx);	/* bytes/sec scaled by 2^5: the period is 2^(idx-2) sec */
+		e->last_bytes = nbytes;
+		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;	/* avrate += (rate - avrate) * W */
+		e->rate_est->bps = (e->avbps+0xF)>>5;	/* drop the 2^5 scale */
+
+		rate = (npackets - e->last_packets)<<(12 - idx);	/* packets/sec scaled by 2^10 */
+		e->last_packets = npackets;
+		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
+		e->rate_est->pps = (e->avpps+0x1FF)>>10;	/* drop the 2^10 scale */
+		spin_unlock(e->stats_lock);
+	}
+
+	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+	read_unlock(&est_lock);
+}
+
+/**
+ * gen_new_estimator - create a new rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Creates a new rate estimator with &bstats as source and &rate_est
+ * as destination and links it into the list for the interval given in
+ * the configuration TLV. The timer for that interval is started when
+ * the list was empty. On every tick the latest statistics are read
+ * from &bstats and the estimated rate is stored in &rate_est, with the
+ * statistics lock grabbed for the duration of the update.
+ *
+ * Returns 0 on success, -EINVAL if the TLV is too short or the interval
+ * is outside -2..3, or -ENOBUFS if no memory is available.
+ */
+int gen_new_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
+{
+	struct gnet_estimator *parm = RTA_DATA(opt);
+	struct gen_estimator_head *head;
+	struct gen_estimator *est;
+
+	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+		return -EINVAL;
+
+	if (parm->interval < -2 || parm->interval > 3)
+		return -EINVAL;
+
+	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	if (!est)
+		return -ENOBUFS;
+	memset(est, 0, sizeof(*est));
+
+	/* Configuration and bookkeeping pointers. */
+	est->interval = parm->interval + 2;
+	est->ewma_log = parm->ewma_log;
+	est->bstats = bstats;
+	est->rate_est = rate_est;
+	est->stats_lock = stats_lock;
+
+	/* Seed the averages from the current counters and estimates. */
+	est->last_bytes = bstats->bytes;
+	est->last_packets = bstats->packets;
+	est->avbps = rate_est->bps<<5;
+	est->avpps = rate_est->pps<<10;
+
+	head = &elist[est->interval];
+	est->next = head->list;
+	if (est->next == NULL) {
+		/* First estimator for this interval: start its timer. */
+		init_timer(&head->timer);
+		head->timer.data = est->interval;
+		head->timer.function = est_timer;
+		head->timer.expires = jiffies + ((HZ<<est->interval)/4);
+		add_timer(&head->timer);
+	}
+
+	write_lock_bh(&est_lock);
+	head->list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+/**
+ * gen_kill_estimator - remove a rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Walks every interval list, unlinks and frees each estimator that was
+ * registered for both &bstats and &rate_est, and deletes the timer of
+ * any list that became empty as a result.
+ */
+void gen_kill_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est)
+{
+	int idx;
+
+	for (idx = 0; idx <= EST_MAX_INTERVAL; idx++) {
+		struct gen_estimator_head *head = &elist[idx];
+		struct gen_estimator **link = &head->list;
+		struct gen_estimator *cur;
+		int removed = 0;
+
+		while ((cur = *link) != NULL) {
+			if (cur->rate_est == rate_est && cur->bstats == bstats) {
+				/* Unlink under the write lock, then free. */
+				write_lock_bh(&est_lock);
+				*link = cur->next;
+				write_unlock_bh(&est_lock);
+
+				kfree(cur);
+				removed++;
+			} else {
+				link = &cur->next;
+			}
+		}
+
+		if (removed && head->list == NULL)
+			del_timer(&head->timer);
+	}
+}
+
+/**
+ * gen_replace_estimator - replace rate estimator configuration
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Removes any estimator registered for &bstats and &rate_est with
+ * gen_kill_estimator() and then creates a new one from @opt with
+ * gen_new_estimator().
+ *
+ * Returns the result of gen_new_estimator(): 0 on success or a
+ * negative error code.
+ */
+int
+gen_replace_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
+	struct rtattr *opt)
+{
+	gen_kill_estimator(bstats, rate_est);
+
+	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+}
+    
+
+EXPORT_SYMBOL(gen_kill_estimator);
+EXPORT_SYMBOL(gen_new_estimator);
+EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
new file mode 100644
index 00000000000..8f21490355f
--- /dev/null
+++ b/net/core/gen_stats.c
@@ -0,0 +1,239 @@
+/*
+ * net/core/gen_stats.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:  Thomas Graf <tgraf@suug.ch>
+ *           Jamal Hadi Salim
+ *           Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * See Documentation/networking/gen_stats.txt
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h>
+#include <linux/gen_stats.h>
+#include <net/gen_stats.h>
+
+/*
+ * Append one TLV of @type carrying @size bytes from @buf to d->skb.
+ * Returns 0 on success.  On the rtattr_failure path the statistics
+ * lock held in @d is released and -1 is returned.
+ */
+static inline int
+gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
+{
+	RTA_PUT(d->skb, type, size, buf);	/* presumably jumps to rtattr_failure when the skb lacks room -- confirm */
+	return 0;
+
+rtattr_failure:
+	spin_unlock_bh(d->lock);
+	return -1;
+}
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
+ * @xstats_type: TLV type for backward compatibility xstats TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Clears the dumping handle, grabs the statistic lock and, when @type
+ * is non-zero, appends an empty TLV header to the socket buffer for use
+ * as a container for all other statistic TLVs.
+ *
+ * The compatibility TLV types are recorded in the handle so that the
+ * gnet_stats_copy_XXX() functions also fill a local copy of
+ * struct tc_stats.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
+	int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+{
+	memset(d, 0, sizeof(*d));
+
+	spin_lock_bh(lock);
+
+	d->skb = skb;
+	d->lock = lock;
+	d->compat_tc_stats = tc_stats_type;
+	d->compat_xstats = xstats_type;
+
+	/* A container TLV is only opened when a top level type is given. */
+	if (type != 0)
+		d->tail = (struct rtattr *) skb->tail;
+
+	if (!d->tail)
+		return 0;
+
+	return gnet_stats_copy(d, type, NULL, 0);
+}
+
+/**
+ * gnet_stats_start_copy - start dumping procedure
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use as a container for all
+ * other statistic TLVs.
+ *
+ * This is gnet_stats_start_copy_compat() with both backward
+ * compatibility TLV types set to zero.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
+	struct gnet_dump *d)
+{
+	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
+}
+
+/**
+ * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+ * @d: dumping handle
+ * @b: basic statistics
+ *
+ * In compatibility mode the byte and packet counters are also recorded
+ * in the handle's struct tc_stats. If a top level TLV was opened by
+ * gnet_stats_start_copy(), the basic statistics are appended to it.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.packets = b->packets;
+		d->tc_stats.bytes = b->bytes;
+	}
+
+	if (!d->tail)
+		return 0;
+
+	return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
+}
+
+/**
+ * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
+ * @d: dumping handle
+ * @r: rate estimator statistics
+ *
+ * In compatibility mode the bps and pps estimates are also recorded in
+ * the handle's struct tc_stats. If a top level TLV was opened by
+ * gnet_stats_start_copy(), the rate estimator statistics are appended
+ * to it.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.pps = r->pps;
+		d->tc_stats.bps = r->bps;
+	}
+
+	if (!d->tail)
+		return 0;
+
+	return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+}
+
+/**
+ * gnet_stats_copy_queue - copy queue statistics into statistics TLV
+ * @d: dumping handle
+ * @q: queue statistics
+ *
+ * In compatibility mode the queue length, backlog, drop and overlimit
+ * counters are also recorded in the handle's struct tc_stats. If a top
+ * level TLV was opened by gnet_stats_start_copy(), the queue statistics
+ * are appended to it.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.qlen = q->qlen;
+		d->tc_stats.backlog = q->backlog;
+		d->tc_stats.drops = q->drops;
+		d->tc_stats.overlimits = q->overlimits;
+	}
+
+	if (!d->tail)
+		return 0;
+
+	return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
+}
+
+/**
+ * gnet_stats_copy_app - copy application specific statistics into statistics TLV
+ * @d: dumping handle
+ * @st: application specific statistics data
+ * @len: length of data
+ *
+ * In backward compatibility mode the data pointer and length are
+ * remembered in the handle for the later XSTATS TLV. If a top level TLV
+ * was opened by gnet_stats_start_copy(), the application specific
+ * statistics are appended to it.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
+{
+	if (d->compat_xstats) {
+		d->xstats_len = len;
+		d->xstats = st;
+	}
+
+	if (!d->tail)
+		return 0;
+
+	return gnet_stats_copy(d, TCA_STATS_APP, st, len);
+}
+
+/**
+ * gnet_stats_finish_copy - finish dumping procedure
+ * @d: dumping handle
+ *
+ * Corrects the length of the top level TLV to include all TLVs added
+ * by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs
+ * if gnet_stats_start_copy_compat() was used and releases the statistics
+ * lock.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_finish_copy(struct gnet_dump *d)
+{
+	/* Close the container: its length spans everything added since. */
+	if (d->tail)
+		d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+
+	if (d->compat_tc_stats &&
+	    gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
+			    sizeof(d->tc_stats)) < 0)
+		return -1;
+
+	if (d->compat_xstats && d->xstats &&
+	    gnet_stats_copy(d, d->compat_xstats, d->xstats,
+			    d->xstats_len) < 0)
+		return -1;
+
+	spin_unlock_bh(d->lock);
+	return 0;
+}
+
+
+EXPORT_SYMBOL(gnet_stats_start_copy);
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
+EXPORT_SYMBOL(gnet_stats_copy_basic);
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
+EXPORT_SYMBOL(gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_copy_app);
+EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
new file mode 100644
index 00000000000..d57ace949ab
--- /dev/null
+++ b/net/core/iovec.c
@@ -0,0 +1,239 @@
+/*
+ *	iovec manipulation routines.
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *		Andrew Lunn	:	Errors in iovec copying.
+ *		Pedro Roque	:	Added memcpy_fromiovecend and
+ *					csum_..._fromiovecend.
+ *		Andi Kleen	:	fixed error handling for 2.1
+ *		Alexey Kuznetsov:	2.1 optimisations
+ *		Andi Kleen	:	Fix csum*fromiovecend for IPv6.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <net/checksum.h>
+#include <net/sock.h>
+
+/*
+ *	Verify iovec. The caller must ensure that the iovec is big enough
+ *	to hold the message iovec.
+ *
+ *	The address (if any) is moved into the kernel for VERIFY_READ,
+ *	the user iovec array is copied into @iov and the message header
+ *	is repointed at the kernel copies. Returns the summed length of
+ *	all segments, or a negative errno.
+ *
+ *	Save time not doing verify_area. copy_*_user will make this work
+ *	in any case.
+ */
+
+int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+{
+	int total, bytes, i;
+
+	if (!m->msg_namelen) {
+		m->msg_name = NULL;
+	} else {
+		if (mode == VERIFY_READ) {
+			int rc = move_addr_to_kernel(m->msg_name,
+						     m->msg_namelen, address);
+			if (rc < 0)
+				return rc;
+		}
+		m->msg_name = address;
+	}
+
+	bytes = m->msg_iovlen * sizeof(struct iovec);
+	if (copy_from_user(iov, m->msg_iov, bytes))
+		return -EFAULT;
+
+	m->msg_iov = iov;
+
+	total = 0;
+	for (i = 0; i < m->msg_iovlen; i++) {
+		total += iov[i].iov_len;
+		/*
+		 * This does not validate the user data; it only keeps the
+		 * sum from being returned as a negative value, which the
+		 * caller would read as an errno. Overflow is still
+		 * possible, but it is harmless.
+		 */
+		if (total < 0)
+			return -EMSGSIZE;
+	}
+
+	return total;
+}
+
+/*
+ *	Copy @len bytes of kernel data to the user iovec, segment by
+ *	segment. Returns 0, or -EFAULT if a user copy fails.
+ *
+ *	Note: this modifies the original iovec (base and length of each
+ *	segment written to are advanced).
+ */
+ 
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+	for (; len > 0; iov++) {
+		int chunk;
+
+		/* Nothing to do for an exhausted segment. */
+		if (!iov->iov_len)
+			continue;
+
+		chunk = min_t(unsigned int, iov->iov_len, len);
+		if (copy_to_user(iov->iov_base, kdata, chunk))
+			return -EFAULT;
+
+		kdata += chunk;
+		len -= chunk;
+		iov->iov_base += chunk;
+		iov->iov_len -= chunk;
+	}
+
+	return 0;
+}
+
+/*
+ *	Copy @len bytes from the user iovec into kernel memory, segment
+ *	by segment. Returns 0, or -EFAULT if a user copy fails.
+ *
+ *	Note: this modifies the original iovec (base and length of each
+ *	segment read from are advanced).
+ */
+ 
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
+{
+	for (; len > 0; iov++) {
+		int chunk;
+
+		/* Nothing to do for an exhausted segment. */
+		if (!iov->iov_len)
+			continue;
+
+		chunk = min_t(unsigned int, len, iov->iov_len);
+		if (copy_from_user(kdata, iov->iov_base, chunk))
+			return -EFAULT;
+
+		kdata += chunk;
+		len -= chunk;
+		iov->iov_len -= chunk;
+		iov->iov_base += chunk;
+	}
+
+	return 0;
+}
+
+/*
+ *	For use with ip_build_xmit
+ *
+ *	Copy @len bytes into @kdata from the user iovec, starting
+ *	@offset bytes into it. The iovec itself is left unmodified.
+ *	Returns 0, or -EFAULT if a user copy fails.
+ */
+int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
+			int len)
+{
+	/* Walk past the segments that lie entirely before the offset. */
+	for (; offset >= iov->iov_len; iov++)
+		offset -= iov->iov_len;
+
+	for (; len > 0; iov++) {
+		u8 __user *src = iov->iov_base + offset;
+		int chunk = min_t(unsigned int, len, iov->iov_len - offset);
+
+		/* Only the first segment is entered part-way through. */
+		offset = 0;
+
+		if (copy_from_user(kdata, src, chunk))
+			return -EFAULT;
+
+		kdata += chunk;
+		len -= chunk;
+	}
+
+	return 0;
+}
+
+/*
+ *	And now for the all-in-one: copy and checksum from a user iovec
+ *	directly to a datagram
+ *	Calls to csum_partial but the last must be in 32 bit chunks
+ *
+ *	ip_build_xmit must ensure that when fragmenting only the last
+ *	call to this function will be unaligned also.
+ *
+ *	@kdata:  kernel destination buffer
+ *	@iov:    user iovec to read from (not modified)
+ *	@offset: number of bytes to skip at the start of the iovec
+ *	@len:    number of bytes to copy
+ *	@csump:  running checksum; read on entry, written back on success
+ *
+ *	Returns 0 on success, -EFAULT if a plain user copy fails, or the
+ *	error reported by csum_and_copy_from_user(). *csump is not
+ *	updated when an error is returned.
+ */
+int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
+				 int offset, unsigned int len, int *csump)
+{
+	int csum = *csump;
+	int partial_cnt = 0, err = 0;	/* partial_cnt: bytes of an unfinished 4-byte word already copied */
+
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;	/* only the first segment starts part-way in */
+
+		/* There is a remnant from previous iov. */
+		if (partial_cnt) {
+			int par_len = 4 - partial_cnt;	/* bytes still missing from the 4-byte word */
+
+			/* iov component is too short ... */
+			if (par_len > copy) {
+				if (copy_from_user(kdata, base, copy))
+					goto out_fault;
+				kdata += copy;
+				base += copy;
+				partial_cnt += copy;
+				len -= copy;
+				iov++;
+				if (len)
+					continue;
+				*csump = csum_partial(kdata - partial_cnt,
+							 partial_cnt, csum);	/* data ended inside the word: checksum what we have */
+				goto out;
+			}
+			if (copy_from_user(kdata, base, par_len))
+				goto out_fault;
+			csum = csum_partial(kdata - partial_cnt, 4, csum);	/* the word is complete now */
+			kdata += par_len;
+			base  += par_len;
+			copy  -= par_len;
+			len   -= par_len;
+			partial_cnt = 0;
+		}
+
+		if (len > copy) {	/* more segments follow: keep the checksummed part 4-byte sized */
+			partial_cnt = copy % 4;
+			if (partial_cnt) {
+				copy -= partial_cnt;
+				if (copy_from_user(kdata + copy, base + copy,
+				 		partial_cnt))
+					goto out_fault;
+			}
+		}
+
+		if (copy) {
+			csum = csum_and_copy_from_user(base, kdata, copy,
+							csum, &err);
+			if (err)
+				goto out;
+		}
+		len   -= copy + partial_cnt;
+		kdata += copy + partial_cnt;
+		iov++;
+	}
+        *csump = csum;
+out:
+	return err;
+
+out_fault:
+	err = -EFAULT;
+	goto out;
+}
+
+EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+EXPORT_SYMBOL(memcpy_fromiovec);
+EXPORT_SYMBOL(memcpy_fromiovecend);
+EXPORT_SYMBOL(memcpy_toiovec);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
new file mode 100644
index 00000000000..4859b7446c6
--- /dev/null
+++ b/net/core/link_watch.c
@@ -0,0 +1,137 @@
+/*
+ * Linux network device link state notification
+ *
+ * Author:
+ *     Stefan Rompf <sux@loplof.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+
+enum lw_bits {	/* bit numbers within linkwatch_flags */
+	LW_RUNNING = 0,	/* set when linkwatch_work has been scheduled; cleared in linkwatch_event() */
+	LW_SE_USED	/* set while the static singleevent is queued */
+};
+
+static unsigned long linkwatch_flags;	/* bitmap indexed by enum lw_bits */
+static unsigned long linkwatch_nextevent;	/* jiffies value before which the queue should not run again */
+
+static void linkwatch_event(void *dummy);
+static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);	/* work item that runs linkwatch_event() */
+
+static LIST_HEAD(lweventlist);	/* queued struct lw_event entries */
+static DEFINE_SPINLOCK(lweventlist_lock);	/* taken around every access to lweventlist */
+
+struct lw_event {	/* one queued link state notification */
+	struct list_head list;	/* linkage on lweventlist */
+	struct net_device *dev;	/* device the event is for; a reference is held while queued */
+};
+
+/* Avoid kmalloc() for most systems */
+static struct lw_event singleevent;	/* statically allocated event, guarded by LW_SE_USED */
+
+/*
+ * Process every queued link watch event: detach the whole queue under
+ * the list lock, then for each event release its storage, clear the
+ * device's pending bit, send a state change notification if the device
+ * is up and drop the device reference taken when the event was queued.
+ *
+ * Must be called with the rtnl semaphore held.
+ */
+void linkwatch_run_queue(void)
+{
+	LIST_HEAD(pending);
+	struct list_head *pos, *tmp;
+
+	spin_lock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &pending);
+	spin_unlock_irq(&lweventlist_lock);
+
+	list_for_each_safe(pos, tmp, &pending) {
+		struct lw_event *ev = list_entry(pos, struct lw_event, list);
+		struct net_device *dev = ev->dev;
+
+		/* The static event is recycled, all others were kmalloc'ed. */
+		if (ev != &singleevent)
+			kfree(ev);
+		else
+			clear_bit(LW_SE_USED, &linkwatch_flags);
+
+		/* From here on new events for this device may be queued. */
+		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+		if (dev->flags & IFF_UP)
+			netdev_state_change(dev);
+
+		dev_put(dev);
+	}
+}
+
+
+/*
+ * Work handler for linkwatch_work; @dummy is unused.  Records the
+ * earliest time of the next run, allows the work to be scheduled again
+ * and processes the queue under the rtnl semaphore.
+ */
+static void linkwatch_event(void *dummy)
+{
+	/*
+	 * At most one run per second, so that a runaway driver cannot
+	 * flood the netlink socket with messages.
+	 */
+	linkwatch_nextevent = jiffies + HZ;
+	clear_bit(LW_RUNNING, &linkwatch_flags);
+
+	rtnl_shlock();
+	linkwatch_run_queue();
+	rtnl_shunlock();
+}
+
+
+/*
+ * linkwatch_fire_event - queue a link state notification for @dev
+ *
+ * Does nothing if an event for @dev is already pending.  Otherwise an
+ * event is queued (using the static singleevent when it is free, a
+ * GFP_ATOMIC allocation otherwise), a reference on @dev is taken and,
+ * unless the work item is already scheduled, linkwatch_work is
+ * scheduled to run no earlier than linkwatch_nextevent.
+ *
+ * If the allocation fails the pending bit is cleared again and the
+ * event is dropped.
+ */
+void linkwatch_fire_event(struct net_device *dev)
+{
+	unsigned long flags;
+	struct lw_event *event;
+
+	/* At most one queued event per device. */
+	if (test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state))
+		return;
+
+	if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
+		event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
+
+		if (unlikely(event == NULL)) {
+			clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+			return;
+		}
+	} else {
+		event = &singleevent;
+	}
+
+	dev_hold(dev);
+	event->dev = dev;
+
+	spin_lock_irqsave(&lweventlist_lock, flags);
+	list_add_tail(&event->list, &lweventlist);
+	spin_unlock_irqrestore(&lweventlist_lock, flags);
+
+	if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
+		unsigned long delay = linkwatch_nextevent - jiffies;
+
+		/*
+		 * linkwatch_nextevent is never more than HZ ahead of
+		 * jiffies, so a larger unsigned difference means the
+		 * deadline has already passed.  Unlike a plain ">="
+		 * comparison this stays correct when jiffies wraps.
+		 */
+		if (delay > HZ)
+			delay = 0;
+
+		if (delay)
+			schedule_delayed_work(&linkwatch_work, delay);
+		else
+			schedule_work(&linkwatch_work);
+	}
+}
+
+EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
new file mode 100644
index 00000000000..0a2f67bbef2
--- /dev/null
+++ b/net/core/neighbour.c
@@ -0,0 +1,2362 @@
+/*
+ *	Generic address resolution entity
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
+ *	Harald Welte		Add neighbour cache statistics like rtstat
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/times.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+
+/* Compile-time verbosity of the neighbour cache diagnostics. */
+#define NEIGH_DEBUG 1
+
+/*
+ * NEIGH_PRINTK0 always prints.  NEIGH_PRINTK1 and NEIGH_PRINTK2 start out
+ * as no-ops and are switched to printk() below when NEIGH_DEBUG is at
+ * least 1 or 2 respectively.
+ */
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK0 NEIGH_PRINTK
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
+
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
+
+/* Mask applied to the proxy hash; phash_buckets is indexed 0..PNEIGH_HASHMASK. */
+#define PNEIGH_HASHMASK		0xF
+
+/* Forward declarations for functions used before their definition. */
+static void neigh_timer_handler(unsigned long arg);
+#ifdef CONFIG_ARPD
+static void neigh_app_notify(struct neighbour *n);
+#endif
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
+
+/* Head of the list of neighbour tables (see neigh_tbl_lock below). */
+static struct neigh_table *neigh_tables;
+/* File operations installed on each table's proc statistics entry. */
+static struct file_operations neigh_stat_seq_fops;
+
+/*
+   Neighbour hash table buckets are protected with rwlock tbl->lock.
+
+   - All the scans/updates to hash buckets MUST be made under this lock.
+   - NOTHING clever should be made under this lock: no callbacks
+     to protocol backends, no attempts to send something to network.
+     It will result in deadlocks, if backend/driver wants to use neighbour
+     cache.
+   - If the entry requires some non-trivial actions, increase
+     its reference count and release table lock.
+
+   Neighbour entries are protected:
+   - with reference count.
+   - with rwlock neigh->lock
+
+   Reference count prevents destruction.
+
+   neigh->lock mainly serializes ll address data and its validity state.
+   However, the same lock is also used to protect other entry fields:
+    - timer
+    - resolution queue
+
+   Again, nothing clever shall be made under neigh->lock,
+   the most complicated procedure, which we allow is dev->hard_header.
+   It is supposed, that dev->hard_header is simplistic and does
+   not make callbacks to neighbour tables.
+
+   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting the
+   list of neighbour tables. This list is used only in process context.
+ */
+
+static DEFINE_RWLOCK(neigh_tbl_lock);
+
+/*
+ * Output handler that discards every packet: frees @skb and reports
+ * -ENETDOWN.  Installed as the output routine of entries that must no
+ * longer transmit.
+ */
+static int neigh_blackhole(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return -ENETDOWN;
+}
+
+/*
+ * Return a random value in the interval (1/2)*base...(3/2)*base, or 0 when
+ * base is 0.  This corresponds to the default IPv6 settings and is
+ * deliberately not overridable.
+ */
+
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+	if (!base)
+		return 0;
+
+	return (net_random() % base) + (base >> 1);
+}
+
+
+/*
+ * Sweep every hash bucket of @tbl and unlink the entries that nobody but
+ * the table refers to (refcnt == 1) and that are not NUD_PERMANENT.
+ * Records the time of the sweep in tbl->last_flush.
+ *
+ * Returns 1 if at least one entry was discarded, 0 otherwise.
+ */
+static int neigh_forced_gc(struct neigh_table *tbl)
+{
+	int freed_any = 0;
+	int bucket;
+
+	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
+
+	write_lock_bh(&tbl->lock);
+	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
+		struct neighbour **link = &tbl->hash_buckets[bucket];
+		struct neighbour *cur;
+
+		while ((cur = *link) != NULL) {
+			int discard;
+
+			write_lock(&cur->lock);
+			discard = atomic_read(&cur->refcnt) == 1 &&
+				  !(cur->nud_state & NUD_PERMANENT);
+			if (discard) {
+				/* Unlink and mark dead while holding its lock. */
+				*link = cur->next;
+				cur->dead = 1;
+				freed_any = 1;
+			}
+			write_unlock(&cur->lock);
+
+			if (discard)
+				neigh_release(cur);
+			else
+				link = &cur->next;
+		}
+	}
+
+	tbl->last_flush = jiffies;
+
+	write_unlock_bh(&tbl->lock);
+
+	return freed_any;
+}
+
+/*
+ * Cancel the entry's timer if its state says one may be armed.
+ * Returns 1 if a pending timer was removed (and the reference taken when
+ * it was armed has been dropped), 0 otherwise.
+ */
+static int neigh_del_timer(struct neighbour *n)
+{
+	if (!(n->nud_state & NUD_IN_TIMER))
+		return 0;
+
+	if (!del_timer(&n->timer))
+		return 0;
+
+	neigh_release(n);
+	return 1;
+}
+
+/*
+ * Empty a proxy queue: every queued skb has its device reference dropped
+ * and is then freed.
+ */
+static void pneigh_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	for (;;) {
+		skb = skb_dequeue(list);
+		if (skb == NULL)
+			break;
+
+		dev_put(skb->dev);
+		kfree_skb(skb);
+	}
+}
+
+/*
+ * Drop every entry of @tbl that belongs to @dev (all entries when @dev is
+ * NULL): the entry is unlinked from its bucket, marked dead, its timer is
+ * cancelled and the table's reference is released.
+ */
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+	int bucket;
+
+	write_lock_bh(&tbl->lock);
+
+	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
+		struct neighbour **link = &tbl->hash_buckets[bucket];
+		struct neighbour *cur;
+
+		while ((cur = *link) != NULL) {
+			if (!dev || cur->dev == dev) {
+				*link = cur->next;
+				write_lock_bh(&cur->lock);
+				cur->dead = 1;
+				neigh_del_timer(cur);
+				write_unlock_bh(&cur->lock);
+				neigh_release(cur);
+			} else {
+				link = &cur->next;
+			}
+		}
+	}
+
+	write_unlock_bh(&tbl->lock);
+}
+
+/*
+ * Remove from @tbl everything that refers to @dev (or everything, when
+ * @dev is NULL): hashed neighbour entries, proxy entries and the queued
+ * proxy packets.  Always returns 0.
+ */
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	int bucket;
+
+	write_lock_bh(&tbl->lock);
+
+	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
+		struct neighbour **link = &tbl->hash_buckets[bucket];
+		struct neighbour *cur;
+
+		while ((cur = *link) != NULL) {
+			if (dev && cur->dev != dev) {
+				link = &cur->next;
+				continue;
+			}
+
+			*link = cur->next;
+			write_lock(&cur->lock);
+			neigh_del_timer(cur);
+			cur->dead = 1;
+
+			if (atomic_read(&cur->refcnt) != 1) {
+				/* The entry has to go but somebody still
+				 * holds it.  Actual destruction waits for
+				 * the last user; until then put the entry
+				 * into a harmless state: no queued packets,
+				 * blackhole output, no timer-driven state.
+				 */
+				skb_queue_purge(&cur->arp_queue);
+				cur->output = neigh_blackhole;
+				cur->nud_state = (cur->nud_state & NUD_VALID) ?
+						 NUD_NOARP : NUD_NONE;
+				NEIGH_PRINTK2("neigh %p is stray.\n", cur);
+			}
+			write_unlock(&cur->lock);
+			neigh_release(cur);
+		}
+	}
+
+	pneigh_ifdown(tbl, dev);
+	write_unlock_bh(&tbl->lock);
+
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	return 0;
+}
+
+/*
+ * Allocate and initialise a neighbour entry for @tbl.
+ *
+ * The table's entry counter is bumped first.  Above gc_thresh3, or above
+ * gc_thresh2 when the last flush is more than 5 seconds old, a forced GC
+ * is run; if it frees nothing and the count is still at gc_thresh3 or
+ * more, the allocation is refused.
+ *
+ * The returned entry holds one reference and is still marked dead, since
+ * it is not hashed yet.  Returns NULL on failure, with the counter undone.
+ */
+static struct neighbour *neigh_alloc(struct neigh_table *tbl)
+{
+	struct neighbour *n;
+	unsigned long now = jiffies;
+	int entries;
+
+	entries = atomic_inc_return(&tbl->entries) - 1;
+	if (entries >= tbl->gc_thresh3 ||
+	    (entries >= tbl->gc_thresh2 &&
+	     time_after(now, tbl->last_flush + 5 * HZ))) {
+		if (!neigh_forced_gc(tbl) && entries >= tbl->gc_thresh3)
+			goto undo_count;
+	}
+
+	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
+	if (!n)
+		goto undo_count;
+
+	memset(n, 0, tbl->entry_size);
+
+	skb_queue_head_init(&n->arp_queue);
+	rwlock_init(&n->lock);
+	n->updated = now;
+	n->used = now;
+	n->nud_state = NUD_NONE;
+	n->output = neigh_blackhole;
+	n->parms = neigh_parms_clone(&tbl->parms);
+
+	init_timer(&n->timer);
+	n->timer.function = neigh_timer_handler;
+	n->timer.data = (unsigned long)n;
+
+	NEIGH_CACHE_STAT_INC(tbl, allocs);
+	n->tbl = tbl;
+	atomic_set(&n->refcnt, 1);
+	n->dead = 1;
+	return n;
+
+undo_count:
+	atomic_dec(&tbl->entries);
+	return NULL;
+}
+
+/*
+ * Allocate a zeroed bucket array with @entries slots.  Arrays of up to one
+ * page come from kmalloc(), larger ones from the page allocator.
+ * Returns NULL if the allocation fails.
+ */
+static struct neighbour **neigh_hash_alloc(unsigned int entries)
+{
+	unsigned long bytes = entries * sizeof(struct neighbour *);
+	struct neighbour **table;
+
+	if (bytes > PAGE_SIZE)
+		table = (struct neighbour **)
+			__get_free_pages(GFP_ATOMIC, get_order(bytes));
+	else
+		table = kmalloc(bytes, GFP_ATOMIC);
+
+	if (!table)
+		return NULL;
+
+	memset(table, 0, bytes);
+	return table;
+}
+
+/*
+ * Release a bucket array obtained from neigh_hash_alloc().  @entries must
+ * be the slot count it was allocated with, as it selects the allocator.
+ */
+static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+{
+	unsigned long bytes = entries * sizeof(struct neighbour *);
+
+	if (bytes > PAGE_SIZE) {
+		free_pages((unsigned long)hash, get_order(bytes));
+		return;
+	}
+
+	kfree(hash);
+}
+
+/*
+ * Replace the hash table of @tbl with one of @new_entries buckets, which
+ * must be a power of two.  A new hash_rnd is drawn and every entry is
+ * rehashed into the new array; the old array is freed.  If the new array
+ * cannot be allocated the table is left as it is.
+ */
+static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+{
+	struct neighbour **new_hash, **old_hash;
+	unsigned int i, new_hash_mask, old_entries;
+
+	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
+
+	BUG_ON(new_entries & (new_entries - 1));
+	new_hash = neigh_hash_alloc(new_entries);
+	if (!new_hash)
+		return;
+
+	old_hash = tbl->hash_buckets;
+	old_entries = tbl->hash_mask + 1;
+	new_hash_mask = new_entries - 1;
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+	for (i = 0; i < old_entries; i++) {
+		struct neighbour *n = old_hash[i];
+
+		while (n) {
+			unsigned int slot;
+			struct neighbour *next;
+
+			slot = tbl->hash(n->primary_key, n->dev) & new_hash_mask;
+			next = n->next;
+
+			/* Push onto the head of the new chain. */
+			n->next = new_hash[slot];
+			new_hash[slot] = n;
+
+			n = next;
+		}
+	}
+	tbl->hash_buckets = new_hash;
+	tbl->hash_mask = new_hash_mask;
+
+	neigh_hash_free(old_hash, old_entries);
+}
+
+/*
+ * Find the entry of @tbl whose key equals @pkey and whose device is @dev.
+ * Returns the entry with a reference taken, or NULL if there is none.
+ */
+struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	read_lock_bh(&tbl->lock);
+	n = tbl->hash_buckets[hash_val];
+	while (n) {
+		if (n->dev == dev &&
+		    memcmp(n->primary_key, pkey, key_len) == 0) {
+			neigh_hold(n);
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+		n = n->next;
+	}
+	read_unlock_bh(&tbl->lock);
+	return n;
+}
+
+/*
+ * Like neigh_lookup(), but the device is ignored: the hash is computed
+ * with a NULL device and only the key is compared.
+ * Returns the entry with a reference taken, or NULL if there is none.
+ */
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
+
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	read_lock_bh(&tbl->lock);
+	n = tbl->hash_buckets[hash_val];
+	while (n) {
+		if (memcmp(n->primary_key, pkey, key_len) == 0) {
+			neigh_hold(n);
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+		n = n->next;
+	}
+	read_unlock_bh(&tbl->lock);
+	return n;
+}
+
+/*
+ * Allocate an entry for (@pkey, @dev), run the protocol and device setup
+ * hooks on it and insert it into @tbl.
+ *
+ * Returns the entry with an extra reference held for the caller.  If an
+ * entry with the same key and device is already hashed, that one is
+ * returned instead (also with a reference) and the new entry is released.
+ * On failure an ERR_PTR() is returned: -ENOBUFS when allocation fails, the
+ * negative code of a failing hook, or -EINVAL when the entry's parms are
+ * already dead.
+ */
+struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	u32 hash_val;
+	int key_len = tbl->key_len;
+	int error;
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+
+	if (!n) {
+		rc = ERR_PTR(-ENOBUFS);
+		goto out;
+	}
+
+	memcpy(n->primary_key, pkey, key_len);
+	n->dev = dev;
+	dev_hold(dev);
+
+	/* Protocol specific setup. */
+	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	/* Device specific setup. */
+	if (n->parms->neigh_setup &&
+	    (error = n->parms->neigh_setup(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	/* Start with a confirmation time two base_reachable_time in the past. */
+	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
+
+	write_lock_bh(&tbl->lock);
+
+	/* Double the hash table once there are more entries than buckets. */
+	if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
+		neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+
+	/* Computed after a possible grow, which changes mask and hash_rnd. */
+	hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+
+	if (n->parms->dead) {
+		rc = ERR_PTR(-EINVAL);
+		goto out_tbl_unlock;
+	}
+
+	/* Somebody may have inserted the same key in the meantime. */
+	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+			neigh_hold(n1);
+			rc = n1;
+			goto out_tbl_unlock;
+		}
+	}
+
+	n->next = tbl->hash_buckets[hash_val];
+	tbl->hash_buckets[hash_val] = n;
+	n->dead = 0;
+	/* The table keeps the initial reference; this one is the caller's. */
+	neigh_hold(n);
+	write_unlock_bh(&tbl->lock);
+	NEIGH_PRINTK2("neigh %p is created.\n", n);
+	rc = n;
+out:
+	return rc;
+out_tbl_unlock:
+	write_unlock_bh(&tbl->lock);
+out_neigh_release:
+	/* n is still marked dead here, so dropping it destroys it. */
+	neigh_release(n);
+	goto out;
+}
+
+/*
+ * Look up the proxy entry for @pkey in @tbl.  An entry matches when its
+ * key is equal and its device is either @dev or unset.
+ *
+ * When nothing matches and @creat is non-zero, a new entry is allocated
+ * with GFP_KERNEL, passed to the table's pconstructor (if any) and linked
+ * into the proxy hash.  A device reference is taken when @dev is set.
+ *
+ * Returns the entry, or NULL if it was not found and not created.
+ */
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+				    struct net_device *dev, int creat)
+{
+	struct pneigh_entry *n;
+	int key_len = tbl->key_len;
+	/* The hash is folded from the last four bytes of the key. */
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+
+	read_lock_bh(&tbl->lock);
+	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+		if (memcmp(n->key, pkey, key_len) == 0 &&
+		    (n->dev == dev || !n->dev))
+			break;
+	}
+	read_unlock_bh(&tbl->lock);
+
+	if (n || !creat)
+		return n;
+
+	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+	if (!n)
+		return NULL;
+
+	memcpy(n->key, pkey, key_len);
+	n->dev = dev;
+	if (dev)
+		dev_hold(dev);
+
+	if (tbl->pconstructor && tbl->pconstructor(n)) {
+		if (dev)
+			dev_put(dev);
+		kfree(n);
+		return NULL;
+	}
+
+	write_lock_bh(&tbl->lock);
+	n->next = tbl->phash_buckets[hash_val];
+	tbl->phash_buckets[hash_val] = n;
+	write_unlock_bh(&tbl->lock);
+
+	return n;
+}
+
+
+/*
+ * Remove the proxy entry whose key equals @pkey and whose device is
+ * exactly @dev.  The entry is unlinked under the table lock; the
+ * pdestructor call, the device put and the free happen after the lock is
+ * dropped.
+ *
+ * Returns 0 on success, -ENOENT if there is no such entry.
+ */
+int pneigh_delete(struct neigh_table *tbl, const void *pkey,
+		  struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	int key_len = tbl->key_len;
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+
+	write_lock_bh(&tbl->lock);
+	np = &tbl->phash_buckets[hash_val];
+	while ((n = *np) != NULL) {
+		if (!memcmp(n->key, pkey, key_len) && n->dev == dev)
+			break;
+		np = &n->next;
+	}
+	if (n)
+		*np = n->next;
+	write_unlock_bh(&tbl->lock);
+
+	if (!n)
+		return -ENOENT;
+
+	if (tbl->pdestructor)
+		tbl->pdestructor(n);
+	if (n->dev)
+		dev_put(n->dev);
+	kfree(n);
+	return 0;
+}
+
+/*
+ * Destroy every proxy entry of @tbl bound to @dev (every entry when @dev
+ * is NULL).  Takes no lock itself; neigh_ifdown() calls it with tbl->lock
+ * write-held.  The return value is always -ENOENT.
+ */
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	u32 h;
+
+	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
+		np = &tbl->phash_buckets[h];
+		while ((n = *np) != NULL) {
+			if (dev && n->dev != dev) {
+				np = &n->next;
+				continue;
+			}
+
+			*np = n->next;
+			if (tbl->pdestructor)
+				tbl->pdestructor(n);
+			if (n->dev)
+				dev_put(n->dev);
+			kfree(n);
+		}
+	}
+	return -ENOENT;
+}
+
+
+/*
+ *	Final teardown of a neighbour entry.  The entry must already be out
+ *	of the table (marked dead); an entry that is still alive is only
+ *	reported, with a stack dump, and left untouched.
+ */
+void neigh_destroy(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
+
+	if (!neigh->dead) {
+		printk(KERN_WARNING
+		       "Destroying alive neighbour %p\n", neigh);
+		dump_stack();
+		return;
+	}
+
+	if (neigh_del_timer(neigh))
+		printk(KERN_WARNING "Impossible event.\n");
+
+	/* Detach each cached header, blackhole it, drop our reference. */
+	for (hh = neigh->hh; hh != NULL; hh = neigh->hh) {
+		neigh->hh = hh->hh_next;
+		hh->hh_next = NULL;
+		write_lock_bh(&hh->hh_lock);
+		hh->hh_output = neigh_blackhole;
+		write_unlock_bh(&hh->hh_lock);
+		if (atomic_dec_and_test(&hh->hh_refcnt))
+			kfree(hh);
+	}
+
+	if (neigh->ops && neigh->ops->destructor)
+		(neigh->ops->destructor)(neigh);
+
+	skb_queue_purge(&neigh->arp_queue);
+
+	dev_put(neigh->dev);
+	neigh_parms_put(neigh->parms);
+
+	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+
+	atomic_dec(&neigh->tbl->entries);
+	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
+
+/* The neighbour's state is no longer trusted: take it off the fast path
+   by routing the entry and all of its cached headers through ops->output.
+
+   Called with neigh write-locked.
+ */
+static void neigh_suspect(struct neighbour *neigh)
+{
+	struct hh_cache *hh = neigh->hh;
+
+	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+
+	neigh->output = neigh->ops->output;
+
+	while (hh) {
+		hh->hh_output = neigh->ops->output;
+		hh = hh->hh_next;
+	}
+}
+
+/* The neighbour's state is good: put it on the fast path by using
+   ops->connected_output for the entry and ops->hh_output for all of its
+   cached headers.
+
+   Called with neigh write-locked.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+	struct hh_cache *hh = neigh->hh;
+
+	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+	neigh->output = neigh->ops->connected_output;
+
+	while (hh) {
+		hh->hh_output = neigh->ops->hh_output;
+		hh = hh->hh_next;
+	}
+}
+
+/*
+ * Periodic garbage collector; re-arms tbl->gc_timer on every run.
+ * @arg is the neigh_table, cast to unsigned long.
+ *
+ * Each run scans a single hash bucket (tbl->hash_chain_gc, advanced
+ * round-robin) and drops the entries that only the table refers to and
+ * that are either NUD_FAILED or unused for longer than gc_staletime.
+ * Roughly every 300 seconds the reachable_time of all parms is re-drawn.
+ */
+static void neigh_periodic_timer(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	struct neighbour *n, **np;
+	unsigned long expire, now = jiffies;
+
+	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
+
+	write_lock(&tbl->lock);
+
+	/*
+	 *	periodically recompute ReachableTime from random function
+	 */
+
+	if (time_after(now, tbl->last_rand + 300 * HZ)) {
+		struct neigh_parms *p;
+		tbl->last_rand = now;
+		for (p = &tbl->parms; p; p = p->next)
+			p->reachable_time =
+				neigh_rand_reach_time(p->base_reachable_time);
+	}
+
+	/* Pick this run's bucket and advance the cursor for the next run. */
+	np = &tbl->hash_buckets[tbl->hash_chain_gc];
+	tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
+
+	while ((n = *np) != NULL) {
+		unsigned int state;
+
+		write_lock(&n->lock);
+
+		/* Permanent entries and entries with a timer are skipped. */
+		state = n->nud_state;
+		if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+			write_unlock(&n->lock);
+			goto next_elt;
+		}
+
+		/* A confirmation counts as use. */
+		if (time_before(n->used, n->confirmed))
+			n->used = n->confirmed;
+
+		if (atomic_read(&n->refcnt) == 1 &&
+		    (state == NUD_FAILED ||
+		     time_after(now, n->used + n->parms->gc_staletime))) {
+			*np = n->next;
+			n->dead = 1;
+			write_unlock(&n->lock);
+			neigh_release(n);
+			continue;
+		}
+		write_unlock(&n->lock);
+
+next_elt:
+		np = &n->next;
+	}
+
+	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
+	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
+	 * base_reachable_time.
+	 */
+	expire = tbl->parms.base_reachable_time >> 1;
+	expire /= (tbl->hash_mask + 1);
+	if (!expire)
+		expire = 1;
+
+ 	mod_timer(&tbl->gc_timer, now + expire);
+
+	write_unlock(&tbl->lock);
+}
+
+/*
+ * Number of probes allowed for @n before resolution is declared failed:
+ * only the unicast probes while in NUD_PROBE, otherwise the sum of the
+ * unicast, application and multicast probes.
+ */
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+	struct neigh_parms *p = n->parms;
+
+	if (n->nud_state & NUD_PROBE)
+		return p->ucast_probes;
+
+	return p->ucast_probes + p->app_probes + p->mcast_probes;
+}
+
+
+/* Called when a timer expires for a neighbour entry.
+ *
+ * @arg is the neighbour, cast to unsigned long.  Advances the NUD state
+ * (REACHABLE -> DELAY/STALE, DELAY -> REACHABLE/PROBE), fails the entry
+ * once the probe budget from neigh_max_probes() is used up, re-arms the
+ * timer while the state still needs one and sends a solicitation in
+ * INCOMPLETE/PROBE.  The reference that was taken when the timer was
+ * armed is dropped at the end.
+ */
+
+static void neigh_timer_handler(unsigned long arg)
+{
+	unsigned long now, next;
+	struct neighbour *neigh = (struct neighbour *)arg;
+	unsigned state;
+	int notify = 0;
+
+	write_lock(&neigh->lock);
+
+	state = neigh->nud_state;
+	now = jiffies;
+	next = now + HZ;
+
+	/* The state no longer wants a timer: nothing to do. */
+	if (!(state & NUD_IN_TIMER)) {
+#ifndef CONFIG_SMP
+		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
+#endif
+		goto out;
+	}
+
+	if (state & NUD_REACHABLE) {
+		if (time_before_eq(now, 
+				   neigh->confirmed + neigh->parms->reachable_time)) {
+			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else if (time_before_eq(now,
+					  neigh->used + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+			neigh->nud_state = NUD_DELAY;
+			neigh_suspect(neigh);
+			next = now + neigh->parms->delay_probe_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+			neigh->nud_state = NUD_STALE;
+			neigh_suspect(neigh);
+		}
+	} else if (state & NUD_DELAY) {
+		if (time_before_eq(now, 
+				   neigh->confirmed + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+			neigh->nud_state = NUD_REACHABLE;
+			neigh_connect(neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+			neigh->nud_state = NUD_PROBE;
+			atomic_set(&neigh->probes, 0);
+			next = now + neigh->parms->retrans_time;
+		}
+	} else {
+		/* NUD_PROBE|NUD_INCOMPLETE */
+		next = now + neigh->parms->retrans_time;
+	}
+
+	/* Probe budget exhausted: the entry fails. */
+	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
+	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
+		struct sk_buff *skb;
+
+		neigh->nud_state = NUD_FAILED;
+		notify = 1;
+		NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
+		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+
+		/* It is very thin place. report_unreachable is very complicated
+		   routine. Particularly, it can hit the same neighbour entry!
+
+		   So that, we try to be accurate and avoid dead loop. --ANK
+		 */
+		while (neigh->nud_state == NUD_FAILED &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			/* The lock is dropped around the error callback. */
+			write_unlock(&neigh->lock);
+			neigh->ops->error_report(neigh, skb);
+			write_lock(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+	}
+
+	/* Re-arm, never sooner than HZ/2 from now; the timer holds a ref. */
+	if (neigh->nud_state & NUD_IN_TIMER) {
+		neigh_hold(neigh);
+		if (time_before(next, jiffies + HZ/2))
+			next = jiffies + HZ/2;
+		neigh->timer.expires = next;
+		add_timer(&neigh->timer);
+	}
+	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
+		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+		/* keep skb alive even if arp_queue overflows */
+		if (skb)
+			skb_get(skb);
+		write_unlock(&neigh->lock);
+		neigh->ops->solicit(neigh, skb);
+		atomic_inc(&neigh->probes);
+		if (skb)
+			kfree_skb(skb);
+	} else {
+out:
+		write_unlock(&neigh->lock);
+	}
+
+#ifdef CONFIG_ARPD
+	if (notify && neigh->parms->app_probes)
+		neigh_app_notify(neigh);
+#endif
+	/* Drop the reference held on behalf of the timer that just fired. */
+	neigh_release(neigh);
+}
+
+/*
+ * Note that somebody wants to send @skb (may be NULL) via @neigh and start
+ * resolution or re-validation if the state requires it.
+ *
+ * Returns 0 when the entry is usable right away (CONNECTED, DELAY or PROBE
+ * state, or a STALE entry that has just been moved to DELAY).
+ * Returns 1 when the caller must not transmit: either @skb was queued on
+ * arp_queue while the entry is INCOMPLETE, or the entry went to FAILED and
+ * @skb was freed.
+ */
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+	int rc;
+	unsigned long now;
+
+	write_lock_bh(&neigh->lock);
+
+	rc = 0;
+	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
+		goto out_unlock_bh;
+
+	now = jiffies;
+	
+	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
+		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+			/* Start resolving; the unicast probes count as already
+			 * used and the timer fires on the next tick.
+			 */
+			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
+			neigh->nud_state     = NUD_INCOMPLETE;
+			neigh_hold(neigh);
+			neigh->timer.expires = now + 1;
+			add_timer(&neigh->timer);
+		} else {
+			/* No multicast or application probes configured. */
+			neigh->nud_state = NUD_FAILED;
+			write_unlock_bh(&neigh->lock);
+
+			if (skb)
+				kfree_skb(skb);
+			return 1;
+		}
+	} else if (neigh->nud_state & NUD_STALE) {
+		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+		neigh_hold(neigh);
+		neigh->nud_state = NUD_DELAY;
+		neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
+		add_timer(&neigh->timer);
+	}
+
+	if (neigh->nud_state == NUD_INCOMPLETE) {
+		if (skb) {
+			/* Queue full: drop the oldest packet to make room. */
+			if (skb_queue_len(&neigh->arp_queue) >=
+			    neigh->parms->queue_len) {
+				struct sk_buff *buff;
+				buff = neigh->arp_queue.next;
+				__skb_unlink(buff, &neigh->arp_queue);
+				kfree_skb(buff);
+			}
+			__skb_queue_tail(&neigh->arp_queue, skb);
+		}
+		rc = 1;
+	}
+out_unlock_bh:
+	write_unlock_bh(&neigh->lock);
+	return rc;
+}
+
+/*
+ * Refresh every cached hardware header of @neigh from neigh->ha through
+ * the device's header_cache_update hook.  Does nothing if the device has
+ * no such hook.
+ */
+static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
+		neigh->dev->header_cache_update;
+
+	if (!update)
+		return;
+
+	hh = neigh->hh;
+	while (hh) {
+		write_lock_bh(&hh->hh_lock);
+		update(hh, neigh->dev, neigh->ha);
+		write_unlock_bh(&hh->hh_lock);
+		hh = hh->hh_next;
+	}
+}
+
+
+
+/* Generic update routine.
+   -- lladdr is new lladdr or NULL, if it is not supplied.
+   -- new    is new state.
+   -- flags
+	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
+				if it is different.
+	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
+				lladdr instead of overriding it
+				if it is different.
+				It also allows to retain current state
+				if lladdr is unchanged.
+	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
+
+	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
+				NTF_ROUTER flag.
+	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
+				a router.
+
+   Returns 0 on success, -EPERM when a non-administrative update hits a
+   NOARP or PERMANENT entry, -EINVAL when no lladdr is supplied and none
+   is cached for a device that needs one.
+
+   Caller MUST hold reference count on the entry.
+ */
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+		 u32 flags)
+{
+	u8 old;
+	int err;
+#ifdef CONFIG_ARPD
+	int notify = 0;
+#endif
+	struct net_device *dev;
+	int update_isrouter = 0;
+
+	write_lock_bh(&neigh->lock);
+
+	dev    = neigh->dev;
+	old    = neigh->nud_state;
+	err    = -EPERM;
+
+	if (!(flags & NEIGH_UPDATE_F_ADMIN) && 
+	    (old & (NUD_NOARP | NUD_PERMANENT)))
+		goto out;
+
+	/* Moving to a non-valid state: stop the timer, leave the fast path. */
+	if (!(new & NUD_VALID)) {
+		neigh_del_timer(neigh);
+		if (old & NUD_CONNECTED)
+			neigh_suspect(neigh);
+		neigh->nud_state = new;
+		err = 0;
+#ifdef CONFIG_ARPD
+		notify = old & NUD_VALID;
+#endif
+		goto out;
+	}
+
+	/* Compare new lladdr with cached one */
+	/* From here on, lladdr == neigh->ha means "address unchanged". */
+	if (!dev->addr_len) {
+		/* First case: device needs no address. */
+		lladdr = neigh->ha;
+	} else if (lladdr) {
+		/* The second case: if something is already cached
+		   and a new address is proposed:
+		   - compare new & old
+		   - if they are different, check override flag
+		 */
+		if ((old & NUD_VALID) && 
+		    !memcmp(lladdr, neigh->ha, dev->addr_len))
+			lladdr = neigh->ha;
+	} else {
+		/* No address is supplied; if we know something,
+		   use it, otherwise discard the request.
+		 */
+		err = -EINVAL;
+		if (!(old & NUD_VALID))
+			goto out;
+		lladdr = neigh->ha;
+	}
+
+	if (new & NUD_CONNECTED)
+		neigh->confirmed = jiffies;
+	neigh->updated = jiffies;
+
+	/* If entry was valid and address is not changed,
+	   do not change entry state, if new one is STALE.
+	 */
+	err = 0;
+	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+	if (old & NUD_VALID) {
+		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
+			/* Different address, but overriding is not allowed. */
+			update_isrouter = 0;
+			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
+			    (old & NUD_CONNECTED)) {
+				lladdr = neigh->ha;
+				new = NUD_STALE;
+			} else
+				goto out;
+		} else {
+			if (lladdr == neigh->ha && new == NUD_STALE &&
+			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
+			     (old & NUD_CONNECTED))
+			    )
+				new = old;
+		}
+	}
+
+	if (new != old) {
+		neigh_del_timer(neigh);
+		/* An armed timer holds its own reference on the entry. */
+		if (new & NUD_IN_TIMER) {
+			neigh_hold(neigh);
+			neigh->timer.expires = jiffies + 
+						((new & NUD_REACHABLE) ? 
+						 neigh->parms->reachable_time : 0);
+			add_timer(&neigh->timer);
+		}
+		neigh->nud_state = new;
+	}
+
+	if (lladdr != neigh->ha) {
+		memcpy(&neigh->ha, lladdr, dev->addr_len);
+		neigh_update_hhs(neigh);
+		/* New address without confirmation: backdate confirmed. */
+		if (!(new & NUD_CONNECTED))
+			neigh->confirmed = jiffies -
+				      (neigh->parms->base_reachable_time << 1);
+#ifdef CONFIG_ARPD
+		notify = 1;
+#endif
+	}
+	if (new == old)
+		goto out;
+	if (new & NUD_CONNECTED)
+		neigh_connect(neigh);
+	else
+		neigh_suspect(neigh);
+	/* The entry just became valid: send out what was queued for it. */
+	if (!(old & NUD_VALID)) {
+		struct sk_buff *skb;
+
+		/* Again: avoid dead loop if something went wrong */
+
+		while (neigh->nud_state & NUD_VALID &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			struct neighbour *n1 = neigh;
+			write_unlock_bh(&neigh->lock);
+			/* On shaper/eql skb->dst->neighbour != neigh :( */
+			if (skb->dst && skb->dst->neighbour)
+				n1 = skb->dst->neighbour;
+			n1->output(skb);
+			write_lock_bh(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+	}
+out:
+	if (update_isrouter) {
+		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
+			(neigh->flags | NTF_ROUTER) :
+			(neigh->flags & ~NTF_ROUTER);
+	}
+	write_unlock_bh(&neigh->lock);
+#ifdef CONFIG_ARPD
+	if (notify && neigh->parms->app_probes)
+		neigh_app_notify(neigh);
+#endif
+	return err;
+}
+
+/*
+ * Look up the entry for @saddr on @dev via __neigh_lookup(), passing
+ * "@lladdr is supplied or the device has no addresses" as its last
+ * argument, and update a found entry to NUD_STALE with @lladdr,
+ * overriding a cached address.
+ * Returns the entry, or NULL if the lookup yields nothing.
+ */
+struct neighbour *neigh_event_ns(struct neigh_table *tbl,
+				 u8 *lladdr, void *saddr,
+				 struct net_device *dev)
+{
+	struct neighbour *neigh;
+
+	neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
+	if (!neigh)
+		return NULL;
+
+	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE);
+	return neigh;
+}
+
+/*
+ * Attach a cached hardware header for @protocol to @dst.  An existing
+ * hh_cache of @n with that type is reused; otherwise one is allocated
+ * (GFP_ATOMIC), filled in by the device's hard_header_cache hook and
+ * linked onto @n.  If allocation or the hook fails, @dst is left alone.
+ *
+ * The neighbour's list and @dst each hold one reference on the hh_cache.
+ */
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
+			  u16 protocol)
+{
+	struct hh_cache	*hh;
+	struct net_device *dev = dst->dev;
+
+	for (hh = n->hh; hh; hh = hh->hh_next)
+		if (hh->hh_type == protocol)
+			break;
+
+	if (!hh) {
+		hh = kmalloc(sizeof(*hh), GFP_ATOMIC);
+		if (hh == NULL)
+			return;
+
+		memset(hh, 0, sizeof(struct hh_cache));
+		rwlock_init(&hh->hh_lock);
+		hh->hh_type = protocol;
+		atomic_set(&hh->hh_refcnt, 0);
+		hh->hh_next = NULL;
+
+		if (dev->hard_header_cache(n, hh)) {
+			kfree(hh);
+			return;
+		}
+
+		/* Reference owned by the neighbour's hh list. */
+		atomic_inc(&hh->hh_refcnt);
+		hh->hh_next = n->hh;
+		n->hh	    = hh;
+		if (n->nud_state & NUD_CONNECTED)
+			hh->hh_output = n->ops->hh_output;
+		else
+			hh->hh_output = n->ops->output;
+	}
+
+	/* Reference owned by dst. */
+	atomic_inc(&hh->hh_refcnt);
+	dst->hh = hh;
+}
+
+/* Output routine for contexts where only the old dev_queue_xmit worked,
+   e.g. when the normal output path is overridden (eql, shaper) but
+   resolution has not been done yet.
+
+   Returns 0 without queueing when hard_header fails and rebuild_header
+   returns non-zero; otherwise the result of dev_queue_xmit.
+ */
+
+int neigh_compat_output(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (dev->hard_header) {
+		int built = dev->hard_header(skb, dev, ntohs(skb->protocol),
+					     NULL, NULL, skb->len);
+
+		if (built < 0 && dev->rebuild_header(skb))
+			return 0;
+	}
+
+	return dev_queue_xmit(skb);
+}
+
+/* Slow and careful output path.
+
+   If neigh_event_send() reports the entry is not usable yet, the skb has
+   been taken care of there and 0 is returned.  Otherwise the link-layer
+   header is built under neigh->lock (setting up the header cache first
+   when the device supports one and dst has none) and the skb is passed to
+   ops->queue_xmit.
+
+   Returns -EINVAL, after freeing the skb, when there is no dst or
+   neighbour or when building the header fails.
+ */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh;
+	struct net_device *dev;
+	int err;
+
+	if (!dst || !(neigh = dst->neighbour)) {
+		NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
+			      dst, dst ? dst->neighbour : NULL);
+		goto drop;
+	}
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (neigh_event_send(neigh, skb))
+		return 0;
+
+	dev = neigh->dev;
+	if (dev->hard_header_cache && !dst->hh) {
+		write_lock_bh(&neigh->lock);
+		/* Re-check under the lock before initialising. */
+		if (!dst->hh)
+			neigh_hh_init(neigh, dst, dst->ops->protocol);
+		err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+				       neigh->ha, NULL, skb->len);
+		write_unlock_bh(&neigh->lock);
+	} else {
+		read_lock_bh(&neigh->lock);
+		err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+				       neigh->ha, NULL, skb->len);
+		read_unlock_bh(&neigh->lock);
+	}
+
+	if (err < 0)
+		goto drop;
+
+	return neigh->ops->queue_xmit(skb);
+
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/* As fast as possible without hh cache: build the link-layer header under
+   the neighbour's read lock and hand the skb to ops->queue_xmit.
+   If the header cannot be built the skb is freed and -EINVAL returned.
+ */
+
+int neigh_connected_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+	int err;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	read_lock_bh(&neigh->lock);
+	err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+			       neigh->ha, NULL, skb->len);
+	read_unlock_bh(&neigh->lock);
+
+	if (err < 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return neigh->ops->queue_xmit(skb);
+}
+
+/*
+ * Walk the proxy queue of the table passed in @arg.  Each skb carries its
+ * due time (in jiffies) in stamp.tv_usec, as stored by pneigh_enqueue().
+ * Packets that are due are unlinked and passed to tbl->proxy_redo if it
+ * is set and the device is running, otherwise freed; their device
+ * reference is dropped either way.  The proxy timer is then re-armed for
+ * the earliest packet still waiting, if any.
+ */
+static void neigh_proxy_process(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	struct sk_buff *head = (struct sk_buff *)&tbl->proxy_queue;
+	long sched_next = 0;
+	unsigned long now = jiffies;
+	struct sk_buff *skb;
+
+	spin_lock(&tbl->proxy_queue.lock);
+
+	skb = tbl->proxy_queue.next;
+
+	while (skb != head) {
+		struct sk_buff *cur = skb;
+		long tdif = cur->stamp.tv_usec - now;
+		struct net_device *dev;
+
+		/* Advance first: cur may be unlinked below. */
+		skb = skb->next;
+
+		if (tdif > 0) {
+			/* Not due yet; remember the shortest wait. */
+			if (!sched_next || tdif < sched_next)
+				sched_next = tdif;
+			continue;
+		}
+
+		dev = cur->dev;
+		__skb_unlink(cur, &tbl->proxy_queue);
+		if (tbl->proxy_redo && netif_running(dev))
+			tbl->proxy_redo(cur);
+		else
+			kfree_skb(cur);
+
+		dev_put(dev);
+	}
+	del_timer(&tbl->proxy_timer);
+	if (sched_next)
+		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
+	spin_unlock(&tbl->proxy_queue.lock);
+}
+
+/*
+ * Queue @skb on the proxy queue of @tbl for delayed processing.
+ *
+ * The packet becomes due after a random delay below p->proxy_delay
+ * jiffies; a proxy_delay of 0 makes it due immediately instead of
+ * dividing by zero.  The due time is stored in skb->stamp.tv_usec and
+ * stamp.tv_sec is set to LOCALLY_ENQUEUED.  The skb's dst is released and
+ * a reference on skb->dev is taken for the time it sits in the queue.
+ * The proxy timer is (re)armed for the earlier of its previous expiry and
+ * the new due time.
+ *
+ * If the queue already holds more than p->proxy_qlen packets, @skb is
+ * freed instead.
+ */
+void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+		    struct sk_buff *skb)
+{
+	unsigned long now = jiffies;
+	/* Same zero guard as neigh_rand_reach_time(): never compute x % 0. */
+	unsigned long sched_next = now +
+		(p->proxy_delay ? net_random() % p->proxy_delay : 0);
+
+	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+		kfree_skb(skb);
+		return;
+	}
+	skb->stamp.tv_sec  = LOCALLY_ENQUEUED;
+	skb->stamp.tv_usec = sched_next;
+
+	spin_lock(&tbl->proxy_queue.lock);
+	/* Keep an earlier pending expiry if the timer was already armed. */
+	if (del_timer(&tbl->proxy_timer)) {
+		if (time_before(tbl->proxy_timer.expires, sched_next))
+			sched_next = tbl->proxy_timer.expires;
+	}
+	dst_release(skb->dst);
+	skb->dst = NULL;
+	dev_hold(skb->dev);
+	__skb_queue_tail(&tbl->proxy_queue, skb);
+	mod_timer(&tbl->proxy_timer, sched_next);
+	spin_unlock(&tbl->proxy_queue.lock);
+}
+
+
+/*
+ * Create a parameter set for @dev as a copy of the defaults of @tbl, with
+ * one reference and a freshly randomized reachable_time, and link it into
+ * the table's parms list.  The device's neigh_setup hook, if present, may
+ * veto the new set by returning non-zero.
+ *
+ * Returns the new parms, or NULL on allocation failure or veto.
+ */
+struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
+				      struct neigh_table *tbl)
+{
+	struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return NULL;
+
+	memcpy(p, &tbl->parms, sizeof(*p));
+	p->tbl = tbl;
+	atomic_set(&p->refcnt, 1);
+	INIT_RCU_HEAD(&p->rcu_head);
+	p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+
+	if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
+		kfree(p);
+		return NULL;
+	}
+	p->sysctl_table = NULL;
+
+	write_lock_bh(&tbl->lock);
+	p->next = tbl->parms.next;
+	tbl->parms.next = p;
+	write_unlock_bh(&tbl->lock);
+
+	return p;
+}
+
+/*
+ * RCU callback queued by neigh_parms_release(): drops a reference on the
+ * neigh_parms that embeds @head.
+ */
+static void neigh_rcu_free_parms(struct rcu_head *head)
+{
+	neigh_parms_put(container_of(head, struct neigh_parms, rcu_head));
+}
+
+/*
+ * Unlink @parms from @tbl's parameter list and mark it dead.  The
+ * reference drop is deferred through call_rcu().  A NULL @parms or the
+ * table's own default parms is ignored; a block that is not on the list
+ * only produces a debug message.
+ */
+void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
+{
+	struct neigh_parms **link;
+	int found = 0;
+
+	if (!parms || parms == &tbl->parms)
+		return;
+
+	write_lock_bh(&tbl->lock);
+	link = &tbl->parms.next;
+	while (*link) {
+		if (*link == parms) {
+			*link = parms->next;
+			parms->dead = 1;
+			found = 1;
+			break;
+		}
+		link = &(*link)->next;
+	}
+	write_unlock_bh(&tbl->lock);
+
+	/* Both outcomes are handled after the table lock is dropped. */
+	if (found) {
+		call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
+	} else {
+		NEIGH_PRINTK1("neigh_parms_release: not found\n");
+	}
+}
+
+/* Free the memory of @parms; no other teardown is done here. */
+void neigh_parms_destroy(struct neigh_parms *parms)
+{
+	kfree(parms);
+}
+
+
+/*
+ * Bring a neighbour table into service.
+ *
+ * Sets up the default parms, the entry slab cache (unless the caller
+ * already supplied one), per-cpu statistics, the /proc statistics file
+ * (CONFIG_PROC_FS only), both hash tables and both timers.  It then
+ * links @tbl at the head of the global neigh_tables list.  Any
+ * allocation failure panics.
+ */
+void neigh_table_init(struct neigh_table *tbl)
+{
+	unsigned long now = jiffies;
+	unsigned long phsize;
+
+	atomic_set(&tbl->parms.refcnt, 1);
+	INIT_RCU_HEAD(&tbl->parms.rcu_head);
+	tbl->parms.reachable_time =
+			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
+
+	/* Only create the cache if the caller did not provide one. */
+	if (!tbl->kmem_cachep)
+		tbl->kmem_cachep = kmem_cache_create(tbl->id,
+						     tbl->entry_size,
+						     0, SLAB_HWCACHE_ALIGN,
+						     NULL, NULL);
+
+	if (!tbl->kmem_cachep)
+		panic("cannot create neighbour cache");
+
+	tbl->stats = alloc_percpu(struct neigh_statistics);
+	if (!tbl->stats)
+		panic("cannot create neighbour cache statistics");
+	
+#ifdef CONFIG_PROC_FS
+	/* Statistics file named after the table, under proc_net_stat. */
+	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
+	if (!tbl->pde) 
+		panic("cannot create neighbour proc dir entry");
+	tbl->pde->proc_fops = &neigh_stat_seq_fops;
+	tbl->pde->data = tbl;
+#endif
+
+	/* The main hash starts with hash_mask + 1 == 2 buckets. */
+	tbl->hash_mask = 1;
+	tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
+
+	/* The proxy hash has a fixed PNEIGH_HASHMASK + 1 buckets. */
+	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
+	tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL);
+
+	if (!tbl->hash_buckets || !tbl->phash_buckets)
+		panic("cannot allocate neighbour cache hashes");
+
+	memset(tbl->phash_buckets, 0, phsize);
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
+	rwlock_init(&tbl->lock);
+	/* The GC timer is armed immediately, one jiffy from now. */
+	init_timer(&tbl->gc_timer);
+	tbl->gc_timer.data     = (unsigned long)tbl;
+	tbl->gc_timer.function = neigh_periodic_timer;
+	tbl->gc_timer.expires  = now + 1;
+	add_timer(&tbl->gc_timer);
+
+	/* The proxy timer is only prepared here; pneigh_enqueue() arms it. */
+	init_timer(&tbl->proxy_timer);
+	tbl->proxy_timer.data	  = (unsigned long)tbl;
+	tbl->proxy_timer.function = neigh_proxy_process;
+	skb_queue_head_init(&tbl->proxy_queue);
+
+	tbl->last_flush = now;
+	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
+	write_lock(&neigh_tbl_lock);
+	tbl->next	= neigh_tables;
+	neigh_tables	= tbl;
+	write_unlock(&neigh_tbl_lock);
+}
+
+/*
+ * Take @tbl out of service: stop both timers, purge the proxy queue,
+ * run neigh_ifdown() with a NULL device, unlink the table from
+ * neigh_tables and free both hash arrays.  Always returns 0.
+ *
+ * NOTE(review): the slab cache, per-cpu statistics and proc entry set up
+ * by neigh_table_init() are not released here.
+ */
+int neigh_table_clear(struct neigh_table *tbl)
+{
+	struct neigh_table **tp;
+
+	/* It is not clean... Fix it to unload IPv6 module safely */
+	del_timer_sync(&tbl->gc_timer);
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	/* presumably a NULL device means "all devices" -- TODO confirm */
+	neigh_ifdown(tbl, NULL);
+	/* Entries still accounted at this point are reported, not freed. */
+	if (atomic_read(&tbl->entries))
+		printk(KERN_CRIT "neighbour leakage\n");
+	write_lock(&neigh_tbl_lock);
+	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
+		if (*tp == tbl) {
+			*tp = tbl->next;
+			break;
+		}
+	}
+	write_unlock(&neigh_tbl_lock);
+
+	neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+	tbl->hash_buckets = NULL;
+
+	kfree(tbl->phash_buckets);
+	tbl->phash_buckets = NULL;
+
+	return 0;
+}
+
+/*
+ * Netlink handler: delete a neighbour or proxy entry.
+ *
+ * @arg is the parsed attribute array; NDA_DST carries the key.  The
+ * table is chosen by ndm_family.  With NTF_PROXY the proxy entry is
+ * removed; otherwise the matching neighbour is forced to NUD_FAILED
+ * through an administrative override update.
+ *
+ * Returns 0 or a negative errno: -ENODEV for an unknown ifindex,
+ * -EINVAL for a missing or short NDA_DST or a missing device,
+ * -EADDRNOTAVAIL when no table matches the family.
+ */
+int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct rtattr **nda = arg;
+	struct neigh_table *tbl;
+	struct net_device *dev = NULL;
+	int err = -ENODEV;
+
+	if (ndm->ndm_ifindex &&
+	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+		goto out;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		struct rtattr *dst_attr = nda[NDA_DST - 1];
+		struct neighbour *n;
+
+		if (tbl->family != ndm->ndm_family)
+			continue;
+		/*
+		 * Matching table found: the list lock is dropped here and
+		 * every path below leaves the function without looping.
+		 */
+		read_unlock(&neigh_tbl_lock);
+
+		err = -EINVAL;
+		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+			goto out_dev_put;
+
+		if (ndm->ndm_flags & NTF_PROXY) {
+			err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+			goto out_dev_put;
+		}
+
+		/* dev is NULL here, so there is no reference to drop. */
+		if (!dev)
+			goto out;
+
+		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
+		if (n) {
+			err = neigh_update(n, NULL, NUD_FAILED, 
+					   NEIGH_UPDATE_F_OVERRIDE|
+					   NEIGH_UPDATE_F_ADMIN);
+			neigh_release(n);
+		}
+		/* If no entry was found, err is still -EINVAL. */
+		goto out_dev_put;
+	}
+	read_unlock(&neigh_tbl_lock);
+	err = -EADDRNOTAVAIL;
+out_dev_put:
+	if (dev)
+		dev_put(dev);
+out:
+	return err;
+}
+
+/*
+ * Netlink handler: add or update a neighbour or proxy entry.
+ *
+ * @arg is the parsed attribute array; NDA_DST carries the key and
+ * NDA_LLADDR the optional link-layer address.  The table is chosen by
+ * ndm_family.  With NTF_PROXY a proxy entry is looked up with the
+ * create flag set.  Otherwise the neighbour is looked up or created
+ * according to NLM_F_EXCL / NLM_F_CREATE and then updated
+ * administratively to ndm_state.
+ *
+ * Returns 0 or a negative errno: -ENODEV for an unknown ifindex,
+ * -EINVAL for bad attributes or a missing device, -ENOBUFS when the
+ * proxy lookup fails, -EEXIST / -ENOENT for the NLM_F_* conflicts,
+ * -EADDRNOTAVAIL when no table matches the family.
+ */
+int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct rtattr **nda = arg;
+	struct neigh_table *tbl;
+	struct net_device *dev = NULL;
+	int err = -ENODEV;
+
+	if (ndm->ndm_ifindex &&
+	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+		goto out;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
+		struct rtattr *dst_attr = nda[NDA_DST - 1];
+		int override = 1;
+		struct neighbour *n;
+
+		if (tbl->family != ndm->ndm_family)
+			continue;
+		/*
+		 * Matching table found: the list lock is dropped here and
+		 * every path below leaves the function without looping.
+		 */
+		read_unlock(&neigh_tbl_lock);
+
+		err = -EINVAL;
+		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+			goto out_dev_put;
+
+		if (ndm->ndm_flags & NTF_PROXY) {
+			err = -ENOBUFS;
+			if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
+				err = 0;
+			goto out_dev_put;
+		}
+
+		err = -EINVAL;
+		/* dev is NULL here, so there is no reference to drop. */
+		if (!dev)
+			goto out;
+		if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+			goto out_dev_put;
+	
+		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
+		if (n) {
+			if (nlh->nlmsg_flags & NLM_F_EXCL) {
+				err = -EEXIST;
+				neigh_release(n);
+				goto out_dev_put;
+			}
+			
+			/* An existing entry is only overridden on REPLACE. */
+			override = nlh->nlmsg_flags & NLM_F_REPLACE;
+		} else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+			err = -ENOENT;
+			goto out_dev_put;
+		} else {
+			n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
+			if (IS_ERR(n)) {
+				err = PTR_ERR(n);
+				goto out_dev_put;
+			}
+		}
+
+		err = neigh_update(n,
+				   lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
+				   ndm->ndm_state,
+				   (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
+				   NEIGH_UPDATE_F_ADMIN);
+
+		neigh_release(n);
+		goto out_dev_put;
+	}
+
+	read_unlock(&neigh_tbl_lock);
+	err = -EADDRNOTAVAIL;
+out_dev_put:
+	if (dev)
+		dev_put(dev);
+out:
+	return err;
+}
+
+
+/*
+ * Append one netlink message of type @event describing neighbour @n to
+ * @skb: an ndmsg header followed by NDA_DST, NDA_LLADDR (only while the
+ * state is NUD_VALID), NDA_CACHEINFO and NDA_PROBES attributes.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to
+ * where this message started and -1 is returned.
+ *
+ * NOTE(review): NLMSG_PUT() and RTA_PUT() presumably jump to the
+ * nlmsg_failure / rtattr_failure labels when the skb has no room; the
+ * labels have no other visible users.  'locked' exists so that path can
+ * drop n->lock if the failure happened while it was held.
+ */
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
+			   u32 pid, u32 seq, int event)
+{
+	unsigned long now = jiffies;
+	unsigned char *b = skb->tail;
+	struct nda_cacheinfo ci;
+	int locked = 0;
+	u32 probes;
+	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
+					 sizeof(struct ndmsg));
+	struct ndmsg *ndm = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
+	ndm->ndm_family	 = n->ops->family;
+	ndm->ndm_flags	 = n->flags;
+	ndm->ndm_type	 = n->type;
+	ndm->ndm_ifindex = n->dev->ifindex;
+	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
+	/* State, address and timestamps are sampled together under n->lock. */
+	read_lock_bh(&n->lock);
+	locked		 = 1;
+	ndm->ndm_state	 = n->nud_state;
+	if (n->nud_state & NUD_VALID)
+		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
+	/* Ages are reported in jiffies relative to 'now'. */
+	ci.ndm_used	 = now - n->used;
+	ci.ndm_confirmed = now - n->confirmed;
+	ci.ndm_updated	 = now - n->updated;
+	ci.ndm_refcnt	 = atomic_read(&n->refcnt) - 1;
+	probes = atomic_read(&n->probes);
+	read_unlock_bh(&n->lock);
+	locked		 = 0;
+	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+	RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
+	nlh->nlmsg_len	 = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	if (locked)
+		read_unlock_bh(&n->lock);
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+
+/*
+ * Dump the entries of one table into @skb as RTM_NEWNEIGH messages.
+ *
+ * The walk resumes from the position saved in @cb: args[1] is the hash
+ * bucket and args[2] the index within that bucket.  When an entry can
+ * not be added (neigh_fill_info() <= 0) the walk stops and the current
+ * position is written back to @cb.
+ *
+ * Returns skb->len after a complete walk, -1 when it stopped early.
+ */
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
+			    struct netlink_callback *cb)
+{
+	struct neighbour *n;
+	int rc, h, s_h = cb->args[1];
+	int idx, s_idx = idx = cb->args[2];
+
+	for (h = 0; h <= tbl->hash_mask; h++) {
+		if (h < s_h)
+			continue;
+		/* The saved index only applies to the bucket we stopped in. */
+		if (h > s_h)
+			s_idx = 0;
+		read_lock_bh(&tbl->lock);
+		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
+			if (idx < s_idx)
+				continue;
+			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq,
+					    RTM_NEWNEIGH) <= 0) {
+				read_unlock_bh(&tbl->lock);
+				rc = -1;
+				goto out;
+			}
+		}
+		read_unlock_bh(&tbl->lock);
+	}
+	rc = skb->len;
+out:
+	cb->args[1] = h;
+	cb->args[2] = idx;
+	return rc;
+}
+
+/*
+ * Netlink dump callback: walk every neighbour table (or only those of
+ * the requested family) and dump its entries into @skb.
+ *
+ * cb->args[0] records the table index to resume from.  The remaining
+ * args hold the per-table position and are cleared when the walk moves
+ * past the table it stopped in.  Returns skb->len.
+ */
+int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct neigh_table *tbl;
+	int t, family, s_t;
+
+	read_lock(&neigh_tbl_lock);
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	s_t = cb->args[0];
+
+	tbl = neigh_tables;
+	t = 0;
+	while (tbl) {
+		int wanted = t >= s_t && (!family || tbl->family == family);
+
+		if (wanted) {
+			/* A later table starts from the beginning. */
+			if (t > s_t)
+				memset(&cb->args[1], 0, sizeof(cb->args) -
+							sizeof(cb->args[0]));
+			if (neigh_dump_table(tbl, skb, cb) < 0)
+				break;
+		}
+		tbl = tbl->next;
+		t++;
+	}
+	read_unlock(&neigh_tbl_lock);
+
+	cb->args[0] = t;
+	return skb->len;
+}
+
+/*
+ * Call @cb(entry, @cookie) for every neighbour in @tbl's hash table.
+ * tbl->lock is read-held with bottom halves disabled for the whole walk,
+ * so @cb runs under that lock.
+ */
+void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
+{
+	struct neighbour *entry;
+	int bucket = 0;
+
+	read_lock_bh(&tbl->lock);
+	while (bucket <= tbl->hash_mask) {
+		entry = tbl->hash_buckets[bucket];
+		while (entry) {
+			cb(entry, cookie);
+			entry = entry->next;
+		}
+		bucket++;
+	}
+	read_unlock_bh(&tbl->lock);
+}
+EXPORT_SYMBOL(neigh_for_each);
+
+/*
+ * Walk every neighbour in @tbl and call @cb on it with the entry's own
+ * lock write-held.  When @cb returns non-zero the entry is unlinked from
+ * its hash chain, marked dead and, once its lock is dropped, released.
+ *
+ * The tbl->lock must be held as a writer and BH disabled.
+ */
+void __neigh_for_each_release(struct neigh_table *tbl,
+			      int (*cb)(struct neighbour *))
+{
+	int chain;
+
+	for (chain = 0; chain <= tbl->hash_mask; chain++) {
+		struct neighbour *n, **np;
+
+		/* np always points at the link that leads to n. */
+		np = &tbl->hash_buckets[chain];
+		while ((n = *np) != NULL) {
+			int release;
+
+			write_lock(&n->lock);
+			release = cb(n);
+			if (release) {
+				*np = n->next;
+				n->dead = 1;
+			} else
+				np = &n->next;
+			write_unlock(&n->lock);
+			/* Released only after n->lock has been dropped. */
+			if (release)
+				neigh_release(n);
+		}
+	}
+}
+EXPORT_SYMBOL(__neigh_for_each_release);
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Position the seq_file iterator on the first neighbour to show.
+ *
+ * Buckets are scanned from 0.  An entry is skipped when the optional
+ * sub-iterator rejects it (returns NULL), or when NEIGH_SEQ_SKIP_NOARP
+ * is set and the entry's state has no bit other than NUD_NOARP.  The
+ * bucket reached is saved in the iterator state and the PNEIGH phase
+ * flag is cleared.
+ *
+ * Returns the entry, or NULL if the table has nothing to show.
+ */
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+	struct neighbour *n = NULL;
+	int bucket;
+
+	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
+	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
+		n = tbl->hash_buckets[bucket];
+
+		while (n) {
+			if (state->neigh_sub_iter) {
+				/* Throw-away position for the sub-iterator. */
+				loff_t fakep = 0;
+				void *v;
+
+				v = state->neigh_sub_iter(state, n, &fakep);
+				if (!v)
+					goto next;
+			}
+			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+				break;
+			if (n->nud_state & ~NUD_NOARP)
+				break;
+		next:
+			n = n->next;
+		}
+
+		if (n)
+			break;
+	}
+	state->bucket = bucket;
+
+	return n;
+}
+
+/*
+ * Advance the seq_file iterator from neighbour @n to the next entry to
+ * show.
+ *
+ * If a sub-iterator is installed it is asked first whether it has more
+ * to show for @n itself; if so @n is returned again.  Otherwise the rest
+ * of the current chain and then the following buckets are scanned.  With
+ * a sub-iterator an entry is taken only if the sub-iterator accepts it;
+ * without one the NEIGH_SEQ_SKIP_NOARP filter applies.
+ *
+ * When an entry is found by the scan and @pos is not NULL, *@pos is
+ * decremented; neigh_get_idx() uses this to count down to a target
+ * index.  Returns NULL once the table is exhausted.
+ */
+static struct neighbour *neigh_get_next(struct seq_file *seq,
+					struct neighbour *n,
+					loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+
+	if (state->neigh_sub_iter) {
+		void *v = state->neigh_sub_iter(state, n, pos);
+		if (v)
+			return n;
+	}
+	n = n->next;
+
+	while (1) {
+		while (n) {
+			if (state->neigh_sub_iter) {
+				void *v = state->neigh_sub_iter(state, n, pos);
+				/* This return path does not touch *pos. */
+				if (v)
+					return n;
+				goto next;
+			}
+			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+				break;
+
+			if (n->nud_state & ~NUD_NOARP)
+				break;
+		next:
+			n = n->next;
+		}
+
+		if (n)
+			break;
+
+		/* Chain exhausted: move on to the next bucket, if any. */
+		if (++state->bucket > tbl->hash_mask)
+			break;
+
+		n = tbl->hash_buckets[state->bucket];
+	}
+
+	if (n && pos)
+		--(*pos);
+	return n;
+}
+
+/*
+ * Return the neighbour that lies *@pos steps after the first one to
+ * show, or NULL if the table runs out first.  *@pos is counted down by
+ * neigh_get_next() and holds what is left of the offset on return.
+ */
+static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct neighbour *cur;
+
+	for (cur = neigh_get_first(seq); cur && *pos;
+	     cur = neigh_get_next(seq, cur, pos))
+		;
+
+	if (*pos)
+		return NULL;
+	return cur;
+}
+
+/*
+ * Switch the seq_file iterator to the proxy-entry phase and return the
+ * head of the first non-empty proxy hash bucket, or NULL if all buckets
+ * are empty.  The bucket reached is saved in the iterator state.
+ */
+static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+	struct pneigh_entry *pn = NULL;
+	int bucket;
+
+	state->flags |= NEIGH_SEQ_IS_PNEIGH;
+	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
+		pn = tbl->phash_buckets[bucket];
+		if (pn)
+			break;
+	}
+	state->bucket = bucket;
+
+	return pn;
+}
+
+/*
+ * Advance from proxy entry @pn to the next one, moving on to later
+ * buckets when the current chain ends.  When an entry is found and @pos
+ * is not NULL, *@pos is decremented.  Returns NULL once all buckets have
+ * been visited.
+ */
+static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
+					    struct pneigh_entry *pn,
+					    loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+	struct pneigh_entry *next = pn->next;
+
+	while (!next && ++state->bucket <= PNEIGH_HASHMASK)
+		next = tbl->phash_buckets[state->bucket];
+
+	if (!next)
+		return NULL;
+
+	if (pos)
+		--(*pos);
+
+	return next;
+}
+
+/*
+ * Return the proxy entry that lies *@pos steps after the first one, or
+ * NULL if the proxy table runs out first.  *@pos is counted down by
+ * pneigh_get_next().
+ */
+static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct pneigh_entry *cur;
+
+	for (cur = pneigh_get_first(seq); cur && *pos;
+	     cur = pneigh_get_next(seq, cur, pos))
+		;
+
+	if (*pos)
+		return NULL;
+	return cur;
+}
+
+/*
+ * Find the entry at offset *@pos, looking in the neighbour table first
+ * and then, unless NEIGH_SEQ_NEIGH_ONLY is set, in the proxy table.
+ * The proxy lookup uses whatever offset the neighbour walk left in
+ * *@pos.
+ */
+static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	void *found = neigh_get_idx(seq, pos);
+
+	if (found)
+		return found;
+	if (state->flags & NEIGH_SEQ_NEIGH_ONLY)
+		return NULL;
+
+	return pneigh_get_idx(seq, pos);
+}
+
+/*
+ * seq_file ->start helper for neighbour tables.
+ *
+ * Resets the iterator state for @tbl and takes tbl->lock for reading
+ * with bottom halves disabled; neigh_seq_stop() drops it.  Position 0
+ * yields SEQ_START_TOKEN; position N > 0 yields the entry at index
+ * N - 1, or NULL past the end.
+ */
+void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+{
+	struct neigh_seq_state *state = seq->private;
+	loff_t pos_minus_one;
+
+	state->tbl = tbl;
+	state->bucket = 0;
+	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
+
+	read_lock_bh(&tbl->lock);
+
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	/* Walk on a copy so the caller's position is left untouched. */
+	pos_minus_one = *pos - 1;
+	return neigh_get_idx_any(seq, &pos_minus_one);
+}
+EXPORT_SYMBOL(neigh_seq_start);
+
+/*
+ * seq_file ->next helper for neighbour tables.
+ *
+ * After the start token the walk begins at neigh_get_idx(seq, pos).
+ * Otherwise it steps through the neighbour entries and, once they are
+ * exhausted and NEIGH_SEQ_NEIGH_ONLY is not set, continues with the
+ * proxy entries.  *@pos is incremented on every call.  Returns the next
+ * entry, or NULL at the end.
+ */
+void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct neigh_seq_state *state;
+	void *rc;
+
+	if (v == SEQ_START_TOKEN) {
+		rc = neigh_get_idx(seq, pos);
+		goto out;
+	}
+
+	state = seq->private;
+	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
+		rc = neigh_get_next(seq, v, NULL);
+		if (rc)
+			goto out;
+		/* Neighbours exhausted: switch to the proxy entries. */
+		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+			rc = pneigh_get_first(seq);
+	} else {
+		/* The proxy phase is never entered in NEIGH_ONLY mode. */
+		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
+		rc = pneigh_get_next(seq, v, NULL);
+	}
+out:
+	++(*pos);
+	return rc;
+}
+EXPORT_SYMBOL(neigh_seq_next);
+
+/*
+ * seq_file ->stop helper: drop the table read lock taken by
+ * neigh_seq_start().
+ */
+void neigh_seq_stop(struct seq_file *seq, void *v)
+{
+	struct neigh_seq_state *state = seq->private;
+
+	read_unlock_bh(&state->tbl->lock);
+}
+EXPORT_SYMBOL(neigh_seq_stop);
+
+/* statistics via seq_file */
+
+/*
+ * ->start for the per-cpu statistics file.  Position 0 is the header
+ * token; position N > 0 maps to the first possible cpu numbered N - 1 or
+ * higher.  *@pos is updated to that cpu number plus one.  Returns NULL
+ * when no such cpu exists.
+ */
+static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	cpu = *pos-1;
+	while (cpu < NR_CPUS && !cpu_possible(cpu))
+		++cpu;
+	if (cpu >= NR_CPUS)
+		return NULL;
+
+	*pos = cpu+1;
+	return per_cpu_ptr(tbl->stats, cpu);
+}
+
+/*
+ * ->next for the per-cpu statistics file: return the statistics of the
+ * first possible cpu numbered *@pos or higher and set *@pos to that cpu
+ * number plus one.  Returns NULL when there are no more cpus.
+ */
+static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	int cpu = *pos;
+
+	while (cpu < NR_CPUS && !cpu_possible(cpu))
+		++cpu;
+	if (cpu >= NR_CPUS)
+		return NULL;
+
+	*pos = cpu+1;
+	return per_cpu_ptr(tbl->stats, cpu);
+}
+
+/* ->stop for the statistics file: start/next take no lock, so nothing to undo. */
+static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
+{
+
+}
+
+/*
+ * ->show for the statistics file: print the column header for the start
+ * token, otherwise one row of hexadecimal counters for the cpu whose
+ * statistics @v points at.  The first column is the table-wide entry
+ * count, so it is the same on every row.
+ *
+ * NOTE(review): the header names twelve columns but each row prints only
+ * eleven values; nothing is printed for "forced_gc_goal_miss".
+ */
+static int neigh_stat_seq_show(struct seq_file *seq, void *v)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	struct neigh_statistics *st = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs forced_gc_goal_miss\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
+			"%08lx %08lx  %08lx %08lx\n",
+		   atomic_read(&tbl->entries),
+
+		   st->allocs,
+		   st->destroys,
+		   st->hash_grows,
+
+		   st->lookups,
+		   st->hits,
+
+		   st->res_failed,
+
+		   st->rcv_probes_mcast,
+		   st->rcv_probes_ucast,
+
+		   st->periodic_gc_runs,
+		   st->forced_gc_runs
+		   );
+
+	return 0;
+}
+
+/* seq_file iterator for the per-table statistics file: a header, then one row per possible cpu. */
+static struct seq_operations neigh_stat_seq_ops = {
+	.start	= neigh_stat_seq_start,
+	.next	= neigh_stat_seq_next,
+	.stop	= neigh_stat_seq_stop,
+	.show	= neigh_stat_seq_show,
+};
+
+/*
+ * open() for the statistics file: start a seq_file session and hand it
+ * the proc entry, whose ->data is the neighbour table (set up in
+ * neigh_table_init()).  Returns the result of seq_open().
+ */
+static int neigh_stat_seq_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &neigh_stat_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+		sf->private = PDE(inode);
+	}
+	return ret;
+}
+
+/* File operations installed on each table's proc statistics entry by neigh_table_init(). */
+static struct file_operations neigh_stat_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open 	 = neigh_stat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_ARPD
+/*
+ * Broadcast an RTM_GETNEIGH request describing @n to the RTMGRP_NEIGH
+ * netlink group.  Best effort: the request is silently dropped if the
+ * skb cannot be allocated or filled.
+ */
+void neigh_app_ns(struct neighbour *n)
+{
+	struct nlmsghdr *hdr;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_SPACE(sizeof(struct ndmsg) + 256), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* The message header sits at the start of the skb data. */
+	hdr = (struct nlmsghdr *)skb->data;
+	hdr->nlmsg_flags = NLM_F_REQUEST;
+	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+/*
+ * Broadcast an RTM_NEWNEIGH message describing @n to the RTMGRP_NEIGH
+ * netlink group.  Best effort: the message is silently dropped if the
+ * skb cannot be allocated or filled.
+ */
+static void neigh_app_notify(struct neighbour *n)
+{
+	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
+	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+#endif /* CONFIG_ARPD */
+
+#ifdef CONFIG_SYSCTL
+
+/*
+ * Template for the per-parms sysctl tree.  neigh_sysctl_register()
+ * copies it, fills in the data pointers and names, and chains the
+ * levels together as neigh_root_dir -> neigh_proto_dir ->
+ * neigh_neigh_dir -> neigh_dev -> neigh_vars.
+ *
+ * neigh_sysctl_register() addresses neigh_vars[] by numeric index, so
+ * the order of the entries below must not change; each entry is tagged
+ * with its index.
+ */
+static struct neigh_sysctl_table {
+	struct ctl_table_header *sysctl_header;
+	ctl_table		neigh_vars[__NET_NEIGH_MAX];
+	ctl_table		neigh_dev[2];
+	ctl_table		neigh_neigh_dir[2];
+	ctl_table		neigh_proto_dir[2];
+	ctl_table		neigh_root_dir[2];
+} neigh_sysctl_template = {
+	.neigh_vars = {
+		{ /* [0] */
+			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
+			.procname	= "mcast_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [1] */
+			.ctl_name	= NET_NEIGH_UCAST_SOLICIT,
+			.procname	= "ucast_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [2] */
+			.ctl_name	= NET_NEIGH_APP_SOLICIT,
+			.procname	= "app_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [3] */
+			.ctl_name	= NET_NEIGH_RETRANS_TIME,
+			.procname	= "retrans_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{ /* [4] */
+			.ctl_name	= NET_NEIGH_REACHABLE_TIME,
+			.procname	= "base_reachable_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{ /* [5] */
+			.ctl_name	= NET_NEIGH_DELAY_PROBE_TIME,
+			.procname	= "delay_first_probe_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{ /* [6] */
+			.ctl_name	= NET_NEIGH_GC_STALE_TIME,
+			.procname	= "gc_stale_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{ /* [7] */
+			.ctl_name	= NET_NEIGH_UNRES_QLEN,
+			.procname	= "unres_qlen",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [8] */
+			.ctl_name	= NET_NEIGH_PROXY_QLEN,
+			.procname	= "proxy_qlen",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [9] */
+			.ctl_name	= NET_NEIGH_ANYCAST_DELAY,
+			.procname	= "anycast_delay",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{ /* [10] */
+			.ctl_name	= NET_NEIGH_PROXY_DELAY,
+			.procname	= "proxy_delay",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{ /* [11] */
+			.ctl_name	= NET_NEIGH_LOCKTIME,
+			.procname	= "locktime",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{ /* [12] data is set only when registered without a device */
+			.ctl_name	= NET_NEIGH_GC_INTERVAL,
+			.procname	= "gc_interval",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{ /* [13] data is set only when registered without a device */
+			.ctl_name	= NET_NEIGH_GC_THRESH1,
+			.procname	= "gc_thresh1",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [14] data is set only when registered without a device */
+			.ctl_name	= NET_NEIGH_GC_THRESH2,
+			.procname	= "gc_thresh2",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [15] data is set only when registered without a device */
+			.ctl_name	= NET_NEIGH_GC_THRESH3,
+			.procname	= "gc_thresh3",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{ /* [16] millisecond view of the retrans_time field */
+			.ctl_name	= NET_NEIGH_RETRANS_TIME_MS,
+			.procname	= "retrans_time_ms",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_ms_jiffies,
+			.strategy	= &sysctl_ms_jiffies,
+		},
+		{ /* [17] millisecond view of the base_reachable_time field */
+			.ctl_name	= NET_NEIGH_REACHABLE_TIME_MS,
+			.procname	= "base_reachable_time_ms",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_ms_jiffies,
+			.strategy	= &sysctl_ms_jiffies,
+		},
+	},
+	/* Directory for one device; name and id are replaced at registration. */
+	.neigh_dev = {
+		{
+			.ctl_name	= NET_PROTO_CONF_DEFAULT,
+			.procname	= "default",
+			.mode		= 0555,
+		},
+	},
+	.neigh_neigh_dir = {
+		{
+			.procname	= "neigh",
+			.mode		= 0555,
+		},
+	},
+	/* Protocol directory; name and id are filled in at registration. */
+	.neigh_proto_dir = {
+		{
+			.mode		= 0555,
+		},
+	},
+	.neigh_root_dir = {
+		{
+			.ctl_name	= CTL_NET,
+			.procname	= "net",
+			.mode		= 0555,
+		},
+	},
+};
+
+/*
+ * Register the sysctl tree for parameter block @p.
+ *
+ * A private copy of neigh_sysctl_template is wired to the fields of @p
+ * and registered as net/<p_name>/neigh/<device name or "default">/.
+ *
+ * @dev:      device the parms belong to, or NULL for the defaults
+ * @p_id:     ctl_name of the protocol directory
+ * @pdev_id:  ctl_name of the "neigh" directory
+ * @p_name:   procname of the protocol directory
+ * @handler, @strategy: if either is non-NULL, both replace the template
+ *            handlers of the retrans_time and base_reachable_time
+ *            entries (jiffies and millisecond variants), and @dev is
+ *            passed to them through extra1.
+ *
+ * Returns 0 on success with the table stored in p->sysctl_table, or
+ * -ENOBUFS if an allocation or the registration fails.
+ */
+int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
+			  int p_id, int pdev_id, char *p_name, 
+			  proc_handler *handler, ctl_handler *strategy)
+{
+	struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	const char *dev_name_source = NULL;
+	char *dev_name = NULL;
+	int err = 0;
+
+	if (!t)
+		return -ENOBUFS;
+	memcpy(t, &neigh_sysctl_template, sizeof(*t));
+	/* Indices follow the entry order in neigh_sysctl_template. */
+	t->neigh_vars[0].data  = &p->mcast_probes;
+	t->neigh_vars[1].data  = &p->ucast_probes;
+	t->neigh_vars[2].data  = &p->app_probes;
+	t->neigh_vars[3].data  = &p->retrans_time;
+	t->neigh_vars[4].data  = &p->base_reachable_time;
+	t->neigh_vars[5].data  = &p->delay_probe_time;
+	t->neigh_vars[6].data  = &p->gc_staletime;
+	t->neigh_vars[7].data  = &p->queue_len;
+	t->neigh_vars[8].data  = &p->proxy_qlen;
+	t->neigh_vars[9].data  = &p->anycast_delay;
+	t->neigh_vars[10].data = &p->proxy_delay;
+	t->neigh_vars[11].data = &p->locktime;
+
+	if (dev) {
+		dev_name_source = dev->name;
+		t->neigh_dev[0].ctl_name = dev->ifindex;
+		/*
+		 * Entries 12-15 are the gc_* knobs; with a device they
+		 * lose their procname and get no data pointer.
+		 */
+		t->neigh_vars[12].procname = NULL;
+		t->neigh_vars[13].procname = NULL;
+		t->neigh_vars[14].procname = NULL;
+		t->neigh_vars[15].procname = NULL;
+	} else {
+ 		dev_name_source = t->neigh_dev[0].procname;
+		/*
+		 * The four gc_* values are read from the ints directly
+		 * after *p.
+		 * NOTE(review): presumably the gc fields that follow the
+		 * embedded parms in struct neigh_table -- confirm against
+		 * the struct layout.
+		 */
+		t->neigh_vars[12].data = (int *)(p + 1);
+		t->neigh_vars[13].data = (int *)(p + 1) + 1;
+		t->neigh_vars[14].data = (int *)(p + 1) + 2;
+		t->neigh_vars[15].data = (int *)(p + 1) + 3;
+	}
+
+	/* The *_ms entries point at the same fields as entries 3 and 4. */
+	t->neigh_vars[16].data  = &p->retrans_time;
+	t->neigh_vars[17].data  = &p->base_reachable_time;
+
+	if (handler || strategy) {
+		/* RetransTime */
+		t->neigh_vars[3].proc_handler = handler;
+		t->neigh_vars[3].strategy = strategy;
+		t->neigh_vars[3].extra1 = dev;
+		/* ReachableTime */
+		t->neigh_vars[4].proc_handler = handler;
+		t->neigh_vars[4].strategy = strategy;
+		t->neigh_vars[4].extra1 = dev;
+		/* RetransTime (in milliseconds)*/
+		t->neigh_vars[16].proc_handler = handler;
+		t->neigh_vars[16].strategy = strategy;
+		t->neigh_vars[16].extra1 = dev;
+		/* ReachableTime (in milliseconds) */
+		t->neigh_vars[17].proc_handler = handler;
+		t->neigh_vars[17].strategy = strategy;
+		t->neigh_vars[17].extra1 = dev;
+	}
+
+	/* Private copy of the name; neigh_sysctl_unregister() frees it. */
+	dev_name = net_sysctl_strdup(dev_name_source);
+	if (!dev_name) {
+		err = -ENOBUFS;
+		goto free;
+	}
+
+ 	t->neigh_dev[0].procname = dev_name;
+
+	t->neigh_neigh_dir[0].ctl_name = pdev_id;
+
+	t->neigh_proto_dir[0].procname = p_name;
+	t->neigh_proto_dir[0].ctl_name = p_id;
+
+	/* Chain the directory levels from the leaves up to the root. */
+	t->neigh_dev[0].child	       = t->neigh_vars;
+	t->neigh_neigh_dir[0].child    = t->neigh_dev;
+	t->neigh_proto_dir[0].child    = t->neigh_neigh_dir;
+	t->neigh_root_dir[0].child     = t->neigh_proto_dir;
+
+	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
+	if (!t->sysctl_header) {
+		err = -ENOBUFS;
+		goto free_procname;
+	}
+	p->sysctl_table = t;
+	return 0;
+
+	/* error path */
+ free_procname:
+	kfree(dev_name);
+ free:
+	kfree(t);
+
+	return err;
+}
+
+/*
+ * Undo neigh_sysctl_register() for @p: unregister the sysctl tree and
+ * free the table together with its duplicated device name.  Does
+ * nothing if no table is registered.
+ */
+void neigh_sysctl_unregister(struct neigh_parms *p)
+{
+	struct neigh_sysctl_table *t = p->sysctl_table;
+
+	if (!t)
+		return;
+
+	p->sysctl_table = NULL;
+	unregister_sysctl_table(t->sysctl_header);
+	kfree(t->neigh_dev[0].procname);
+	kfree(t);
+}
+
+#endif	/* CONFIG_SYSCTL */
+
+/*
+ * Module exports for the neighbour API that do not have an
+ * EXPORT_SYMBOL next to their definition.
+ */
+EXPORT_SYMBOL(__neigh_event_send);
+EXPORT_SYMBOL(neigh_add);
+EXPORT_SYMBOL(neigh_changeaddr);
+EXPORT_SYMBOL(neigh_compat_output);
+EXPORT_SYMBOL(neigh_connected_output);
+EXPORT_SYMBOL(neigh_create);
+EXPORT_SYMBOL(neigh_delete);
+EXPORT_SYMBOL(neigh_destroy);
+EXPORT_SYMBOL(neigh_dump_info);
+EXPORT_SYMBOL(neigh_event_ns);
+EXPORT_SYMBOL(neigh_ifdown);
+EXPORT_SYMBOL(neigh_lookup);
+EXPORT_SYMBOL(neigh_lookup_nodev);
+EXPORT_SYMBOL(neigh_parms_alloc);
+EXPORT_SYMBOL(neigh_parms_release);
+EXPORT_SYMBOL(neigh_rand_reach_time);
+EXPORT_SYMBOL(neigh_resolve_output);
+EXPORT_SYMBOL(neigh_table_clear);
+EXPORT_SYMBOL(neigh_table_init);
+EXPORT_SYMBOL(neigh_update);
+EXPORT_SYMBOL(neigh_update_hhs);
+EXPORT_SYMBOL(pneigh_enqueue);
+EXPORT_SYMBOL(pneigh_lookup);
+
+#ifdef CONFIG_ARPD
+EXPORT_SYMBOL(neigh_app_ns);
+#endif
+#ifdef CONFIG_SYSCTL
+EXPORT_SYMBOL(neigh_sysctl_register);
+EXPORT_SYMBOL(neigh_sysctl_unregister);
+#endif
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
new file mode 100644
index 00000000000..060f703659e
--- /dev/null
+++ b/net/core/net-sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * net-sysfs.c - network device class and attributes
+ *
+ * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
+ * 
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/wireless.h>
+
+#define to_class_dev(obj) container_of(obj,struct class_device,kobj)
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+
+static const char fmt_hex[] = "%#x\n";
+static const char fmt_dec[] = "%d\n";
+static const char fmt_ulong[] = "%lu\n";
+
+static inline int dev_isalive(const struct net_device *dev) 
+{
+	return dev->reg_state == NETREG_REGISTERED;
+}
+
+/* Format one field of a live device under dev_base_lock (as GIF* ioctl's) */
+static ssize_t netdev_show(const struct class_device *cd, char *buf,
+			   ssize_t (*format)(const struct net_device *, char *))
+{
+	struct net_device *ndev = to_net_dev(cd);
+	ssize_t len;
+
+	read_lock(&dev_base_lock);
+	/* -EINVAL once the device is no longer registered */
+	len = dev_isalive(ndev) ? format(ndev, buf) : -EINVAL;
+	read_unlock(&dev_base_lock);
+
+	return len;
+}
+
+/* generate a show function for simple field */
+#define NETDEVICE_SHOW(field, format_string)				\
+static ssize_t format_##field(const struct net_device *net, char *buf)	\
+{									\
+	return sprintf(buf, format_string, net->field);			\
+}									\
+static ssize_t show_##field(struct class_device *cd, char *buf)		\
+{									\
+	return netdev_show(cd, buf, format_##field);			\
+}
+
+
+/* Parse buf and apply it via set(); permission/locking as SIF* ioctl's */
+static ssize_t netdev_store(struct class_device *dev,
+			    const char *buf, size_t len,
+			    int (*set)(struct net_device *, unsigned long))
+{
+	struct net_device *ndev = to_net_dev(dev);
+	unsigned long value;
+	char *end;
+	int rc = -EINVAL;	/* result if the device is not alive */
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	value = simple_strtoul(buf, &end, 0);
+	if (end == buf)		/* no digits were consumed */
+		return -EINVAL;
+
+	rtnl_lock();
+	if (dev_isalive(ndev)) {
+		rc = set(ndev, value);
+		if (rc == 0)
+			rc = len;	/* success: report the whole write */
+	}
+	rtnl_unlock();
+	return rc;
+}
+
+/* generate a read-only network device class attribute */
+#define NETDEVICE_ATTR(field, format_string)				\
+NETDEVICE_SHOW(field, format_string)					\
+static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL)		\
+
+NETDEVICE_ATTR(addr_len, fmt_dec);
+NETDEVICE_ATTR(iflink, fmt_dec);
+NETDEVICE_ATTR(ifindex, fmt_dec);
+NETDEVICE_ATTR(features, fmt_hex);
+NETDEVICE_ATTR(type, fmt_dec);
+
+/* "aa:bb:..:zz\n" into buf; callers use the GIFHWADDR ioctl locking rules */
+static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
+{
+	char *out = buf;
+	int n;
+
+	for (n = 0; n < len; n++)
+		out += sprintf(out, "%02x%c", addr[n],
+			       n + 1 == len ? '\n' : ':');
+	return out - buf;
+}
+
+static ssize_t show_address(struct class_device *dev, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	ssize_t len;
+
+	read_lock(&dev_base_lock);
+	len = dev_isalive(ndev)		/* -EINVAL once unregistered */
+		? format_addr(buf, ndev->dev_addr, ndev->addr_len) : -EINVAL;
+	read_unlock(&dev_base_lock);
+	return len;
+}
+
+static ssize_t show_broadcast(struct class_device *dev, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	if (!dev_isalive(ndev))		/* device is not registered */
+		return -EINVAL;
+	return format_addr(buf, ndev->broadcast, ndev->addr_len);
+}
+
+static ssize_t show_carrier(struct class_device *dev, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	/* carrier state is only reported while the interface is running */
+	if (!netif_running(ndev))
+		return -EINVAL;
+	return sprintf(buf, fmt_dec, !!netif_carrier_ok(ndev));
+}
+
+static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL);
+static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
+
+/* read-write attributes */
+NETDEVICE_SHOW(mtu, fmt_dec);
+
+static int change_mtu(struct net_device *net, unsigned long new_mtu)
+{
+	return dev_set_mtu(net, (int) new_mtu);
+}
+
+static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_mtu);
+}
+
+static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu);
+
+NETDEVICE_SHOW(flags, fmt_hex);
+
+static int change_flags(struct net_device *net, unsigned long new_flags)
+{
+	return dev_change_flags(net, (unsigned) new_flags);
+}
+
+static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_flags);
+}
+
+static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
+
+NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+
+static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
+{
+	net->tx_queue_len = new_len;
+	return 0;
+}
+
+static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_tx_queue_len);
+}
+
+static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 
+			 store_tx_queue_len);
+
+
+static struct class_device_attribute *net_class_attributes[] = {
+	&class_device_attr_ifindex,
+	&class_device_attr_iflink,
+	&class_device_attr_addr_len,
+	&class_device_attr_tx_queue_len,
+	&class_device_attr_features,
+	&class_device_attr_mtu,
+	&class_device_attr_flags,
+	&class_device_attr_type,
+	&class_device_attr_address,
+	&class_device_attr_broadcast,
+	&class_device_attr_carrier,
+	NULL
+};
+
+/* Show one attribute of the statistics group; offset selects the counter */
+static ssize_t netstat_show(const struct class_device *cd, char *buf,
+			    unsigned long offset)
+{
+	struct net_device *dev = to_net_dev(cd);
+	struct net_device_stats *stats;
+	ssize_t ret = -EINVAL;
+
+	/* offset must name a whole unsigned long inside net_device_stats */
+	WARN_ON(offset >= sizeof(struct net_device_stats) ||
+		offset % sizeof(unsigned long) != 0);
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev) && dev->get_stats &&
+	    (stats = (*dev->get_stats)(dev)))
+		ret = sprintf(buf, fmt_ulong,
+			      *(unsigned long *)(((u8 *) stats) + offset));
+
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+/* generate a read-only statistics attribute */
+#define NETSTAT_ENTRY(name)						\
+static ssize_t show_##name(struct class_device *cd, char *buf) 		\
+{									\
+	return netstat_show(cd, buf, 					\
+			    offsetof(struct net_device_stats, name));	\
+}									\
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+NETSTAT_ENTRY(rx_packets);
+NETSTAT_ENTRY(tx_packets);
+NETSTAT_ENTRY(rx_bytes);
+NETSTAT_ENTRY(tx_bytes);
+NETSTAT_ENTRY(rx_errors);
+NETSTAT_ENTRY(tx_errors);
+NETSTAT_ENTRY(rx_dropped);
+NETSTAT_ENTRY(tx_dropped);
+NETSTAT_ENTRY(multicast);
+NETSTAT_ENTRY(collisions);
+NETSTAT_ENTRY(rx_length_errors);
+NETSTAT_ENTRY(rx_over_errors);
+NETSTAT_ENTRY(rx_crc_errors);
+NETSTAT_ENTRY(rx_frame_errors);
+NETSTAT_ENTRY(rx_fifo_errors);
+NETSTAT_ENTRY(rx_missed_errors);
+NETSTAT_ENTRY(tx_aborted_errors);
+NETSTAT_ENTRY(tx_carrier_errors);
+NETSTAT_ENTRY(tx_fifo_errors);
+NETSTAT_ENTRY(tx_heartbeat_errors);
+NETSTAT_ENTRY(tx_window_errors);
+NETSTAT_ENTRY(rx_compressed);
+NETSTAT_ENTRY(tx_compressed);
+
+static struct attribute *netstat_attrs[] = {
+	&class_device_attr_rx_packets.attr,
+	&class_device_attr_tx_packets.attr,
+	&class_device_attr_rx_bytes.attr,
+	&class_device_attr_tx_bytes.attr,
+	&class_device_attr_rx_errors.attr,
+	&class_device_attr_tx_errors.attr,
+	&class_device_attr_rx_dropped.attr,
+	&class_device_attr_tx_dropped.attr,
+	&class_device_attr_multicast.attr,
+	&class_device_attr_collisions.attr,
+	&class_device_attr_rx_length_errors.attr,
+	&class_device_attr_rx_over_errors.attr,
+	&class_device_attr_rx_crc_errors.attr,
+	&class_device_attr_rx_frame_errors.attr,
+	&class_device_attr_rx_fifo_errors.attr,
+	&class_device_attr_rx_missed_errors.attr,
+	&class_device_attr_tx_aborted_errors.attr,
+	&class_device_attr_tx_carrier_errors.attr,
+	&class_device_attr_tx_fifo_errors.attr,
+	&class_device_attr_tx_heartbeat_errors.attr,
+	&class_device_attr_tx_window_errors.attr,
+	&class_device_attr_rx_compressed.attr,
+	&class_device_attr_tx_compressed.attr,
+	NULL
+};
+
+
+static struct attribute_group netstat_group = {
+	.name  = "statistics",
+	.attrs  = netstat_attrs,
+};
+
+#ifdef WIRELESS_EXT
+/* helper function that does all the locking etc for wireless stats */
+static ssize_t wireless_show(struct class_device *cd, char *buf,
+			     ssize_t (*format)(const struct iw_statistics *,
+					       char *))
+{
+	struct net_device *dev = to_net_dev(cd);
+	const struct iw_statistics *iw;
+	ssize_t ret = -EINVAL;
+	
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev) && dev->get_wireless_stats 
+	    && (iw = dev->get_wireless_stats(dev)) != NULL) 
+		ret = (*format)(iw, buf);
+	read_unlock(&dev_base_lock);
+
+	return ret;
+}
+
+/* show function template for wireless fields */
+#define WIRELESS_SHOW(name, field, format_string)			\
+static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
+{									\
+	return sprintf(buf, format_string, iw->field);			\
+}									\
+static ssize_t show_iw_##name(struct class_device *cd, char *buf)	\
+{									\
+	return wireless_show(cd, buf, format_iw_##name);		\
+}									\
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
+
+WIRELESS_SHOW(status, status, fmt_hex);
+WIRELESS_SHOW(link, qual.qual, fmt_dec);
+WIRELESS_SHOW(level, qual.level, fmt_dec);
+WIRELESS_SHOW(noise, qual.noise, fmt_dec);
+WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
+WIRELESS_SHOW(crypt, discard.code, fmt_dec);
+WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
+WIRELESS_SHOW(misc, discard.misc, fmt_dec);
+WIRELESS_SHOW(retries, discard.retries, fmt_dec);
+WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
+
+static struct attribute *wireless_attrs[] = {
+	&class_device_attr_status.attr,
+	&class_device_attr_link.attr,
+	&class_device_attr_level.attr,
+	&class_device_attr_noise.attr,
+	&class_device_attr_nwid.attr,
+	&class_device_attr_crypt.attr,
+	&class_device_attr_fragment.attr,
+	&class_device_attr_retries.attr,
+	&class_device_attr_misc.attr,
+	&class_device_attr_beacon.attr,
+	NULL
+};
+
+static struct attribute_group wireless_group = {
+	.name = "wireless",
+	.attrs = wireless_attrs,
+};
+#endif
+
+#ifdef CONFIG_HOTPLUG
+static int netdev_hotplug(struct class_device *cd, char **envp,
+			  int num_envp, char *buf, int size)
+{
+	struct net_device *dev = to_net_dev(cd);
+	int i = 0;
+	int n;
+	/* Pass the interface name to hotplug as "INTERFACE=<name>" in envp[0];
+	 * n is the formatted length plus one for the terminating NUL. */
+	envp[i++] = buf;
+	n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
+	buf += n;
+	size -= n;
+	/* fail if the string used up buf or envp has no slot for the NULL */
+	if ((size <= 0) || (i >= num_envp))
+		return -ENOMEM;
+	/* NULL-terminate the environment list */
+	envp[i] = NULL;
+	return 0;
+}
+#endif
+
+/*
+ *	netdev_release -- destroy and free a dead device. 
+ *	Called when last reference to class_device kobject is gone.
+ */
+static void netdev_release(struct class_device *cd)
+{
+	struct net_device *ndev = to_net_dev(cd);
+
+	BUG_ON(ndev->reg_state != NETREG_RELEASED);
+
+	/* the pointer freed lies ndev->padded bytes before the net_device */
+	kfree((char *)ndev - ndev->padded);
+}
+
+static struct class net_class = {
+	.name = "net",
+	.release = netdev_release,
+#ifdef CONFIG_HOTPLUG
+	.hotplug = netdev_hotplug,
+#endif
+};
+
+void netdev_unregister_sysfs(struct net_device * net)
+{
+	struct kobject *kobj = &net->class_dev.kobj;
+
+	/* remove the optional groups, using the same tests as registration */
+	if (net->get_stats)
+		sysfs_remove_group(kobj, &netstat_group);
+#ifdef WIRELESS_EXT
+	if (net->get_wireless_stats)
+		sysfs_remove_group(kobj, &wireless_group);
+#endif
+	/* then delete the class device itself */
+	class_device_del(&net->class_dev);
+}
+
+/* Create sysfs entries for a network device; 0 or the first failing error */
+int netdev_register_sysfs(struct net_device *net)
+{
+	struct class_device *class_dev = &(net->class_dev);
+	int i;
+	struct class_device_attribute *attr;
+	int ret;
+	/* attach the embedded class device to net_class, named after net */
+	class_dev->class = &net_class;
+	class_dev->class_data = net;
+
+	strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
+	if ((ret = class_device_register(class_dev)))
+		goto out;
+	/* one sysfs file per entry of the NULL-terminated attribute table */
+	for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) {
+		if ((ret = class_device_create_file(class_dev, attr)))
+		    goto out_unreg;
+	}
+
+	/* the "statistics" group exists only for devices with get_stats */
+	if (net->get_stats &&
+	    (ret = sysfs_create_group(&class_dev->kobj, &netstat_group)))
+		goto out_unreg; 
+	/* likewise "wireless", only when get_wireless_stats is provided */
+#ifdef WIRELESS_EXT
+	if (net->get_wireless_stats &&
+	    (ret = sysfs_create_group(&class_dev->kobj, &wireless_group)))
+		goto out_cleanup; 
+
+	return 0;
+out_cleanup:
+	if (net->get_stats)
+		sysfs_remove_group(&class_dev->kobj, &netstat_group);
+#else
+	return 0;
+#endif
+	/* failure after registration: report it and unregister the device */
+out_unreg:
+	printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
+	       net->name, ret);
+	class_device_unregister(class_dev);
+out:
+	return ret;
+}
+
+int netdev_sysfs_init(void)
+{
+	return class_register(&net_class);
+}
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
new file mode 100644
index 00000000000..e51cfa46950
--- /dev/null
+++ b/net/core/netfilter.c
@@ -0,0 +1,799 @@
+/* netfilter.c: look after the filters for various protocols. 
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * Rusty Russell (C)2000 -- This code is GPL.
+ *
+ * February 2000: Modified by James Morris to have 1 queue per protocol.
+ * 15-Mar-2000:   Added NF_REPEAT --RR.
+ * 08-May-2003:	  Internal logging interface added by Jozsef Kadlecsik.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/ip.h>
+
+/* In this code, we can be waiting indefinitely for userspace to
+ * service a packet if a hook returns NF_QUEUE.  We could keep a count
+ * of skbuffs queued for userspace, and not deregister a hook unless
+ * this is zero, but that sucks.  Now, we simply check when the
+ * packets come back: if the hook is gone, the packet is discarded. */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NFDEBUG(format, args...)  printk(format , ## args)
+#else
+#define NFDEBUG(format, args...)
+#endif
+
+/* Sockopts only registered and called from user context, so
+   net locking would be overkill.  Also, [gs]etsockopt calls may
+   sleep. */
+static DECLARE_MUTEX(nf_sockopt_mutex);
+
+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+static LIST_HEAD(nf_sockopts);
+static DEFINE_SPINLOCK(nf_hook_lock);
+
+/* 
+ * A queue handler may be registered for each protocol.  Each is protected by
+ * long term mutex.  The handler must provide an outfn() to accept packets
+ * for queueing and must reinject all packets it receives, no matter what.
+ */
+static struct nf_queue_handler_t {
+	nf_queue_outfn_t outfn;
+	void *data;
+} queue_handler[NPROTO];
+static DEFINE_RWLOCK(queue_handler_lock);
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+	struct list_head *pos;
+
+	spin_lock_bh(&nf_hook_lock);
+	/* chain stays sorted: stop at the first hook with a larger priority */
+	list_for_each(pos, &nf_hooks[reg->pf][reg->hooknum])
+		if (((struct nf_hook_ops *)pos)->priority > reg->priority)
+			break;
+	list_add_rcu(&reg->list, pos->prev);	/* i.e. just before pos */
+	spin_unlock_bh(&nf_hook_lock);
+
+	synchronize_net();
+	return 0;
+}
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+	spin_lock_bh(&nf_hook_lock);
+	list_del_rcu(&reg->list);
+	spin_unlock_bh(&nf_hook_lock);
+
+	synchronize_net();
+}
+
+/* Do the ranges [min1, max1) and [min2, max2) (max exclusive) intersect? */
+static inline int overlap(int min1, int max1, int min2, int max2)
+{
+	return !(max1 <= min2 || max2 <= min1);
+}
+
+/* Register sockopt ranges (max exclusive): 0, -EINTR, or -EBUSY on overlap */
+int nf_register_sockopt(struct nf_sockopt_ops *reg)
+{
+	struct list_head *i;
+	int ret = 0;
+	/* nf_sockopt_mutex is held while the nf_sockopts list is examined */
+	if (down_interruptible(&nf_sockopt_mutex) != 0)
+		return -EINTR;
+	/* refuse a set or get range that collides with one already listed */
+	list_for_each(i, &nf_sockopts) {
+		struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
+		if (ops->pf == reg->pf
+		    && (overlap(ops->set_optmin, ops->set_optmax, 
+				reg->set_optmin, reg->set_optmax)
+			|| overlap(ops->get_optmin, ops->get_optmax, 
+				   reg->get_optmin, reg->get_optmax))) {
+			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+				ops->set_optmin, ops->set_optmax, 
+				ops->get_optmin, ops->get_optmax, 
+				reg->set_optmin, reg->set_optmax,
+				reg->get_optmin, reg->get_optmax);
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_add(&reg->list, &nf_sockopts);
+out:
+	up(&nf_sockopt_mutex);
+	return ret;
+}
+
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+{
+	/* No point being interruptible: we're probably in cleanup_module() */
+ restart:
+	down(&nf_sockopt_mutex);
+	if (reg->use != 0) {
+		/* Still in use: record ourselves as cleanup_task and sleep;
+		 * nf_sockopt() wakes cleanup_task when a call finishes. */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		reg->cleanup_task = current;
+		up(&nf_sockopt_mutex);
+		schedule();
+		goto restart;
+	}
+	list_del(&reg->list);
+	up(&nf_sockopt_mutex);
+}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4.h>
+
+static void debug_print_hooks_ip(unsigned int nf_debug)
+{
+	if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
+		printk("PRE_ROUTING ");
+		nf_debug ^= (1 << NF_IP_PRE_ROUTING);
+	}
+	if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
+		printk("LOCAL_IN ");
+		nf_debug ^= (1 << NF_IP_LOCAL_IN);
+	}
+	if (nf_debug & (1 << NF_IP_FORWARD)) {
+		printk("FORWARD ");
+		nf_debug ^= (1 << NF_IP_FORWARD);
+	}
+	if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
+		printk("LOCAL_OUT ");
+		nf_debug ^= (1 << NF_IP_LOCAL_OUT);
+	}
+	if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
+		printk("POST_ROUTING ");
+		nf_debug ^= (1 << NF_IP_POST_ROUTING);
+	}
+	if (nf_debug)
+		printk("Crap bits: 0x%04X", nf_debug);
+	printk("\n");
+}
+
+/* Debug dump of an skb; for PF_INET also the IP header fields, the
+ * TCP/UDP ports and every IP option word. */
+static void nf_dump_skb(int pf, struct sk_buff *skb)
+{
+	printk("skb: pf=%i %s dev=%s len=%u\n", pf,
+	       skb->sk ? "(owned)" : "(unowned)",
+	       skb->dev ? skb->dev->name : "(no dev)", skb->len);
+	switch (pf) {
+	case PF_INET: {
+		const struct iphdr *ip = skb->nh.iph;
+		__u32 *opt = (__u32 *) (ip + 1);
+		int opti, nopts;
+		__u16 src_port = 0, dst_port = 0;
+
+		if (ip->protocol == IPPROTO_TCP
+		    || ip->protocol == IPPROTO_UDP) {
+			struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
+			src_port = ntohs(tcp->source);
+			dst_port = ntohs(tcp->dest);
+		}
+
+		printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
+		       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
+		       ip->protocol, NIPQUAD(ip->saddr), src_port,
+		       NIPQUAD(ip->daddr), dst_port,
+		       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+		       ntohs(ip->frag_off), ip->ttl);
+		/* signed count: a bogus ihl < 5 prints no options, not 2^N */
+		nopts = (int)ip->ihl - (int)(sizeof(struct iphdr) / 4);
+		for (opti = 0; opti < nopts; opti++)
+			printk(" O=0x%8.8X", *opt++);
+		printk("\n");
+	}
+	}
+}
+
+void nf_debug_ip_local_deliver(struct sk_buff *skb)
+{
+	/* A loopback ("lo") packet must have passed exactly NF_IP_LOCAL_OUT,
+	 * NF_IP_POST_ROUTING, NF_IP_PRE_ROUTING and NF_IP_LOCAL_IN.  Any
+	 * other packet must have passed exactly NF_IP_PRE_ROUTING and
+	 * NF_IP_LOCAL_IN.  Anything else is reported and dumped.  */
+	if (!skb->dev) {
+		printk("ip_local_deliver: skb->dev is NULL.\n");
+	}
+	else if (strcmp(skb->dev->name, "lo") == 0) {
+		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+				      | (1 << NF_IP_POST_ROUTING)
+				      | (1 << NF_IP_PRE_ROUTING)
+				      | (1 << NF_IP_LOCAL_IN))) {
+			printk("ip_local_deliver: bad loopback skb: ");
+			debug_print_hooks_ip(skb->nf_debug);
+			nf_dump_skb(PF_INET, skb);
+		}
+	}
+	else {
+		if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
+				      | (1<<NF_IP_LOCAL_IN))) {
+			printk("ip_local_deliver: bad non-lo skb: ");
+			debug_print_hooks_ip(skb->nf_debug);
+			nf_dump_skb(PF_INET, skb);
+		}
+	}
+}
+
+void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
+{
+	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+				 | (1 << NF_IP_POST_ROUTING))) {
+		printk("ip_dev_loopback_xmit: bad owned skb = %p: ", 
+		       newskb);
+		debug_print_hooks_ip(newskb->nf_debug);
+		nf_dump_skb(PF_INET, newskb);
+	}
+	/* Clear to avoid confusing input check */
+	newskb->nf_debug = 0;
+}
+
+void nf_debug_ip_finish_output2(struct sk_buff *skb)
+{
+	/* If it's owned, it must have gone through the
+	 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
+	 * Otherwise, must have gone through
+	 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
+	 */
+	if (skb->sk) {
+		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+				      | (1 << NF_IP_POST_ROUTING))) {
+			printk("ip_finish_output: bad owned skb = %p: ", skb);
+			debug_print_hooks_ip(skb->nf_debug);
+			nf_dump_skb(PF_INET, skb);
+		}
+	} else {
+		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
+				      | (1 << NF_IP_FORWARD)
+				      | (1 << NF_IP_POST_ROUTING))) {
+			/* Fragments, entunnelled packets, TCP RSTs
+                           generated by ipt_REJECT will have no
+                           owners, but still may be local */
+			if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+					      | (1 << NF_IP_POST_ROUTING))){
+				printk("ip_finish_output:"
+				       " bad unowned skb = %p: ",skb);
+				debug_print_hooks_ip(skb->nf_debug);
+				nf_dump_skb(PF_INET, skb);
+			}
+		}
+	}
+}
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+/* Call get/setsockopt(): dispatch val to the handler whose range holds it */
+static int nf_sockopt(struct sock *sk, int pf, int val, 
+		      char __user *opt, int *len, int get)
+{
+	struct list_head *i;
+	struct nf_sockopt_ops *ops;
+	int ret;
+	/* the list walk and the use counts are done under nf_sockopt_mutex */
+	if (down_interruptible(&nf_sockopt_mutex) != 0)
+		return -EINTR;
+	/* on a match: bump use, release the mutex, then call the handler */
+	list_for_each(i, &nf_sockopts) {
+		ops = (struct nf_sockopt_ops *)i;
+		if (ops->pf == pf) {
+			if (get) {
+				if (val >= ops->get_optmin
+				    && val < ops->get_optmax) {
+					ops->use++;
+					up(&nf_sockopt_mutex);
+					ret = ops->get(sk, val, opt, len);
+					goto out;
+				}
+			} else {
+				if (val >= ops->set_optmin
+				    && val < ops->set_optmax) {
+					ops->use++;
+					up(&nf_sockopt_mutex);
+					ret = ops->set(sk, val, opt, *len);
+					goto out;
+				}
+			}
+		}
+	}
+	up(&nf_sockopt_mutex);
+	return -ENOPROTOOPT;
+	/* handler done: drop our use count, wake any recorded cleanup_task */
+ out:
+	down(&nf_sockopt_mutex);
+	ops->use--;
+	if (ops->cleanup_task)
+		wake_up_process(ops->cleanup_task);
+	up(&nf_sockopt_mutex);
+	return ret;
+}
+
+int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
+		  int len)
+{
+	return nf_sockopt(sk, pf, val, opt, &len, 0);
+}
+
+int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
+{
+	return nf_sockopt(sk, pf, val, opt, len, 1);
+}
+
+static unsigned int nf_iterate(struct list_head *head,
+			       struct sk_buff **skb,
+			       int hook,
+			       const struct net_device *indev,
+			       const struct net_device *outdev,
+			       struct list_head **i,
+			       int (*okfn)(struct sk_buff *),
+			       int hook_thresh)
+{
+	unsigned int verdict;
+	/* Runs the hooks after *i; *i is left on the hook that stopped us. */
+	/*
+	 * The caller must not block between calls to this
+	 * function because of risk of continuing from deleted element.
+	 */
+	list_for_each_continue_rcu(*i, head) {
+		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+		/* skip hooks whose priority is below the caller's threshold */
+		if (hook_thresh > elem->priority)
+			continue;
+
+		/* Optimization: we don't need to hold module
+                   reference here, since function can't sleep. --RR */
+		verdict = elem->hook(hook, skb, indev, outdev, okfn);
+		if (verdict != NF_ACCEPT) {
+#ifdef CONFIG_NETFILTER_DEBUG
+			if (unlikely(verdict > NF_MAX_VERDICT)) {
+				NFDEBUG("Evil return from %p(%u).\n",
+				        elem->hook, hook);
+				continue;
+			}
+#endif
+			if (verdict != NF_REPEAT)
+				return verdict;
+			*i = (*i)->prev;	/* NF_REPEAT: run this hook again */
+		}
+	}
+	return NF_ACCEPT;
+}
+
+/* Install outfn/data as the queue handler for pf; -EBUSY if one is set */
+int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
+{
+	struct nf_queue_handler_t *slot = &queue_handler[pf];
+	int ret = -EBUSY;
+
+	write_lock_bh(&queue_handler_lock);
+	if (!slot->outfn) {		/* slot is free: claim it */
+		slot->outfn = outfn;
+		slot->data = data;
+		ret = 0;
+	}
+	write_unlock_bh(&queue_handler_lock);
+
+	return ret;
+}
+
+/* The caller must flush their queue before this */
+int nf_unregister_queue_handler(int pf)
+{
+	write_lock_bh(&queue_handler_lock);
+	queue_handler[pf].outfn = NULL;
+	queue_handler[pf].data = NULL;
+	write_unlock_bh(&queue_handler_lock);
+	
+	return 0;
+}
+
+/* Hand skb to pf's queue handler; any packet that leaves via this function
+ * must come back through nf_reinject().  Returns 1 once the packet is
+ * queued or dropped here, 0 if the hook's module is going away (the caller
+ * then carries on with the next hook). */
+static int nf_queue(struct sk_buff *skb, 
+		    struct list_head *elem, 
+		    int pf, unsigned int hook,
+		    struct net_device *indev,
+		    struct net_device *outdev,
+		    int (*okfn)(struct sk_buff *))
+{
+	int status;
+	struct nf_info *info;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	struct net_device *physindev = NULL;
+	struct net_device *physoutdev = NULL;
+#endif
+
+	/* QUEUE == DROP if no one is waiting, to be safe. */
+	read_lock(&queue_handler_lock);
+	if (!queue_handler[pf].outfn) {
+		read_unlock(&queue_handler_lock);
+		kfree_skb(skb);
+		return 1;
+	}
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info) {
+		if (net_ratelimit())
+			printk(KERN_ERR "OOM queueing packet %p\n",
+			       skb);
+		read_unlock(&queue_handler_lock);
+		kfree_skb(skb);
+		return 1;
+	}
+	/* remember where traversal stopped so nf_reinject() can resume it */
+	*info = (struct nf_info) { 
+		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+
+	/* If it's going away, ignore hook. */
+	if (!try_module_get(info->elem->owner)) {
+		read_unlock(&queue_handler_lock);
+		kfree(info);
+		return 0;
+	}
+
+	/* Bump dev refs so they don't vanish while packet is out */
+	if (indev) dev_hold(indev);
+	if (outdev) dev_hold(outdev);
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge) {
+		physindev = skb->nf_bridge->physindev;
+		if (physindev) dev_hold(physindev);
+		physoutdev = skb->nf_bridge->physoutdev;
+		if (physoutdev) dev_hold(physoutdev);
+	}
+#endif
+
+	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
+	read_unlock(&queue_handler_lock);
+
+	if (status < 0) {
+		/* The handler refused the packet: undo the refs and drop it. */
+		if (indev) dev_put(indev);
+		if (outdev) dev_put(outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+		if (physindev) dev_put(physindev);
+		if (physoutdev) dev_put(physoutdev);
+#endif
+		module_put(info->elem->owner);
+		kfree(info);
+		kfree_skb(skb);
+		return 1;
+	}
+	return 1;
+}
+
+/* Returns 1 if okfn() needs to be executed by the caller,
+ * -EPERM for NF_DROP, 0 otherwise. */
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+		 struct net_device *indev,
+		 struct net_device *outdev,
+		 int (*okfn)(struct sk_buff *),
+		 int hook_thresh)
+{
+	struct list_head *elem;
+	unsigned int verdict;
+	int ret = 0;
+
+	/* We may already have this, but read-locks nest anyway */
+	rcu_read_lock();
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	if (unlikely((*pskb)->nf_debug & (1 << hook))) {
+		printk("nf_hook: hook %i already set.\n", hook);
+		nf_dump_skb(pf, *pskb);
+	}
+	(*pskb)->nf_debug |= (1 << hook);
+#endif
+	/* start on the list head, so nf_iterate() begins at the first hook */
+	elem = &nf_hooks[pf][hook];
+next_hook:
+	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+			     outdev, &elem, okfn, hook_thresh);
+	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
+		ret = 1;
+		goto unlock;
+	} else if (verdict == NF_DROP) {
+		kfree_skb(*pskb);
+		ret = -EPERM;
+	} else if (verdict == NF_QUEUE) {
+		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
+			goto next_hook;	/* hook ignored: resume after elem */
+	}
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+/* Resume hook traversal for a packet the queue handler gave back with
+ * verdict.  Drops the references nf_queue() took and frees info; the
+ * packet is dropped if the hook that queued it is no longer registered. */
+void nf_reinject(struct sk_buff *skb, struct nf_info *info,
+		 unsigned int verdict)
+{
+	struct list_head *elem = &info->elem->list;
+	struct list_head *i;
+
+	rcu_read_lock();
+
+	/* Release those devices we held, or Alexey will kill me. */
+	if (info->indev) dev_put(info->indev);
+	if (info->outdev) dev_put(info->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge) {
+		if (skb->nf_bridge->physindev)
+			dev_put(skb->nf_bridge->physindev);
+		if (skb->nf_bridge->physoutdev)
+			dev_put(skb->nf_bridge->physoutdev);
+	}
+#endif
+
+	/* Drop reference to owner of hook which queued us. */
+	module_put(info->elem->owner);
+
+	/* Look for the queueing hook; i ends on the list head if it is gone */
+	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook])
+		if (i == elem)
+			break;
+	if (i == &nf_hooks[info->pf][info->hook]) {
+		/* The module which sent it to userspace is gone. */
+		NFDEBUG("%s: module disappeared, dropping packet.\n",
+			__FUNCTION__);
+		verdict = NF_DROP;
+	}
+
+	/* Continue traversal iff userspace said ok... */
+	if (verdict == NF_REPEAT) {
+		elem = elem->prev;
+		verdict = NF_ACCEPT;
+	}
+
+	if (verdict == NF_ACCEPT) {
+	next_hook:
+		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+				     &skb, info->hook,
+				     info->indev, info->outdev, &elem,
+				     info->okfn, INT_MIN);
+	}
+
+	switch (verdict) {
+	case NF_ACCEPT:
+		info->okfn(skb);
+		break;
+	case NF_QUEUE:
+		if (!nf_queue(skb, elem, info->pf, info->hook,
+			      info->indev, info->outdev, info->okfn))
+			goto next_hook;
+		break;
+	}
+	rcu_read_unlock();
+
+	if (verdict == NF_DROP)
+		kfree_skb(skb);
+
+	kfree(info);
+}
+
+#ifdef CONFIG_INET
+/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+int ip_route_me_harder(struct sk_buff **pskb)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct rtable *rt;
+	struct flowi fl = {};
+	struct dst_entry *odst;
+	unsigned int hh_len;
+
+	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
+	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
+	 */
+	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.saddr = iph->saddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+		fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
+#endif
+		fl.proto = iph->protocol;
+		if (ip_route_output_key(&rt, &fl) != 0)
+			return -1;
+
+		/* Drop old route. */
+		dst_release((*pskb)->dst);
+		(*pskb)->dst = &rt->u.dst;
+	} else {
+		/* non-local src, find valid iif to satisfy
+		 * rp-filter when calling ip_route_input. */
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		if (ip_route_output_key(&rt, &fl) != 0)
+			return -1;
+
+		odst = (*pskb)->dst;
+		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
+			dst_release(&rt->u.dst);
+			return -1;
+		}
+		dst_release(&rt->u.dst);
+		dst_release(odst);
+	}
+	
+	if ((*pskb)->dst->error)
+		return -1;
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = (*pskb)->dst->dev->hard_header_len;
+	if (skb_headroom(*pskb) < hh_len) {
+		struct sk_buff *nskb;
+
+		nskb = skb_realloc_headroom(*pskb, hh_len);
+		if (!nskb) 
+			return -1;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ip_route_me_harder);
+
+/* Make the first writable_len bytes of *pskb safe to modify; *pskb may be
+ * replaced by a private copy.  Returns 0 on failure. */
+int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+{
+	struct sk_buff *old = *pskb;
+	struct sk_buff *copy;
+
+	if (writable_len > old->len)
+		return 0;
+
+	/* Exclusive use of packet: pulling the bytes into place is enough */
+	if (!skb_shared(old) && !skb_cloned(old))
+		return pskb_may_pull(old, writable_len);
+
+	copy = skb_copy(old, GFP_ATOMIC);
+	if (!copy)
+		return 0;
+	BUG_ON(skb_is_nonlinear(copy));
+
+	/* Rest of kernel will get very unhappy if we pass it a
+	   suddenly-orphaned skbuff */
+	if (old->sk)
+		skb_set_owner_w(copy, old->sk);
+	kfree_skb(old);
+	*pskb = copy;
+	return 1;
+}
+EXPORT_SYMBOL(skb_ip_make_writable);
+#endif /*CONFIG_INET*/
+
+/* Internal logging interface, which relies on the real 
+   LOG target modules */
+
+#define NF_LOG_PREFIXLEN		128
+
+static nf_logfn *nf_logging[NPROTO]; /* = NULL */
+static int reported = 0;
+static DEFINE_SPINLOCK(nf_log_lock);
+
+int nf_log_register(int pf, nf_logfn *logfn)
+{
+	int ret = -EBUSY;
+
+	/* Any setup of logging members must be done before
+	 * substituting pointer. */
+	spin_lock(&nf_log_lock);
+	if (!nf_logging[pf]) {
+		rcu_assign_pointer(nf_logging[pf], logfn);
+		ret = 0;
+	}
+	spin_unlock(&nf_log_lock);
+	return ret;
+}		
+
+void nf_log_unregister(int pf, nf_logfn *logfn)
+{
+	spin_lock(&nf_log_lock);
+	if (nf_logging[pf] == logfn)
+		nf_logging[pf] = NULL;
+	spin_unlock(&nf_log_lock);
+
+	/* Give time to concurrent readers. */
+	synchronize_net();
+}		
+
+void nf_log_packet(int pf,
+		   unsigned int hooknum,
+		   const struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   const char *fmt, ...)
+{
+	va_list args;
+	char prefix[NF_LOG_PREFIXLEN];
+	nf_logfn *logfn;
+	/* fmt/... build the prefix passed to the backend registered for pf */
+	rcu_read_lock();
+	logfn = rcu_dereference(nf_logging[pf]);
+	if (logfn) {
+		va_start(args, fmt);
+		vsnprintf(prefix, sizeof(prefix), fmt, args);
+		va_end(args);
+		/* We must read logging before nf_logfn[pf] */
+		logfn(hooknum, skb, in, out, prefix);
+	} else if (!reported) {
+		printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
+		       "no backend logging module loaded in!\n");
+		reported++;	/* later calls skip this warning */
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_register);
+EXPORT_SYMBOL(nf_log_unregister);
+EXPORT_SYMBOL(nf_log_packet);
+
+/* This does not belong here, but locally generated errors need it if connection
+   tracking in use: without this, connection may not be in hash table, and hence
+   manufactured ICMP or RST packets will not be associated with it. */
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+
+void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+{
+	void (*attach)(struct sk_buff *, struct sk_buff *);
+
+	if (skb->nfct && (attach = ip_ct_attach) != NULL) {
+		mb(); /* Just to be sure: must be read before executing this */
+		attach(new, skb);
+	}
+}
+
+void __init netfilter_init(void)
+{
+	int i, h;
+
+	for (i = 0; i < NPROTO; i++) {
+		for (h = 0; h < NF_MAX_HOOKS; h++)
+			INIT_LIST_HEAD(&nf_hooks[i][h]);
+	}
+}
+
+EXPORT_SYMBOL(ip_ct_attach);
+EXPORT_SYMBOL(nf_ct_attach);
+EXPORT_SYMBOL(nf_getsockopt);
+EXPORT_SYMBOL(nf_hook_slow);
+EXPORT_SYMBOL(nf_hooks);
+EXPORT_SYMBOL(nf_register_hook);
+EXPORT_SYMBOL(nf_register_queue_handler);
+EXPORT_SYMBOL(nf_register_sockopt);
+EXPORT_SYMBOL(nf_reinject);
+EXPORT_SYMBOL(nf_setsockopt);
+EXPORT_SYMBOL(nf_unregister_hook);
+EXPORT_SYMBOL(nf_unregister_queue_handler);
+EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
new file mode 100644
index 00000000000..a119696d552
--- /dev/null
+++ b/net/core/netpoll.c
@@ -0,0 +1,735 @@
+/*
+ * Common framework for low-level network console, dump, and debugger code
+ *
+ * Sep 8 2003  Matt Mackall <mpm@selenic.com>
+ *
+ * based on the netconsole code from:
+ *
+ * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002  Red Hat, Inc.
+ */
+
+#include <linux/smp_lock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/unaligned.h>
+
+/*
+ * We maintain a small pool of fully-sized skbs, to make sure the
+ * message gets out even in extreme OOM situations.
+ */
+
+/* Payload size used when sizing the reserve skbs (see MAX_SKB_SIZE). */
+#define MAX_UDP_CHUNK 1460
+/* Upper bound on the number of skbs kept in the reserve list. */
+#define MAX_SKBS 32
+/* Depth at which netpoll_queue() starts dropping instead of queueing. */
+#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
+
+/* Reserve skb list: a singly linked stack through skb->next.
+ * refill_skbs() and find_skb() modify skbs/nr_skbs under skb_list_lock. */
+static DEFINE_SPINLOCK(skb_list_lock);
+static int nr_skbs;
+static struct sk_buff *skbs;
+
+/* Deferred transmit list filled by netpoll_queue() and drained by
+ * queue_process(); both take queue_lock around list manipulation. */
+static DEFINE_SPINLOCK(queue_lock);
+static int queue_depth;
+static struct sk_buff *queue_head, *queue_tail;
+
+/* Counter raised/lowered by netpoll_set_trap() and around the ->poll()
+ * call in poll_napi(); read by netpoll_trap() and __netpoll_rx(). */
+static atomic_t trapped;
+
+/* Bits stored in np->rx_flags. */
+#define NETPOLL_RX_ENABLED  1
+#define NETPOLL_RX_DROP     2
+
+/* Allocation size for reserve skbs: one UDP chunk plus UDP, IP and
+ * Ethernet headers. */
+#define MAX_SKB_SIZE \
+		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
+				sizeof(struct iphdr) + sizeof(struct ethhdr))
+
+static void zap_completion_queue(void);
+
+/*
+ * Work handler: drain the deferred-transmit list built by netpoll_queue(),
+ * passing each skb to dev_queue_xmit().
+ *
+ * @p: unused work argument.
+ *
+ * The list head is only examined while queue_lock is held.  Testing it
+ * outside the lock and dereferencing it inside would let a second drainer
+ * empty the list in between, leaving a NULL pointer to be dereferenced.
+ */
+static void queue_process(void *p)
+{
+	unsigned long flags;
+	struct sk_buff *skb;
+
+	for (;;) {
+		spin_lock_irqsave(&queue_lock, flags);
+
+		skb = queue_head;
+		if (!skb) {
+			/* Nothing left (or someone else drained it). */
+			spin_unlock_irqrestore(&queue_lock, flags);
+			break;
+		}
+
+		queue_head = skb->next;
+		/* The tail's ->next is not maintained by netpoll_queue(),
+		 * so the end of the list is detected via queue_tail. */
+		if (skb == queue_tail)
+			queue_head = NULL;
+
+		queue_depth--;
+
+		spin_unlock_irqrestore(&queue_lock, flags);
+
+		/* Transmit outside the lock. */
+		dev_queue_xmit(skb);
+	}
+}
+
+static DECLARE_WORK(send_queue, queue_process, NULL);
+
+/*
+ * Append @skb to the deferred-transmit list and schedule queue_process()
+ * to send it.  If the list already holds MAX_QUEUE_DEPTH entries the skb
+ * is freed instead.
+ *
+ * The depth test is made under queue_lock and uses ">=": an unlocked
+ * "==" test lets two racing callers step over the limit, after which it
+ * would never match and the list could grow without bound.
+ */
+void netpoll_queue(struct sk_buff *skb)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue_lock, flags);
+
+	if (queue_depth >= MAX_QUEUE_DEPTH) {
+		spin_unlock_irqrestore(&queue_lock, flags);
+		/* Free outside the lock, as the original did. */
+		__kfree_skb(skb);
+		return;
+	}
+
+	if (!queue_head)
+		queue_head = skb;
+	else
+		queue_tail->next = skb;
+	queue_tail = skb;
+	queue_depth++;
+
+	spin_unlock_irqrestore(&queue_lock, flags);
+
+	schedule_work(&send_queue);
+}
+
+/*
+ * Compute the UDP checksum residue for @skb.
+ *
+ * Returns 0 when uh->check is zero (nothing to verify); otherwise returns
+ * the folded checksum over the pseudo header (@saddr, @daddr, @ulen,
+ * IPPROTO_UDP) combined with either the device-supplied skb->csum
+ * (CHECKSUM_HW) or a software sum over the whole skb.
+ */
+static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
+			     unsigned short ulen, u32 saddr, u32 daddr)
+{
+	if (!uh->check)
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_HW) {
+		/* Software path: seed with the pseudo header, then sum
+		 * skb->len bytes starting at offset 0. */
+		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen,
+					       IPPROTO_UDP, 0);
+		return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	}
+
+	return csum_tcpudp_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
+}
+
+/*
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
+ *
+ * In cases where there is bi-directional communications, reading only
+ * one message at a time can lead to packets being dropped by the
+ * network adapter, forcing superfluous retries and possibly timeouts.
+ * Thus, we set our budget to greater than 1.
+ */
+static void poll_napi(struct netpoll *np)
+{
+	int quota = 16;
+
+	/* Only act if RX work was scheduled for the device ... */
+	if (!test_bit(__LINK_STATE_RX_SCHED, &np->dev->state))
+		return;
+	/* ... we are not already the poll owner on this CPU ... */
+	if (np->poll_owner == smp_processor_id())
+		return;
+	/* ... and nobody else currently holds the poll lock. */
+	if (!spin_trylock(&np->poll_lock))
+		return;
+
+	np->rx_flags |= NETPOLL_RX_DROP;
+	atomic_inc(&trapped);
+
+	np->dev->poll(np->dev, &quota);
+
+	atomic_dec(&trapped);
+	np->rx_flags &= ~NETPOLL_RX_DROP;
+	spin_unlock(&np->poll_lock);
+}
+
+/*
+ * Pump the device behind @np once: run its poll_controller, run its
+ * ->poll() via poll_napi() when it has one, then release skbs waiting on
+ * this CPU's completion queue.  Does nothing if the device is absent, not
+ * running, or has no poll_controller.
+ */
+void netpoll_poll(struct netpoll *np)
+{
+	if (!np->dev)
+		return;
+	if (!netif_running(np->dev))
+		return;
+	if (!np->dev->poll_controller)
+		return;
+
+	/* Process pending work on NIC */
+	np->dev->poll_controller(np->dev);
+
+	if (np->dev->poll)
+		poll_napi(np);
+
+	zap_completion_queue();
+}
+
+/*
+ * Top the reserve list up to MAX_SKBS entries of MAX_SKB_SIZE bytes each,
+ * stopping early if an atomic allocation fails.
+ */
+static void refill_skbs(void)
+{
+	unsigned long flags;
+	struct sk_buff *fresh;
+
+	spin_lock_irqsave(&skb_list_lock, flags);
+	for (; nr_skbs < MAX_SKBS; nr_skbs++) {
+		fresh = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
+		if (!fresh)
+			break;
+
+		/* Push onto the head of the reserve stack. */
+		fresh->next = skbs;
+		skbs = fresh;
+	}
+	spin_unlock_irqrestore(&skb_list_lock, flags);
+}
+
+/*
+ * Detach this CPU's softnet completion queue and release every skb on it.
+ */
+static void zap_completion_queue(void)
+{
+	struct softnet_data *sd = &get_cpu_var(softnet_data);
+	struct sk_buff *pending, *skb;
+	unsigned long flags;
+
+	if (!sd->completion_queue)
+		goto done;
+
+	/* Take the whole list in one go with local interrupts off. */
+	local_irq_save(flags);
+	pending = sd->completion_queue;
+	sd->completion_queue = NULL;
+	local_irq_restore(flags);
+
+	while ((skb = pending) != NULL) {
+		pending = skb->next;
+		if (skb->destructor)
+			dev_kfree_skb_any(skb); /* put this one back */
+		else
+			__kfree_skb(skb);
+	}
+
+done:
+	put_cpu_var(softnet_data);
+}
+
+/*
+ * Obtain an skb of @len bytes with @reserve bytes of headroom.
+ *
+ * Tries a normal atomic allocation first and falls back to the reserve
+ * list.  If both fail it polls the device via netpoll_poll(@np) and tries
+ * again, looping until an skb is obtained; a one-off message is printed
+ * after one million failed attempts.
+ */
+static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
+{
+	int warned = 0, attempts = 0;
+	unsigned long flags;
+	struct sk_buff *skb;
+
+	zap_completion_queue();
+
+	for (;;) {
+		if (nr_skbs < MAX_SKBS)
+			refill_skbs();
+
+		skb = alloc_skb(len, GFP_ATOMIC);
+		if (!skb) {
+			/* Allocator is dry: pop one from the reserve. */
+			spin_lock_irqsave(&skb_list_lock, flags);
+			skb = skbs;
+			if (skb) {
+				skbs = skb->next;
+				skb->next = NULL;
+				nr_skbs--;
+			}
+			spin_unlock_irqrestore(&skb_list_lock, flags);
+		}
+
+		if (skb)
+			break;
+
+		attempts++;
+		if (!warned && attempts == 1000000) {
+			printk("out of netpoll skbs!\n");
+			warned = 1;
+		}
+		netpoll_poll(np);
+	}
+
+	atomic_set(&skb->users, 1);
+	skb_reserve(skb, reserve);
+	return skb;
+}
+
+/*
+ * Hand @skb straight to the driver of np->dev via hard_start_xmit().
+ *
+ * The skb is freed without being sent when @np, its device, or the
+ * device's running state is missing.  When this CPU already owns the poll
+ * lock or the device xmit lock, the skb is passed to np->drop() if set,
+ * otherwise freed.  In all other cases the function keeps retrying
+ * (calling netpoll_poll() between attempts) while the queue is stopped or
+ * the driver returns a non-zero status.
+ */
+static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+{
+	int status;
+
+repeat:
+	/* Re-evaluated on every retry. */
+	if(!np || !np->dev || !netif_running(np->dev)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* avoid recursion */
+	if(np->poll_owner == smp_processor_id() ||
+	   np->dev->xmit_lock_owner == smp_processor_id()) {
+		if (np->drop)
+			np->drop(skb);
+		else
+			__kfree_skb(skb);
+		return;
+	}
+
+	/* Take the xmit lock and record this CPU as its owner so the
+	 * recursion test above works. */
+	spin_lock(&np->dev->xmit_lock);
+	np->dev->xmit_lock_owner = smp_processor_id();
+
+	/*
+	 * network drivers do not expect to be called if the queue is
+	 * stopped.
+	 */
+	if (netif_queue_stopped(np->dev)) {
+		np->dev->xmit_lock_owner = -1;
+		spin_unlock(&np->dev->xmit_lock);
+
+		/* Poll the device, then start over. */
+		netpoll_poll(np);
+		goto repeat;
+	}
+
+	status = np->dev->hard_start_xmit(skb, np->dev);
+	np->dev->xmit_lock_owner = -1;
+	spin_unlock(&np->dev->xmit_lock);
+
+	/* transmit busy */
+	if(status) {
+		netpoll_poll(np);
+		goto repeat;
+	}
+}
+
+/*
+ * Wrap @len bytes of @msg in UDP, IPv4 and Ethernet headers built from
+ * the addresses and ports stored in @np, and transmit the frame with
+ * netpoll_send_skb().
+ *
+ * The payload is copied first; the headers are then pushed in front of it
+ * from the innermost (UDP) to the outermost (Ethernet).
+ */
+void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
+{
+	int total_len, eth_len, ip_len, udp_len;
+	struct sk_buff *skb;
+	struct udphdr *udph;
+	struct iphdr *iph;
+	struct ethhdr *eth;
+
+	udp_len = len + sizeof(*udph);
+	ip_len = eth_len = udp_len + sizeof(*iph);
+	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+
+	/* Reserve everything except the payload as headroom, so the
+	 * skb_push() calls below have room for all three headers. */
+	skb = find_skb(np, total_len, total_len - len);
+	if (!skb)
+		return;
+
+	memcpy(skb->data, msg, len);
+	skb->len += len;
+
+	udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+	udph->source = htons(np->local_port);
+	udph->dest = htons(np->remote_port);
+	udph->len = htons(udp_len);
+	/* The UDP checksum field is left at zero. */
+	udph->check = 0;
+
+	iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+
+	/* iph->version = 4; iph->ihl = 5; */
+	put_unaligned(0x45, (unsigned char *)iph);
+	iph->tos      = 0;
+	put_unaligned(htons(ip_len), &(iph->tot_len));
+	iph->id       = 0;
+	iph->frag_off = 0;
+	iph->ttl      = 64;
+	iph->protocol = IPPROTO_UDP;
+	/* Must be zero while the header checksum is computed below. */
+	iph->check    = 0;
+	put_unaligned(htonl(np->local_ip), &(iph->saddr));
+	put_unaligned(htonl(np->remote_ip), &(iph->daddr));
+	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+
+	eth->h_proto = htons(ETH_P_IP);
+	memcpy(eth->h_source, np->local_mac, 6);
+	memcpy(eth->h_dest, np->remote_mac, 6);
+
+	skb->dev = np->dev;
+
+	netpoll_send_skb(np, skb);
+}
+
+/*
+ * Answer an ARP request carried in @skb on behalf of the netpoll client
+ * attached to skb->dev.
+ *
+ * A reply is sent only for Ethernet/IEEE802 IPv4 ARP requests whose target
+ * IP equals np->local_ip.  The reply is built in a fresh skb obtained from
+ * find_skb() and sent with netpoll_send_skb().  This function does not
+ * free @skb.
+ */
+static void arp_reply(struct sk_buff *skb)
+{
+	struct arphdr *arp;
+	unsigned char *arp_ptr;
+	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
+	u32 sip, tip;
+	struct sk_buff *send_skb;
+	struct netpoll *np = skb->dev->np;
+
+	if (!np) return;
+
+	/* No arp on this interface */
+	if (skb->dev->flags & IFF_NOARP)
+		return;
+
+	/* Need the fixed ARP header plus two hardware and two IPv4
+	 * addresses in the linear data area. */
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * skb->dev->addr_len) +
+				 (2 * sizeof(u32)))))
+		return;
+
+	skb->h.raw = skb->nh.raw = skb->data;
+	arp = skb->nh.arph;
+
+	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+	    arp->ar_pro != htons(ETH_P_IP) ||
+	    arp->ar_op != htons(ARPOP_REQUEST))
+		return;
+
+	/* Skip the sender hardware address to reach the sender IP, then
+	 * skip that and the target hardware address to reach the target IP. */
+	arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
+	memcpy(&sip, arp_ptr, 4);
+	arp_ptr += 4 + skb->dev->addr_len;
+	memcpy(&tip, arp_ptr, 4);
+
+	/* Should we ignore arp? */
+	if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
+		return;
+
+	size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
+	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
+			    LL_RESERVED_SPACE(np->dev));
+
+	if (!send_skb)
+		return;
+
+	send_skb->nh.raw = send_skb->data;
+	arp = (struct arphdr *) skb_put(send_skb, size);
+	send_skb->dev = skb->dev;
+	send_skb->protocol = htons(ETH_P_ARP);
+
+	/* Fill the device header for the ARP frame */
+
+	if (np->dev->hard_header &&
+	    np->dev->hard_header(send_skb, skb->dev, ptype,
+				       np->remote_mac, np->local_mac,
+				       send_skb->len) < 0) {
+		kfree_skb(send_skb);
+		return;
+	}
+
+	/*
+	 * Fill out the arp protocol part.
+	 *
+	 * we only support ethernet device type,
+	 * which (according to RFC 1390) should always equal 1 (Ethernet).
+	 */
+
+	arp->ar_hrd = htons(np->dev->type);
+	arp->ar_pro = htons(ETH_P_IP);
+	arp->ar_hln = np->dev->addr_len;
+	arp->ar_pln = 4;
+	arp->ar_op = htons(type);
+
+	/* Body layout: our hardware address, the requested IP (tip), the
+	 * remote MAC from np, and the requester's IP (sip). */
+	arp_ptr=(unsigned char *)(arp + 1);
+	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+	arp_ptr += np->dev->addr_len;
+	memcpy(arp_ptr, &tip, 4);
+	arp_ptr += 4;
+	memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
+	arp_ptr += np->dev->addr_len;
+	memcpy(arp_ptr, &sip, 4);
+
+	netpoll_send_skb(np, send_skb);
+}
+
+/*
+ * Receive filter for the netpoll client attached to skb->dev.
+ *
+ * ARP frames are answered through arp_reply() while trapped.  IPv4/UDP
+ * packets that pass the header, length and checksum tests and match the
+ * addresses and port configured in @np are delivered to np->rx_hook() and
+ * freed.
+ *
+ * Returns 1 when netpoll has claimed the packet, and 0 when it has not
+ * (only possible while not trapped).
+ *
+ * Changes from the original:
+ *  - checksum_udp() returns a folded checksum, which is never negative;
+ *    the test is now "!= 0" so that bad checksums are actually rejected.
+ *  - iph is reloaded from skb->data after each pskb_may_pull(), because
+ *    the pull may reallocate the header.
+ *  - packets whose UDP length is smaller than a UDP header are rejected,
+ *    so rx_hook is never passed a negative payload length.
+ */
+int __netpoll_rx(struct sk_buff *skb)
+{
+	int proto, len, ulen;
+	struct iphdr *iph;
+	struct udphdr *uh;
+	struct netpoll *np = skb->dev->np;
+
+	if (!np->rx_hook)
+		goto out;
+	if (skb->dev->type != ARPHRD_ETHER)
+		goto out;
+
+	/* check if netpoll clients need ARP */
+	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+	    atomic_read(&trapped)) {
+		arp_reply(skb);
+		return 1;
+	}
+
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto != ETH_P_IP)
+		goto out;
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto out;
+	if (skb_shared(skb))
+		goto out;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+	/* Load only after the pull: skb->data may have moved. */
+	iph = (struct iphdr *)skb->data;
+	if (iph->ihl < 5 || iph->version != 4)
+		goto out;
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto out;
+	iph = (struct iphdr *)skb->data;
+	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+		goto out;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len || len < iph->ihl*4)
+		goto out;
+
+	if (iph->protocol != IPPROTO_UDP)
+		goto out;
+
+	len -= iph->ihl*4;
+	/* The IP payload must at least hold a UDP header. */
+	if (len < (int)sizeof(struct udphdr))
+		goto out;
+	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+	ulen = ntohs(uh->len);
+
+	if (ulen != len)
+		goto out;
+	/* Non-zero residue means the UDP checksum did not verify. */
+	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) != 0)
+		goto out;
+	if (np->local_ip && np->local_ip != ntohl(iph->daddr))
+		goto out;
+	if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
+		goto out;
+	if (np->local_port && np->local_port != ntohs(uh->dest))
+		goto out;
+
+	np->rx_hook(np, ntohs(uh->source),
+		    (char *)(uh+1),
+		    ulen - sizeof(struct udphdr));
+
+	kfree_skb(skb);
+	return 1;
+
+out:
+	/* While trapped, everything not delivered above is dropped. */
+	if (atomic_read(&trapped)) {
+		kfree_skb(skb);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse a netpoll configuration string into @np.
+ *
+ * Accepted layout, as consumed by the code below:
+ *   [local_port]@[local_ip]/[dev],[remote_port]@remote_ip/[remote_mac]
+ * Fields in brackets may be empty, in which case the value already in @np
+ * is kept.  @opt is modified in place (delimiters are overwritten with
+ * NUL).  Each parsed or retained value is logged.
+ *
+ * Returns 0 on success, -1 if a required delimiter is missing.
+ */
+int netpoll_parse_options(struct netpoll *np, char *opt)
+{
+	char *cur = opt;
+	char *delim;
+	int i;
+
+	/* local port, terminated by '@' */
+	if (*cur != '@') {
+		delim = strchr(cur, '@');
+		if (delim == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->local_port = simple_strtol(cur, NULL, 10);
+		cur = delim;
+	}
+	cur++;
+	printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
+
+	/* local IP, terminated by '/' */
+	if (*cur != '/') {
+		delim = strchr(cur, '/');
+		if (delim == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->local_ip = ntohl(in_aton(cur));
+		cur = delim;
+
+		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->local_ip));
+	}
+	cur++;
+
+	/* device name, terminated by ',' */
+	if (*cur != ',') {
+		delim = strchr(cur, ',');
+		if (delim == NULL)
+			goto parse_failed;
+		*delim = 0;
+		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
+		cur = delim;
+	}
+	cur++;
+
+	printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
+
+	/* remote port, terminated by '@' */
+	if (*cur != '@') {
+		delim = strchr(cur, '@');
+		if (delim == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_port = simple_strtol(cur, NULL, 10);
+		cur = delim;
+	}
+	cur++;
+	printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
+
+	/* remote IP, terminated by '/' (this delimiter is mandatory) */
+	delim = strchr(cur, '/');
+	if (delim == NULL)
+		goto parse_failed;
+	*delim = 0;
+	np->remote_ip = ntohl(in_aton(cur));
+	cur = delim + 1;
+
+	printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->remote_ip));
+
+	if (*cur != 0) {
+		/* MAC address: five ':'-terminated hex octets ... */
+		for (i = 0; i < 5; i++) {
+			delim = strchr(cur, ':');
+			if (delim == NULL)
+				goto parse_failed;
+			*delim = 0;
+			np->remote_mac[i] = simple_strtol(cur, NULL, 16);
+			cur = delim + 1;
+		}
+		/* ... followed by the final octet. */
+		np->remote_mac[5] = simple_strtol(cur, NULL, 16);
+	}
+
+	printk(KERN_INFO "%s: remote ethernet address "
+	       "%02x:%02x:%02x:%02x:%02x:%02x\n",
+	       np->name,
+	       np->remote_mac[0],
+	       np->remote_mac[1],
+	       np->remote_mac[2],
+	       np->remote_mac[3],
+	       np->remote_mac[4],
+	       np->remote_mac[5]);
+
+	return 0;
+
+ parse_failed:
+	printk(KERN_INFO "%s: couldn't parse config at %s!\n",
+	       np->name, cur);
+	return -1;
+}
+
+/*
+ * Bind @np to the network device named in np->dev_name.
+ *
+ * Looks the device up, links it with @np, brings the interface up if it
+ * is not running (waiting up to 4 seconds for carrier), and fills in
+ * np->local_mac and np->local_ip from the device when they are unset.
+ *
+ * Returns 0 on success.  Returns -1 if the device does not exist, lacks a
+ * poll_controller, cannot be opened, or has no IP address while none was
+ * configured; on those failures after lookup the device reference is
+ * dropped and the np <-> dev links are cleared.
+ */
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+
+	np->poll_lock = SPIN_LOCK_UNLOCKED;
+	np->poll_owner = -1;
+
+	/* NOTE(review): if dev_name is an array member this test is always
+	 * true -- confirm against struct netpoll. */
+	if (np->dev_name)
+		ndev = dev_get_by_name(np->dev_name);
+	if (!ndev) {
+		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+		       np->name, np->dev_name);
+		return -1;
+	}
+
+	/* Link both ways; undone at the release label on failure. */
+	np->dev = ndev;
+	ndev->np = np;
+
+	if (!ndev->poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+		       np->name, np->dev_name);
+		goto release;
+	}
+
+	if (!netif_running(ndev)) {
+		unsigned long atmost, atleast;
+
+		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
+		       np->name, np->dev_name);
+
+		rtnl_shlock();
+		if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
+			printk(KERN_ERR "%s: failed to open %s\n",
+			       np->name, np->dev_name);
+			rtnl_shunlock();
+			goto release;
+		}
+		rtnl_shunlock();
+
+		/* Carrier seen before 'atleast' is treated as suspect;
+		 * waiting stops at 'atmost'. */
+		atleast = jiffies + HZ/10;
+ 		atmost = jiffies + 4*HZ;
+		while (!netif_carrier_ok(ndev)) {
+			if (time_after(jiffies, atmost)) {
+				printk(KERN_NOTICE
+				       "%s: timeout waiting for carrier\n",
+				       np->name);
+				break;
+			}
+			cond_resched();
+		}
+
+		/* If carrier appears to come up instantly, we don't
+		 * trust it and pause so that we don't pump all our
+		 * queued console messages into the bitbucket.
+		 */
+
+		if (time_before(jiffies, atleast)) {
+			printk(KERN_NOTICE "%s: carrier detect appears"
+			       " untrustworthy, waiting 4 seconds\n",
+			       np->name);
+			msleep(4000);
+		}
+	}
+
+	/* An all-zero local MAC means "not configured": use the device's. */
+	if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
+		memcpy(np->local_mac, ndev->dev_addr, 6);
+
+	/* No local IP configured: take the first address on the device. */
+	if (!np->local_ip) {
+		rcu_read_lock();
+		in_dev = __in_dev_get(ndev);
+
+		if (!in_dev || !in_dev->ifa_list) {
+			rcu_read_unlock();
+			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
+			       np->name, np->dev_name);
+			goto release;
+		}
+
+		np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
+		rcu_read_unlock();
+		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->local_ip));
+	}
+
+	if(np->rx_hook)
+		np->rx_flags = NETPOLL_RX_ENABLED;
+
+	return 0;
+
+ release:
+	/* Undo the linking done above and drop the lookup reference. */
+	ndev->np = NULL;
+	np->dev = NULL;
+	dev_put(ndev);
+	return -1;
+}
+
+/*
+ * Detach @np from its device: clear the device's back pointer, drop the
+ * device reference held by @np and clear np->dev.
+ *
+ * Safe to call when no device is attached; the reference is only dropped
+ * when np->dev is non-NULL (the original called dev_put() on a NULL
+ * pointer in that case).
+ */
+void netpoll_cleanup(struct netpoll *np)
+{
+	if (np->dev) {
+		np->dev->np = NULL;
+		dev_put(np->dev);
+	}
+	np->dev = NULL;
+}
+
+/* Return the current value of the trap counter. */
+int netpoll_trap(void)
+{
+	int depth = atomic_read(&trapped);
+
+	return depth;
+}
+
+/* Raise the trap counter when @trap is non-zero, lower it otherwise. */
+void netpoll_set_trap(int trap)
+{
+	if (!trap) {
+		atomic_dec(&trapped);
+		return;
+	}
+
+	atomic_inc(&trapped);
+}
+
+EXPORT_SYMBOL(netpoll_set_trap);
+EXPORT_SYMBOL(netpoll_trap);
+EXPORT_SYMBOL(netpoll_parse_options);
+EXPORT_SYMBOL(netpoll_setup);
+EXPORT_SYMBOL(netpoll_cleanup);
+EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll);
+EXPORT_SYMBOL(netpoll_queue);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
new file mode 100644
index 00000000000..c57b06bc79f
--- /dev/null
+++ b/net/core/pktgen.c
@@ -0,0 +1,3132 @@
+/*
+ * Authors:
+ * Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se>
+ *                             Uppsala University and
+ *                             Swedish University of Agricultural Sciences
+ *
+ * Alexey Kuznetsov  <kuznet@ms2.inr.ac.ru>
+ * Ben Greear <greearb@candelatech.com>
+ * Jens Låås <jens.laas@data.slu.se>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * A tool for loading the network with preconfigured packets.
+ * The tool is implemented as a linux module.  Parameters are output 
+ * device, delay (to hard_xmit), number of packets, and whether
+ * to use multiple SKBs or just the same one.
+ * pktgen uses the installed interface's output routine.
+ *
+ * Additional hacking by:
+ *
+ * Jens.Laas@data.slu.se
+ * Improved by ANK. 010120.
+ * Improved by ANK even more. 010212.
+ * MAC address typo fixed. 010417 --ro
+ * Integrated.  020301 --DaveM
+ * Added multiskb option 020301 --DaveM
+ * Scaling of results. 020417--sigurdur@linpro.no
+ * Significant re-work of the module:
+ *   *  Convert to threaded model to more efficiently be able to transmit
+ *       and receive on multiple interfaces at once.
+ *   *  Converted many counters to __u64 to allow longer runs.
+ *   *  Allow configuration of ranges, like min/max IP address, MACs,
+ *       and UDP-ports, for both source and destination, and can
+ *       set to use a random distribution or sequentially walk the range.
+ *   *  Can now change most values after starting.
+ *   *  Place 12-byte packet in UDP payload with magic number,
+ *       sequence number, and timestamp.
+ *   *  Add receiver code that detects dropped pkts, re-ordered pkts, and
+ *       latencies (with micro-second) precision.
+ *   *  Add IOCTL interface to easily get counters & configuration.
+ *   --Ben Greear <greearb@candelatech.com>
+ *
+ * Renamed multiskb to clone_skb and cleaned up sending core for two distinct 
+ * skb modes. A clone_skb=0 mode for Ben "ranges" work and a clone_skb != 0 
+ * as a "fastpath" with a configurable number of clones after alloc's.
+ * clone_skb=0 means all packets are allocated this also means ranges time 
+ * stamps etc can be used. clone_skb=100 means 1 malloc is followed by 100 
+ * clones.
+ *
+ * Also moved to /proc/net/pktgen/ 
+ * --ro
+ *
+ * Sept 10:  Fixed threading/locking.  Lots of bone-headed and more clever
+ *    mistakes.  Also merged in DaveM's patch in the -pre6 patch.
+ * --Ben Greear <greearb@candelatech.com>
+ *
+ * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
+ *
+ * 
+ * 021124 Finished major redesign and rewrite for new functionality.
+ * See Documentation/networking/pktgen.txt for how to use this.
+ *
+ * The new operation:
+ * For each CPU one thread/process is created at start. This process checks
+ * for running devices in the if_list and sends packets until count is 0. The
+ * thread also checks thread->control, which is used for inter-process
+ * communication: the controlling process "posts" operations to the threads
+ * this way. It should be possible to remove the if_lock once add/rem_device
+ * is merged into this too.
+ *
+ * By design there should only be *one* "controlling" process. In practice
+ * multiple write accesses give unpredictable results. A "write" to /proc
+ * produces a result code that should be read by the "writer".
+ * For practical use this should be no problem.
+ *
+ * Note: when adding devices to a specific CPU it is a good idea to also assign
+ * /proc/irq/XX/smp_affinity so TX interrupts get bound to the same CPU.
+ * --ro
+ *
+ * Fix refcount off by one if first packet fails, potential null deref, 
+ * memleak 030710- KJP
+ *
+ * First "ranges" functionality for ipv6 030726 --ro
+ *
+ * Included flow support. 030802 ANK.
+ *
+ * Fixed unaligned access on IA-64 Grant Grundler <grundler@parisc-linux.org>
+ * 
+ * Remove if fix from added Harald Welte <laforge@netfilter.org> 040419
+ * ia64 compilation fix from  Aron Griffis <aron@hp.com> 040604
+ *
+ * New xmit() return, do_div and misc clean up by Stephen Hemminger 
+ * <shemminger@osdl.org> 040923
+ *
+ * Randy Dunlap fixed u64 printk compiler warning
+ *
+ * Remove FCS from BW calculation.  Lennert Buytenhek <buytenh@wantstofly.org>
+ * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
+ *
+ * Corrections from Nikolai Malykh (nmalykh@bilim.com) 
+ * Removed unused flags F_SET_SRCMAC & F_SET_SRCIP 041230
+ *
+ * interruptible_sleep_on_timeout() replaced Nishanth Aravamudan <nacc@us.ibm.com> 
+ * 050103
+ */
+#include <linux/sys.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_arp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/proc_fs.h>
+#include <linux/wait.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <asm/byteorder.h>
+#include <linux/rcupdate.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h> /* do_div */
+#include <asm/timex.h>
+
+
+#define VERSION  "pktgen v2.61: Packet Generator for packet performance testing.\n"
+
+/* #define PG_DEBUG(a) a */
+#define PG_DEBUG(a) 
+
+/* The buckets are exponential in 'width' */
+#define LAT_BUCKETS_MAX 32
+#define IP_NAME_SZ 32
+
+/* Device flag bits */
+#define F_IPSRC_RND   (1<<0)  /* IP-Src Random  */
+#define F_IPDST_RND   (1<<1)  /* IP-Dst Random  */
+#define F_UDPSRC_RND  (1<<2)  /* UDP-Src Random */
+#define F_UDPDST_RND  (1<<3)  /* UDP-Dst Random */
+#define F_MACSRC_RND  (1<<4)  /* MAC-Src Random */
+#define F_MACDST_RND  (1<<5)  /* MAC-Dst Random */
+#define F_TXSIZE_RND  (1<<6)  /* Transmit size is random */
+#define F_IPV6        (1<<7)  /* Interface in IPV6 Mode */
+
+/* Thread control flag bits */
+#define T_TERMINATE   (1<<0)  
+#define T_STOP        (1<<1)  /* Stop run */
+#define T_RUN         (1<<2)  /* Start run */
+#define T_REMDEV      (1<<3)  /* Remove all devs */
+
+/* Locks */
+/* Wrappers around the spinlock _thread_lock (defined outside this view). */
+#define   thread_lock()        spin_lock(&_thread_lock)
+#define   thread_unlock()      spin_unlock(&_thread_lock)
+
+/* If lock -- can be removed after some work */
+/* Take/release the if_lock spinlock of the pktgen_thread pointed to by t.
+ * NOTE(review): both expansions end in ';' and use t unparenthesized, so
+ * "if_lock(t);" yields an extra empty statement and the macros cannot be
+ * used as the sole body of an if/else without braces. */
+#define   if_lock(t)           spin_lock(&(t->if_lock));
+#define   if_unlock(t)           spin_unlock(&(t->if_lock));
+
+/* Used to help with determining the pkts on receive */
+#define PKTGEN_MAGIC 0xbe9be955
+#define PG_PROC_DIR "pktgen"
+
+#define MAX_CFLOWS  65536
+
+/* Per-flow state; struct pktgen_dev keeps an array of these in ->flows. */
+struct flow_state
+{
+	__u32		cur_daddr;	/* presumably the flow's current destination address -- confirm */
+	int		count;		/* presumably packets sent on this flow so far -- confirm */
+};
+
+/*
+ * Configuration, runtime counters and current-packet state for one
+ * pktgen output device.  Instances are chained through ->next and point
+ * back to their owning thread through ->pg_thread.
+ */
+struct pktgen_dev {
+
+	/*
+	 * Try to keep frequent/infrequent used vars. separated.
+	 */
+
+        char ifname[32];         /* presumably the interface name -- confirm */
+        struct proc_dir_entry *proc_ent;
+        char result[512];        /* presumably result text shown via /proc -- confirm */
+        /* proc file names */
+        char fname[80];
+
+        struct pktgen_thread* pg_thread; /* the owner */
+        struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */
+
+        int running;  /* if this changes to false, the test will stop */
+        
+        /* If min != max, then we will either do a linear iteration, or
+         * we will do a random selection from within the range.
+         */
+        __u32 flags;     /* F_* device flag bits */
+
+        int min_pkt_size;    /* = ETH_ZLEN; */
+        int max_pkt_size;    /* = ETH_ZLEN; */
+        int nfrags;
+        __u32 delay_us;    /* Default delay */
+        __u32 delay_ns;
+        __u64 count;  /* Default No packets to send */
+        __u64 sofar;  /* How many pkts we've sent so far */
+        __u64 tx_bytes; /* How many bytes we've transmitted */
+        __u64 errors;    /* Errors when trying to transmit, pkts will be re-sent */
+
+        /* runtime counters relating to clone_skb */
+        __u64 next_tx_us;          /* timestamp of when to tx next */
+        __u32 next_tx_ns;
+        
+        __u64 allocated_skbs;
+        __u32 clone_count;
+	int last_ok;           /* Was last skb sent? 
+	                        * Or a failed transmit of some sort?  This will keep
+                                * sequence numbers in order, for example.
+				*/
+        __u64 started_at; /* micro-seconds */
+        __u64 stopped_at; /* micro-seconds */
+        __u64 idle_acc; /* micro-seconds */
+        __u32 seq_num;
+        
+        int clone_skb; /* Use multiple SKBs during packet gen.  If this number
+                          * is greater than 1, then that many copies of the same
+                          * packet will be sent before a new packet is allocated.
+                          * For instance, if you want to send 1024 identical packets
+                          * before creating a new packet, set clone_skb to 1024.
+                          */
+        
+        char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+        char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+        char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+        char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+
+	struct in6_addr  in6_saddr;
+	struct in6_addr  in6_daddr;
+	struct in6_addr  cur_in6_daddr;
+	struct in6_addr  cur_in6_saddr;
+	/* For ranges */
+	struct in6_addr  min_in6_daddr;
+	struct in6_addr  max_in6_daddr;
+	struct in6_addr  min_in6_saddr;
+	struct in6_addr  max_in6_saddr;
+
+        /* If we're doing ranges, random or incremental, then this
+         * defines the min/max for those ranges.
+         */
+        __u32 saddr_min; /* inclusive, source IP address */
+        __u32 saddr_max; /* exclusive, source IP address */
+        __u32 daddr_min; /* inclusive, dest IP address */
+        __u32 daddr_max; /* exclusive, dest IP address */
+
+        __u16 udp_src_min; /* inclusive, source UDP port */
+        __u16 udp_src_max; /* exclusive, source UDP port */
+        __u16 udp_dst_min; /* inclusive, dest UDP port */
+        __u16 udp_dst_max; /* exclusive, dest UDP port */
+
+        __u32 src_mac_count; /* How many MACs to iterate through */
+        __u32 dst_mac_count; /* How many MACs to iterate through */
+        
+        unsigned char dst_mac[6];
+        unsigned char src_mac[6];
+        
+        /* Values chosen for the packet currently being built (presumably;
+         * the code that updates them is outside this view). */
+        __u32 cur_dst_mac_offset;
+        __u32 cur_src_mac_offset;
+        __u32 cur_saddr;
+        __u32 cur_daddr;
+        __u16 cur_udp_dst;
+        __u16 cur_udp_src;
+        __u32 cur_pkt_size;
+        
+        __u8 hh[14];
+        /* = { 
+           0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, 
+           
+           We fill in SRC address later
+           0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x08, 0x00
+           };
+        */
+        __u16 pad; /* pad out the hh struct to an even 16 bytes */
+
+        struct sk_buff* skb; /* skb we are to transmit next, mainly used for when we
+                              * are transmitting the same one multiple times
+                              */
+        struct net_device* odev; /* The out-going device.  Note that the device should
+                                  * have its pg_info pointer pointing back to this
+                                  * device.  This will be set when the user specifies
+                                  * the out-going device name (not when the inject is
+                                  * started as it used to do.)
+                                  */
+	struct flow_state *flows;
+	unsigned cflows;         /* Concurrent flows (config) */
+	unsigned lflow;          /* Flow length  (config) */
+	unsigned nflows;         /* accumulated flows (stats) */
+};
+
+/*
+ * Four 32-bit words: magic, sequence number and a seconds/microseconds
+ * timestamp.  Presumably the header pktgen places in the UDP payload
+ * (the file header mentions magic number, sequence number and timestamp)
+ * -- confirm against the packet-building code.
+ */
+struct pktgen_hdr {
+        __u32 pgh_magic;	/* presumably PKTGEN_MAGIC -- confirm */
+        __u32 seq_num;
+	__u32 tv_sec;
+	__u32 tv_usec;
+};
+
+/*
+ * State of one pktgen worker thread: the devices it owns, its /proc
+ * entry, and the control word through which operations are posted to it.
+ */
+struct pktgen_thread {
+        spinlock_t if_lock;                   /* taken by the if_lock()/if_unlock() macros */
+        struct pktgen_dev *if_list;           /* All device here */
+        struct pktgen_thread* next;
+        char name[32];
+        char fname[128]; /* name of proc file */
+        struct proc_dir_entry *proc_ent;
+        char result[512];
+        u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
+        
+	/* Field for thread to receive "posted" events terminate, stop ifs etc.*/
+
+        u32 control;     /* T_* thread control flag bits */
+	int pid;
+	int cpu;
+
+        wait_queue_head_t queue;
+};
+
+#define REMOVE 1
+#define FIND   0
+
+/*  This code works around the fact that do_div cannot handle two 64-bit
+    numbers, and regular 64-bit division doesn't work on x86 kernels.
+    --Ben
+*/
+
+#define PG_DIV 0
+
+/* This was emailed to LKML by: Chris Caputo <ccaputo@alt.net>
+ * Function copied/adapted/optimized from:
+ *
+ *  nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html
+ *
+ * Copyright 1994, University of Cambridge Computer Laboratory
+ * All Rights Reserved.
+ *
+ */
+/*
+ * Signed 64-bit shift-and-subtract division.
+ *
+ * Works on the magnitudes of @x and @y.  When @type is PG_DIV the
+ * quotient is returned, negated if the operands' signs differ; for any
+ * other @type the remainder is returned, carrying the sign of @x.
+ */
+inline static s64 divremdi3(s64 x, s64 y, int type) 
+{
+        u64 rem = (x < 0) ? -x : x;      /* running remainder, starts as |x| */
+        u64 den = (y < 0) ? -y : y;      /* divisor magnitude, shifted as we go */
+        u64 quot = 0;
+        u64 bit = 1;                     /* quotient bit matching den's shift */
+
+        /* Scale the divisor up until it is no longer below the dividend. */
+        if (den > 0) {
+                while (den < rem) {
+                        den <<= 1;
+                        bit <<= 1;
+                }
+        }
+
+        /* Walk back down, subtracting wherever the divisor still fits. */
+        do {
+                if (rem >= den) {
+                        rem -= den;
+                        quot += bit;
+                }
+                den >>= 1;
+                bit >>= 1;
+        } while (bit);
+
+        if (type == PG_DIV) {
+                /* Sign bits equal <=> (x ^ y) is non-negative. */
+                if ((x ^ y) >= 0)
+                        return quot;
+                return -(s64)quot;
+        }
+
+        if (x >= 0)
+                return rem;
+        return -(s64)rem;
+}
+
+/* End of hacks to deal with 64-bit math on x86 */
+
+/** Convert a timeval to milliseconds (sub-millisecond part truncated) */
+static inline __u64 tv_to_ms(const struct timeval* tv) 
+{
+        __u64 from_sec = (__u64)tv->tv_sec * (__u64)1000;
+        __u64 from_usec = tv->tv_usec / 1000;
+
+        return from_usec + from_sec;
+}
+
+
+/** Convert a timeval to micro-seconds */
+static inline __u64 tv_to_us(const struct timeval* tv) 
+{
+        __u64 from_sec = (__u64)tv->tv_sec * (__u64)1000000;
+        __u64 from_usec = tv->tv_usec;
+
+        return from_usec + from_sec;
+}
+
+/* Divide a 64-bit value by a 32-bit base with do_div() and return the quotient. */
+static inline __u64 pg_div(__u64 n, __u32 base) {
+	/* do_div() leaves the quotient in its first argument */
+	do_div(n, base);
+	return n;
+}
+
+/*
+ * Divide a 64-bit value by a 64-bit base and return the quotient.
+ *
+ * On the architectures listed below the compiler can divide two 64-bit
+ * numbers natively, so plain '/' is used.  do_div() must not be used
+ * here: it cannot handle a 64-bit divisor.  Every other architecture
+ * goes through the software divremdi3() helper.
+ */
+static inline __u64 pg_div64(__u64 n, __u64 base)
+{
+#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__)
+	return n / base;
+#else
+	return divremdi3(n, base, PG_DIV);
+#endif
+}
+
+/* Random 32-bit value for the generator, taken from net_random(). */
+static inline u32 pktgen_random(void)
+{
+	u32 r = net_random();
+
+	return r;
+}
+
+/* Current do_gettimeofday() time expressed in milliseconds. */
+static inline __u64 getCurMs(void)
+{
+	struct timeval now;
+
+	do_gettimeofday(&now);
+	return tv_to_ms(&now);
+}
+
+/* Current do_gettimeofday() time expressed in microseconds. */
+static inline __u64 getCurUs(void)
+{
+	struct timeval now;
+
+	do_gettimeofday(&now);
+	return tv_to_us(&now);
+}
+
+/* Microseconds from b to a, i.e. a - b in unsigned 64-bit arithmetic. */
+static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b)
+{
+	__u64 a_us = tv_to_us(a);
+	__u64 b_us = tv_to_us(b);
+
+	return a_us - b_us;
+}
+
+
+/* old include end */
+
+static char version[] __initdata = VERSION;
+
+static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos);
+static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos);
+static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
+
+static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
+static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
+static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
+static int create_proc_dir(void);
+static int remove_proc_dir(void);
+
+static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i);
+static int pktgen_add_device(struct pktgen_thread* t, const char* ifname);
+static struct pktgen_thread* pktgen_find_thread(const char* name);
+static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname);
+static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
+static void pktgen_run_all_threads(void);
+static void pktgen_stop_all_threads_ifs(void);
+static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
+static void pktgen_stop(struct pktgen_thread* t);
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
+static struct pktgen_dev *pktgen_NN_threads(const char* dev_name, int remove);
+static unsigned int scan_ip6(const char *s,char ip[16]);
+static unsigned int fmt_ip6(char *s,const char ip[16]);
+
+/* Module parameters, defaults. */
+static int pg_count_d = 1000; /* 1000 pkts by default */
+static int pg_delay_d = 0;
+static int pg_clone_skb_d = 0;
+static int debug = 0;	/* non-zero: the /proc write handlers printk() what they parsed; settable through the "debug" parameter */
+
+static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED;	/* NOTE(review): presumably the lock behind thread_lock()/thread_unlock() - confirm */
+static struct pktgen_thread *pktgen_threads = NULL;	/* head of the list of threads, walked through ->next */
+
+static char module_fname[128];
+static struct proc_dir_entry *module_proc_ent = NULL;
+
+static struct notifier_block pktgen_notifier_block = {	/* delivers NETDEV_* events to pktgen_device_event() */
+	.notifier_call = pktgen_device_event,
+};
+
+static struct file_operations pktgen_fops = {	/* read returns the version string, write accepts "start"/"stop" */
+        .read     = proc_pgctrl_read,
+        .write    = proc_pgctrl_write,
+	/*  .ioctl    = pktgen_ioctl, later maybe */
+};
+
+/*
+ * /proc handling functions 
+ *
+ */
+
+static struct proc_dir_entry *pg_proc_dir = NULL;	/* the PG_PROC_DIR entry under proc_net, set by create_proc_dir() */
+static int proc_pgctrl_read_eof=0;	/* set after proc_pgctrl_read() returned data so that the next call reports EOF */
+
+/*
+ * read() handler: hands the VERSION string to user space.
+ *
+ * The first call copies the string and arms proc_pgctrl_read_eof; the
+ * following call sees the flag, clears it and returns 0 (end of file).
+ * Returns the number of bytes copied, or -EFAULT when the user buffer is
+ * smaller than the string or the copy faults.
+ */
+static ssize_t proc_pgctrl_read(struct file* file, char __user * buf,
+                                 size_t count, loff_t *ppos)
+{
+	char text[200];
+	int n;
+
+	if (proc_pgctrl_read_eof) {
+		proc_pgctrl_read_eof = 0;
+		return 0;
+	}
+
+	sprintf(text, "%s", VERSION);
+	n = strlen(text);
+
+	if (n > count)
+		return -EFAULT;
+
+	if (copy_to_user(buf, text, n))
+		return -EFAULT;
+
+	*ppos += n;
+	proc_pgctrl_read_eof = 1;	/* EOF on the next call */
+
+	return n;
+}
+
+/*
+ * write() handler: accepts the commands "stop" and "start".
+ *
+ * The last byte written is replaced by a NUL (it is expected to be the
+ * trailing newline), then the text is compared against the known
+ * commands; anything else is only reported through printk().
+ *
+ * The command is copied into a small on-stack buffer, so the amount of
+ * kernel memory used no longer depends on the user supplied count and
+ * the copy length can never exceed the buffer.  Input beyond the buffer
+ * size is ignored.
+ *
+ * Returns count on success, -EPERM without CAP_NET_ADMIN, -EINVAL for an
+ * empty write and -EFAULT when the user buffer cannot be read.
+ */
+static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf,
+				 size_t count, loff_t *ppos)
+{
+	char data[128];
+	size_t len;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	/* nothing to terminate: data[count-1] would index before the buffer */
+	if (count == 0)
+		return -EINVAL;
+
+	len = (count > sizeof(data)) ? sizeof(data) : count;
+	if (copy_from_user(data, buf, len))
+		return -EFAULT;
+	data[len - 1] = 0; /* Make string */
+
+	if (!strcmp(data, "stop"))
+		pktgen_stop_all_threads_ifs();
+	else if (!strcmp(data, "start"))
+		pktgen_run_all_threads();
+	else
+		printk("pktgen: Unknown command: %s\n", data);
+
+	return count;
+}
+
+/*
+ * read_proc handler of one pktgen device.
+ *
+ * Formats the configured parameters, the current counters and the last
+ * result string of the pktgen_dev passed in data into buf, sets *eof
+ * and returns the number of bytes written.
+ */
+static int proc_if_read(char *buf , char **start, off_t offset,
+                           int len, int *eof, void *data)
+{
+	/* flag bits in the order they are listed after "Flags: " */
+	static const struct {
+		unsigned int bit;
+		const char *label;
+	} flag_names[] = {
+		{ F_IPV6,       "IPV6  " },
+		{ F_IPSRC_RND,  "IPSRC_RND  " },
+		{ F_IPDST_RND,  "IPDST_RND  " },
+		{ F_TXSIZE_RND, "TXSIZE_RND  " },
+		{ F_UDPSRC_RND, "UDPSRC_RND  " },
+		{ F_UDPDST_RND, "UDPDST_RND  " },
+		{ F_MACSRC_RND, "MACSRC_RND  " },
+		{ F_MACDST_RND, "MACDST_RND  " },
+	};
+	struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
+	__u64 now = getCurUs();
+	__u64 started, stopped;
+	const unsigned char *mac;
+	char *out = buf;
+	int k;
+
+	out += sprintf(out, "Params: count %llu  min_pkt_size: %u  max_pkt_size: %u\n",
+		       (unsigned long long) pkt_dev->count,
+		       pkt_dev->min_pkt_size, pkt_dev->max_pkt_size);
+
+	out += sprintf(out, "     frags: %d  delay: %u  clone_skb: %d  ifname: %s\n",
+		       pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns,
+		       pkt_dev->clone_skb, pkt_dev->ifname);
+
+	out += sprintf(out, "     flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow);
+
+	if (pkt_dev->flags & F_IPV6) {
+		char cur[128], lo[128], hi[128];
+
+		fmt_ip6(cur, pkt_dev->in6_saddr.s6_addr);
+		fmt_ip6(lo,  pkt_dev->min_in6_saddr.s6_addr);
+		fmt_ip6(hi,  pkt_dev->max_in6_saddr.s6_addr);
+		out += sprintf(out, "     saddr: %s  min_saddr: %s  max_saddr: %s\n", cur, lo, hi);
+
+		fmt_ip6(cur, pkt_dev->in6_daddr.s6_addr);
+		fmt_ip6(lo,  pkt_dev->min_in6_daddr.s6_addr);
+		fmt_ip6(hi,  pkt_dev->max_in6_daddr.s6_addr);
+		out += sprintf(out, "     daddr: %s  min_daddr: %s  max_daddr: %s\n", cur, lo, hi);
+	} else {
+		out += sprintf(out, "     dst_min: %s  dst_max: %s\n     src_min: %s  src_max: %s\n",
+			       pkt_dev->dst_min, pkt_dev->dst_max,
+			       pkt_dev->src_min, pkt_dev->src_max);
+	}
+
+	out += sprintf(out, "     src_mac: ");
+
+	/* an all-zero src_mac is shown as the output device's own address */
+	mac = pkt_dev->src_mac;
+	for (k = 0; k < 6; k++)
+		if (pkt_dev->src_mac[k] != 0)
+			break;
+	if (k == 6)
+		mac = pkt_dev->odev->dev_addr;
+
+	for (k = 0; k < 6; k++)
+		out += sprintf(out, "%02X%s", mac[k], k == 5 ? "  " : ":");
+
+	out += sprintf(out, "dst_mac: ");
+	for (k = 0; k < 6; k++)
+		out += sprintf(out, "%02X%s", pkt_dev->dst_mac[k], k == 5 ? "\n" : ":");
+
+	out += sprintf(out, "     udp_src_min: %d  udp_src_max: %d  udp_dst_min: %d  udp_dst_max: %d\n",
+		       pkt_dev->udp_src_min, pkt_dev->udp_src_max,
+		       pkt_dev->udp_dst_min, pkt_dev->udp_dst_max);
+
+	out += sprintf(out, "     src_mac_count: %d  dst_mac_count: %d \n     Flags: ",
+		       pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
+
+	for (k = 0; k < (int)(sizeof(flag_names) / sizeof(flag_names[0])); k++)
+		if (pkt_dev->flags & flag_names[k].bit)
+			out += sprintf(out, "%s", flag_names[k].label);
+
+	out += sprintf(out, "\n");
+
+	started = pkt_dev->started_at;
+	/* while running there is no stop time yet: report "now" instead */
+	stopped = pkt_dev->running ? now : pkt_dev->stopped_at;
+
+	out += sprintf(out, "Current:\n     pkts-sofar: %llu  errors: %llu\n     started: %lluus  stopped: %lluus idle: %lluus\n",
+		       (unsigned long long) pkt_dev->sofar,
+		       (unsigned long long) pkt_dev->errors,
+		       (unsigned long long) started,
+		       (unsigned long long) stopped,
+		       (unsigned long long) pkt_dev->idle_acc);
+
+	out += sprintf(out, "     seq_num: %d  cur_dst_mac_offset: %d  cur_src_mac_offset: %d\n",
+		       pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset,
+		       pkt_dev->cur_src_mac_offset);
+
+	if (pkt_dev->flags & F_IPV6) {
+		char dst[128], src[128];
+
+		fmt_ip6(dst, pkt_dev->cur_in6_daddr.s6_addr);
+		fmt_ip6(src, pkt_dev->cur_in6_saddr.s6_addr);
+		out += sprintf(out, "     cur_saddr: %s  cur_daddr: %s\n", src, dst);
+	} else {
+		out += sprintf(out, "     cur_saddr: 0x%x  cur_daddr: 0x%x\n",
+			       pkt_dev->cur_saddr, pkt_dev->cur_daddr);
+	}
+
+	out += sprintf(out, "     cur_udp_dst: %d  cur_udp_src: %d\n",
+		       pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
+
+	out += sprintf(out, "     flows: %u\n", pkt_dev->nflows);
+
+	if (pkt_dev->result[0])
+		out += sprintf(out, "Result: %s\n", pkt_dev->result);
+	else
+		out += sprintf(out, "Result: Idle\n");
+
+	*eof = 1;
+
+	return out - buf;
+}
+
+
+/*
+ * Count the separator characters (double quote, newline, carriage return,
+ * tab, space, '=') at the start of user_buffer, looking at no more than
+ * maxlen bytes.  Returns that count, or -EFAULT if a byte cannot be read.
+ */
+static int count_trail_chars(const char __user *user_buffer, unsigned int maxlen)
+{
+	int skipped = 0;
+
+	while (skipped < maxlen) {
+		char ch;
+
+		if (get_user(ch, &user_buffer[skipped]))
+			return -EFAULT;
+
+		if (ch != '\"' && ch != '\n' && ch != '\r' &&
+		    ch != '\t' && ch != ' ' && ch != '=')
+			break;
+
+		skipped++;
+	}
+
+	return skipped;
+}
+
+/*
+ * Parse an unsigned decimal number from user_buffer into *num, reading at
+ * most maxlen characters and stopping at the first non-digit.  Returns the
+ * number of digits consumed, or -EFAULT if a byte cannot be read (*num then
+ * holds whatever was accumulated so far).
+ */
+static unsigned long num_arg(const char __user *user_buffer, unsigned long maxlen, 
+			     unsigned long *num)
+{
+	int pos;
+
+	*num = 0;
+
+	for (pos = 0; pos < maxlen; pos++) {
+		char ch;
+
+		if (get_user(ch, &user_buffer[pos]))
+			return -EFAULT;
+
+		if (ch < '0' || ch > '9')
+			break;
+
+		*num = *num * 10 + (ch - '0');
+	}
+
+	return pos;
+}
+
+/*
+ * Length of the token at the start of user_buffer: the number of bytes
+ * before the first double quote, newline, carriage return, tab or space,
+ * capped at maxlen.  Returns -EFAULT if a byte cannot be read.
+ */
+static int strn_len(const char __user *user_buffer, unsigned int maxlen)
+{
+	int n = 0;
+
+	while (n < maxlen) {
+		char ch;
+
+		if (get_user(ch, &user_buffer[n]))
+			return -EFAULT;
+
+		if (ch == '\"' || ch == '\n' || ch == '\r' ||
+		    ch == '\t' || ch == ' ')
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
+/*
+ * Read a "xx:xx:xx:xx:xx:xx" token from user space into the 6 byte
+ * array mac.  Hex digits are shifted into the current byte, ':' moves
+ * to the next byte, every other character is ignored.  Parsing stops
+ * once the sixth byte has been left, without touching mac[6].
+ * Returns 0, or a negative errno if user space cannot be read.
+ */
+static int pgw_parse_mac(const char __user *ubuf, unsigned char *mac)
+{
+	char text[32];
+	const char *v;
+	unsigned char *m = mac;
+	int len;
+
+	len = strn_len(ubuf, sizeof(text) - 1);
+	if (len < 0)
+		return len;
+	memset(text, 0, sizeof(text));
+	if (copy_from_user(text, ubuf, len))
+		return -EFAULT;
+
+	*m = 0;
+	for (v = text; *v && m < mac + 6; v++) {
+		if (*v >= '0' && *v <= '9') {
+			*m *= 16;
+			*m += *v - '0';
+		} else if (*v >= 'A' && *v <= 'F') {
+			*m *= 16;
+			*m += *v - 'A' + 10;
+		} else if (*v >= 'a' && *v <= 'f') {
+			*m *= 16;
+			*m += *v - 'a' + 10;
+		} else if (*v == ':') {
+			m++;
+			/* a sixth ':' must not clear the byte behind the array */
+			if (m < mac + 6)
+				*m = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Read an address token from user space and store it in the string
+ * stored (stored_size bytes) if it differs from what is there already.
+ * scratch is a work buffer of scratch_size bytes.
+ * Returns 1 if stored was rewritten, 0 if the value was unchanged, or a
+ * negative errno if user space cannot be read.
+ */
+static int pgw_update_ip4_name(const char __user *ubuf, char *stored,
+			       unsigned int stored_size, char *scratch,
+			       unsigned int scratch_size)
+{
+	unsigned int maxlen = stored_size - 1;
+	int len;
+
+	/* never read more than the work buffer can hold */
+	if (maxlen > scratch_size - 1)
+		maxlen = scratch_size - 1;
+
+	len = strn_len(ubuf, maxlen);
+	if (len < 0)
+		return len;
+	if (copy_from_user(scratch, ubuf, len))
+		return -EFAULT;
+	scratch[len] = 0;
+
+	if (strcmp(scratch, stored) == 0)
+		return 0;
+
+	memset(stored, 0, stored_size);
+	strncpy(stored, scratch, len);
+	return 1;
+}
+
+/*
+ * Read an IPv6 address token from user space, mark the device as IPv6,
+ * convert the text into *addr with scan_ip6() and write the address as
+ * formatted by fmt_ip6() back into text (text_size bytes).
+ * Returns 0, or a negative errno if user space cannot be read.
+ */
+static int pgw_read_ip6(const char __user *ubuf, struct pktgen_dev *pkt_dev,
+			struct in6_addr *addr, char *text, unsigned int text_size)
+{
+	int len;
+
+	len = strn_len(ubuf, text_size - 1);
+	if (len < 0)
+		return len;
+
+	pkt_dev->flags |= F_IPV6;
+
+	if (copy_from_user(text, ubuf, len))
+		return -EFAULT;
+	text[len] = 0;
+
+	scan_ip6(text, addr->s6_addr);
+	fmt_ip6(text, addr->s6_addr);
+	return 0;
+}
+
+/*
+ * write_proc handler of one pktgen device: "<name> <value>" sets one
+ * parameter of the pktgen_dev passed in data.
+ *
+ * Leading separators are skipped, the parameter name is read, the
+ * separators after it are skipped and the value is parsed according to
+ * the name.  The outcome is left in pkt_dev->result.
+ *
+ * Returns count when the name was recognised (an unknown "flag" value
+ * included), -EINVAL for an empty write or an unknown name, and a
+ * negative errno if user space cannot be read.
+ *
+ * Differences from the straight-line original:
+ *  - the MAC parser no longer writes one byte past the address after a
+ *    sixth ':';
+ *  - the debug dump uses a fixed-size buffer instead of a stack array
+ *    sized by the user supplied count, so long writes are dumped
+ *    truncated.
+ */
+static int proc_if_write(struct file *file, const char __user *user_buffer,
+                            unsigned long count, void *data)
+{
+	/* flags that can be set by name, or cleared with a leading '!' */
+	static const struct {
+		const char *name;
+		unsigned int bit;
+	} rnd_flags[] = {
+		{ "IPSRC_RND",  F_IPSRC_RND },
+		{ "TXSIZE_RND", F_TXSIZE_RND },
+		{ "IPDST_RND",  F_IPDST_RND },
+		{ "UDPSRC_RND", F_UDPSRC_RND },
+		{ "UDPDST_RND", F_UDPDST_RND },
+		{ "MACSRC_RND", F_MACSRC_RND },
+		{ "MACDST_RND", F_MACDST_RND },
+	};
+	/* smallest accepted packet size; presumably 14 (ethernet) + 20 (IP) + 8 (UDP) header bytes */
+	const unsigned long min_size = 14+20+8;
+	int i = 0, max, len, tmp;
+	char name[16];
+	char buf[128];
+	unsigned long value = 0;
+	struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
+	char *pg_result = &(pkt_dev->result[0]);
+
+	if (count < 1) {
+		printk("pktgen: wrong command format\n");
+		return -EINVAL;
+	}
+
+	max = count - i;
+	tmp = count_trail_chars(&user_buffer[i], max);
+	if (tmp < 0) {
+		printk("pktgen: illegal format\n");
+		return tmp;
+	}
+	i += tmp;
+
+	/* Read variable name */
+
+	len = strn_len(&user_buffer[i], sizeof(name) - 1);
+	if (len < 0)
+		return len;
+	memset(name, 0, sizeof(name));
+	if (copy_from_user(name, &user_buffer[i], len))
+		return -EFAULT;
+	i += len;
+
+	max = count - i;
+	len = count_trail_chars(&user_buffer[i], max);
+	if (len < 0)
+		return len;
+	i += len;
+
+	/* from here on user_buffer[i] is the start of the value */
+
+	if (debug) {
+		char tb[128];
+		unsigned long n = count;
+
+		if (n > sizeof(tb) - 1)
+			n = sizeof(tb) - 1;
+		if (copy_from_user(tb, user_buffer, n))
+			return -EFAULT;
+		tb[n] = 0;
+		printk("pktgen: %s,%lu  buffer -:%s:-\n", name, count, tb);
+	}
+
+	if (!strcmp(name, "min_pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value < min_size)
+			value = min_size;
+		if (value != pkt_dev->min_pkt_size) {
+			pkt_dev->min_pkt_size = value;
+			pkt_dev->cur_pkt_size = value;
+		}
+		sprintf(pg_result, "OK: min_pkt_size=%u", pkt_dev->min_pkt_size);
+		return count;
+	}
+
+	if (!strcmp(name, "max_pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value < min_size)
+			value = min_size;
+		if (value != pkt_dev->max_pkt_size) {
+			pkt_dev->max_pkt_size = value;
+			pkt_dev->cur_pkt_size = value;
+		}
+		sprintf(pg_result, "OK: max_pkt_size=%u", pkt_dev->max_pkt_size);
+		return count;
+	}
+
+	/* Shortcut for min = max */
+
+	if (!strcmp(name, "pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value < min_size)
+			value = min_size;
+		if (value != pkt_dev->min_pkt_size) {
+			pkt_dev->min_pkt_size = value;
+			pkt_dev->max_pkt_size = value;
+			pkt_dev->cur_pkt_size = value;
+		}
+		sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size);
+		return count;
+	}
+
+	if (!strcmp(name, "debug")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		debug = value;
+		sprintf(pg_result, "OK: debug=%u", debug);
+		return count;
+	}
+
+	if (!strcmp(name, "frags")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		pkt_dev->nfrags = value;
+		sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags);
+		return count;
+	}
+
+	if (!strcmp(name, "delay")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		/* 0x7FFFFFFF is stored as is; every other value is split into a /1000 and a %1000 part */
+		if (value == 0x7FFFFFFF) {
+			pkt_dev->delay_us = 0x7FFFFFFF;
+			pkt_dev->delay_ns = 0;
+		} else {
+			pkt_dev->delay_us = value / 1000;
+			pkt_dev->delay_ns = value % 1000;
+		}
+		sprintf(pg_result, "OK: delay=%u", 1000*pkt_dev->delay_us+pkt_dev->delay_ns);
+		return count;
+	}
+
+	if (!strcmp(name, "udp_src_min")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value != pkt_dev->udp_src_min) {
+			pkt_dev->udp_src_min = value;
+			pkt_dev->cur_udp_src = value;
+		}
+		sprintf(pg_result, "OK: udp_src_min=%u", pkt_dev->udp_src_min);
+		return count;
+	}
+
+	if (!strcmp(name, "udp_dst_min")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value != pkt_dev->udp_dst_min) {
+			pkt_dev->udp_dst_min = value;
+			pkt_dev->cur_udp_dst = value;
+		}
+		sprintf(pg_result, "OK: udp_dst_min=%u", pkt_dev->udp_dst_min);
+		return count;
+	}
+
+	if (!strcmp(name, "udp_src_max")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value != pkt_dev->udp_src_max) {
+			pkt_dev->udp_src_max = value;
+			pkt_dev->cur_udp_src = value;
+		}
+		sprintf(pg_result, "OK: udp_src_max=%u", pkt_dev->udp_src_max);
+		return count;
+	}
+
+	if (!strcmp(name, "udp_dst_max")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value != pkt_dev->udp_dst_max) {
+			pkt_dev->udp_dst_max = value;
+			pkt_dev->cur_udp_dst = value;
+		}
+		sprintf(pg_result, "OK: udp_dst_max=%u", pkt_dev->udp_dst_max);
+		return count;
+	}
+
+	if (!strcmp(name, "clone_skb")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		pkt_dev->clone_skb = value;
+		sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb);
+		return count;
+	}
+
+	if (!strcmp(name, "count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		pkt_dev->count = value;
+		sprintf(pg_result, "OK: count=%llu",
+			(unsigned long long) pkt_dev->count);
+		return count;
+	}
+
+	if (!strcmp(name, "src_mac_count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (pkt_dev->src_mac_count != value) {
+			pkt_dev->src_mac_count = value;
+			pkt_dev->cur_src_mac_offset = 0;
+		}
+		sprintf(pg_result, "OK: src_mac_count=%d", pkt_dev->src_mac_count);
+		return count;
+	}
+
+	if (!strcmp(name, "dst_mac_count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (pkt_dev->dst_mac_count != value) {
+			pkt_dev->dst_mac_count = value;
+			pkt_dev->cur_dst_mac_offset = 0;
+		}
+		sprintf(pg_result, "OK: dst_mac_count=%d", pkt_dev->dst_mac_count);
+		return count;
+	}
+
+	if (!strcmp(name, "flag")) {
+		char f[32];
+		const char *wanted = f;
+		int clear = 0;
+		int k;
+
+		memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+		if (len < 0)
+			return len;
+		if (copy_from_user(f, &user_buffer[i], len))
+			return -EFAULT;
+
+		if (f[0] == '!') {
+			clear = 1;
+			wanted = f + 1;
+		}
+
+		for (k = 0; k < (int)(sizeof(rnd_flags) / sizeof(rnd_flags[0])); k++) {
+			if (strcmp(wanted, rnd_flags[k].name) != 0)
+				continue;
+			if (clear)
+				pkt_dev->flags &= ~rnd_flags[k].bit;
+			else
+				pkt_dev->flags |= rnd_flags[k].bit;
+			sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
+			return count;
+		}
+
+		sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
+			f,
+			"IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n");
+		return count;
+	}
+
+	if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
+		tmp = pgw_update_ip4_name(&user_buffer[i], pkt_dev->dst_min,
+					  sizeof(pkt_dev->dst_min), buf, sizeof(buf));
+		if (tmp < 0)
+			return tmp;
+		if (tmp) {
+			pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
+			pkt_dev->cur_daddr = pkt_dev->daddr_min;
+		}
+		if (debug)
+			printk("pktgen: dst_min set to: %s\n", pkt_dev->dst_min);
+		sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min);
+		return count;
+	}
+
+	if (!strcmp(name, "dst_max")) {
+		tmp = pgw_update_ip4_name(&user_buffer[i], pkt_dev->dst_max,
+					  sizeof(pkt_dev->dst_max), buf, sizeof(buf));
+		if (tmp < 0)
+			return tmp;
+		if (tmp) {
+			pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
+			pkt_dev->cur_daddr = pkt_dev->daddr_max;
+		}
+		if (debug)
+			printk("pktgen: dst_max set to: %s\n", pkt_dev->dst_max);
+		sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max);
+		return count;
+	}
+
+	if (!strcmp(name, "src_min")) {
+		tmp = pgw_update_ip4_name(&user_buffer[i], pkt_dev->src_min,
+					  sizeof(pkt_dev->src_min), buf, sizeof(buf));
+		if (tmp < 0)
+			return tmp;
+		if (tmp) {
+			pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
+			pkt_dev->cur_saddr = pkt_dev->saddr_min;
+		}
+		if (debug)
+			printk("pktgen: src_min set to: %s\n", pkt_dev->src_min);
+		sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min);
+		return count;
+	}
+
+	if (!strcmp(name, "src_max")) {
+		tmp = pgw_update_ip4_name(&user_buffer[i], pkt_dev->src_max,
+					  sizeof(pkt_dev->src_max), buf, sizeof(buf));
+		if (tmp < 0)
+			return tmp;
+		if (tmp) {
+			pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
+			pkt_dev->cur_saddr = pkt_dev->saddr_max;
+		}
+		if (debug)
+			printk("pktgen: src_max set to: %s\n", pkt_dev->src_max);
+		sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max);
+		return count;
+	}
+
+	/* IPv6 addresses: pick the field to fill and the "current" address to reset, if any */
+	{
+		struct in6_addr *target = NULL;
+		struct in6_addr *current_addr = NULL;
+
+		if (!strcmp(name, "dst6")) {
+			target = &pkt_dev->in6_daddr;
+			current_addr = &pkt_dev->cur_in6_daddr;
+		} else if (!strcmp(name, "dst6_min")) {
+			target = &pkt_dev->min_in6_daddr;
+			current_addr = &pkt_dev->cur_in6_daddr;
+		} else if (!strcmp(name, "dst6_max")) {
+			target = &pkt_dev->max_in6_daddr;
+		} else if (!strcmp(name, "src6")) {
+			target = &pkt_dev->in6_saddr;
+			current_addr = &pkt_dev->cur_in6_saddr;
+		}
+
+		if (target) {
+			tmp = pgw_read_ip6(&user_buffer[i], pkt_dev, target,
+					   buf, sizeof(buf));
+			if (tmp < 0)
+				return tmp;
+
+			if (current_addr)
+				ipv6_addr_copy(current_addr, target);
+
+			if (debug)
+				printk("pktgen: %s set to: %s\n", name, buf);
+
+			sprintf(pg_result, "OK: %s=%s", name, buf);
+			return count;
+		}
+	}
+
+	if (!strcmp(name, "dst_mac")) {
+		unsigned char old_dmac[6];
+
+		memcpy(old_dmac, pkt_dev->dst_mac, 6);
+
+		tmp = pgw_parse_mac(&user_buffer[i], pkt_dev->dst_mac);
+		if (tmp < 0)
+			return tmp;
+
+		/* Set up Dest MAC */
+		if (memcmp(old_dmac, pkt_dev->dst_mac, 6) != 0)
+			memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6);
+
+		sprintf(pg_result, "OK: dstmac");
+		return count;
+	}
+
+	if (!strcmp(name, "src_mac")) {
+		tmp = pgw_parse_mac(&user_buffer[i], pkt_dev->src_mac);
+		if (tmp < 0)
+			return tmp;
+
+		sprintf(pg_result, "OK: srcmac");
+		return count;
+	}
+
+	if (!strcmp(name, "clear_counters")) {
+		pktgen_clear_counters(pkt_dev);
+		sprintf(pg_result, "OK: Clearing counters.\n");
+		return count;
+	}
+
+	if (!strcmp(name, "flows")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if (value > MAX_CFLOWS)
+			value = MAX_CFLOWS;
+
+		pkt_dev->cflows = value;
+		sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows);
+		return count;
+	}
+
+	if (!strcmp(name, "flowlen")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		pkt_dev->lflow = value;
+		sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow);
+		return count;
+	}
+
+	sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
+	return -EINVAL;
+}
+
+/*
+ * read_proc handler of one pktgen thread.
+ *
+ * Writes the thread name, max_before_softirq, the names of its running
+ * and stopped devices and the last result string into buf.  The device
+ * list is walked between if_lock() and if_unlock().  Sets *eof and
+ * returns the number of bytes written, or -EINVAL when data is NULL.
+ */
+static int proc_thread_read(char *buf , char **start, off_t offset,
+                               int len, int *eof, void *data)
+{
+	struct pktgen_thread *t = (struct pktgen_thread*)(data);
+	struct pktgen_dev *dev;
+	char *out = buf;
+
+	if (!t) {
+		printk("pktgen: ERROR: could not find thread in proc_thread_read\n");
+		return -EINVAL;
+	}
+
+	out += sprintf(out, "Name: %s  max_before_softirq: %d\n",
+		       t->name, t->max_before_softirq);
+
+	out += sprintf(out, "Running: ");
+
+	if_lock(t);
+
+	dev = t->if_list;
+	while (dev) {
+		if (dev->running)
+			out += sprintf(out, "%s ", dev->ifname);
+		dev = dev->next;
+	}
+
+	out += sprintf(out, "\nStopped: ");
+
+	dev = t->if_list;
+	while (dev) {
+		if (!dev->running)
+			out += sprintf(out, "%s ", dev->ifname);
+		dev = dev->next;
+	}
+
+	if (t->result[0])
+		out += sprintf(out, "\nResult: %s\n", t->result);
+	else
+		out += sprintf(out, "\nResult: NA\n");
+
+	*eof = 1;
+
+	if_unlock(t);
+
+	return out - buf;
+}
+
+/*
+ * write_proc handler of one pktgen thread: "<command> [value]".
+ *
+ * Commands:
+ *   add_device <ifname>        - calls pktgen_add_device() for this thread
+ *   rem_device_all             - sets T_REMDEV in t->control and sleeps
+ *                                HZ/8 so the thread can pick it up
+ *   max_before_softirq <num>   - stores num in t->max_before_softirq
+ *
+ * The outcome is left in t->result.  Returns count on success, -EINVAL
+ * for an empty write, a missing thread or an unknown command, and a
+ * negative errno if user space cannot be read.  A failed read of the
+ * max_before_softirq value is now reported instead of storing a partly
+ * parsed number.
+ */
+static int proc_thread_write(struct file *file, const char __user *user_buffer,
+                                unsigned long count, void *data)
+{
+	int i = 0, max, len;
+	char name[40];
+	struct pktgen_thread *t = (struct pktgen_thread*)(data);
+	char *pg_result;
+	unsigned long value = 0;
+
+	if (count < 1)
+		return -EINVAL;
+
+	max = count - i;
+	len = count_trail_chars(&user_buffer[i], max);
+	if (len < 0)
+		return len;
+	i += len;
+
+	/* Read variable name */
+
+	len = strn_len(&user_buffer[i], sizeof(name) - 1);
+	if (len < 0)
+		return len;
+
+	memset(name, 0, sizeof(name));
+	if (copy_from_user(name, &user_buffer[i], len))
+		return -EFAULT;
+	i += len;
+
+	max = count - i;
+	len = count_trail_chars(&user_buffer[i], max);
+	if (len < 0)
+		return len;
+	i += len;
+
+	if (debug)
+		printk("pktgen: t=%s, count=%lu\n", name, count);
+
+	if (!t) {
+		printk("pktgen: ERROR: No thread\n");
+		return -EINVAL;
+	}
+
+	pg_result = &(t->result[0]);
+
+	if (!strcmp(name, "add_device")) {
+		char f[32];
+
+		memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+		if (len < 0)
+			return len;
+		if (copy_from_user(f, &user_buffer[i], len))
+			return -EFAULT;
+
+		thread_lock();
+		pktgen_add_device(t, f);
+		thread_unlock();
+
+		sprintf(pg_result, "OK: add_device=%s", f);
+		return count;
+	}
+
+	if (!strcmp(name, "rem_device_all")) {
+		thread_lock();
+		t->control |= T_REMDEV;
+		thread_unlock();
+
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(HZ/8);  /* Propagate thread->control  */
+
+		sprintf(pg_result, "OK: rem_device_all");
+		return count;
+	}
+
+	if (!strcmp(name, "max_before_softirq")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		thread_lock();
+		t->max_before_softirq = value;
+		thread_unlock();
+
+		sprintf(pg_result, "OK: max_before_softirq=%lu", value);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Make pg_proc_dir point at the PG_PROC_DIR directory below proc_net,
+ * re-using an existing entry when one is found and creating it otherwise.
+ *
+ * Returns 0 on success, -ENODEV when the entry could not be created.
+ */
+static int create_proc_dir(void)
+{
+	int name_len = strlen(PG_PROC_DIR);
+
+	/* Walk the children of proc_net looking for an exact name match. */
+	pg_proc_dir = proc_net->subdir;
+	while (pg_proc_dir) {
+		if (pg_proc_dir->namelen == name_len &&
+		    memcmp(pg_proc_dir->name, PG_PROC_DIR, name_len) == 0)
+			return 0;
+		pg_proc_dir = pg_proc_dir->next;
+	}
+
+	/* Nothing there yet: create the directory ourselves. */
+	pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net);
+
+	return pg_proc_dir ? 0 : -ENODEV;
+}
+
+/*
+ * Remove the PG_PROC_DIR entry from below proc_net.
+ * Always returns 0.
+ */
+static int remove_proc_dir(void)
+{
+        remove_proc_entry(PG_PROC_DIR, proc_net);
+        return 0;
+}
+
+/*
+ * Search every pktgen thread for the device named ifname.
+ *
+ * When remove is non-zero the first match is also removed from its thread
+ * (under that thread's if_lock).  Returns the matching pktgen_dev, or NULL
+ * when no thread knows the interface.  The caller is expected to do the
+ * thread-list locking (see pktgen_NN_threads()).
+ */
+static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) 
+{
+	struct pktgen_thread *thr;
+	struct pktgen_dev *found;
+
+	for (thr = pktgen_threads; thr; thr = thr->next) {
+		found = pktgen_find_dev(thr, ifname);
+		if (!found)
+			continue;
+
+		if (remove) {
+			if_lock(thr);
+			pktgen_remove_device(thr, found);
+			if_unlock(thr);
+		}
+		return found;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked wrapper around __pktgen_NN_threads(): takes thread_lock() for the
+ * duration of the lookup (and optional removal) and returns its result.
+ */
+static struct pktgen_dev *pktgen_NN_threads(const char* ifname, int remove) 
+{
+	struct pktgen_dev *pkt_dev = NULL;
+	thread_lock();
+	pkt_dev = __pktgen_NN_threads(ifname, remove);
+        thread_unlock();
+	return pkt_dev;
+}
+
+/*
+ * Netdevice notifier callback.
+ *
+ * Only NETDEV_UNREGISTER is acted upon: the pktgen device bound to the
+ * vanishing interface is removed.  All other events (address change,
+ * up/down transitions, ...) are ignored for now.
+ *
+ * It is OK that we do not hold the group lock right now,
+ * as we run under the RTNL lock.
+ *
+ * Always returns NOTIFY_DONE.
+ */
+static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) 
+{
+	struct net_device *netdev = ptr;
+
+	if (event == NETDEV_UNREGISTER)
+		pktgen_NN_threads(netdev->name, REMOVE);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Bind pkt_dev to the net_device named pkt_dev->ifname.
+ *
+ * Any previously held device reference is dropped first.  The new device
+ * must exist, be of type ARPHRD_ETHER and be running; otherwise a message
+ * is logged, the reference (if one was taken) is released and NULL is
+ * returned with pkt_dev->odev left NULL.
+ *
+ * On success pkt_dev->odev holds the reference and is returned.
+ */
+static struct net_device* pktgen_setup_dev(struct pktgen_dev *pkt_dev) {
+	struct net_device *dev;
+
+	/* Clean old setups */
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+		pkt_dev->odev = NULL;
+	}
+
+	dev = dev_get_by_name(pkt_dev->ifname);
+	if (!dev) {
+		printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname);
+		return NULL;
+	}
+
+	if (dev->type != ARPHRD_ETHER) {
+		printk("pktgen: not an ethernet device: \"%s\"\n", pkt_dev->ifname);
+	} else if (!netif_running(dev)) {
+		printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname);
+	} else {
+		/* Usable: keep the reference in pkt_dev. */
+		pkt_dev->odev = dev;
+		return pkt_dev->odev;
+	}
+
+	/* Unusable device: give the reference back. */
+	dev_put(dev);
+	return NULL;
+}
+
+/* Read pkt_dev from the interface and set up internal pktgen_dev
+ * structure to have the right information to create/send packets.
+ *
+ * Steps, in order:
+ *   - (re)try binding pkt_dev->odev via pktgen_setup_dev(); on failure an
+ *     error is logged and written to pkt_dev->result and nothing else is done
+ *   - fill the 12-byte MAC part of the header template pkt_dev->hh
+ *   - reset cur_pkt_size to min_pkt_size
+ *   - for IPv4, derive the source/destination address ranges
+ *   - reset all "current" per-packet values to their range minimum
+ */
+static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
+{
+	/* Try once more, just in case it works now. */
+        if (!pkt_dev->odev) 
+                pktgen_setup_dev(pkt_dev);
+        
+        if (!pkt_dev->odev) {
+                printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+                sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+                return;
+        }
+        
+        /* Default to the interface's mac if not explicitly set. */
+
+	/* An all-zero src_mac means "not configured"; only then is hh[6..11]
+	 * written here.  NOTE(review): for a configured src_mac, hh[6..11] is
+	 * presumably filled in where src_mac is parsed -- confirm.
+	 */
+	if ((pkt_dev->src_mac[0] == 0) && 
+	    (pkt_dev->src_mac[1] == 0) && 
+	    (pkt_dev->src_mac[2] == 0) && 
+	    (pkt_dev->src_mac[3] == 0) && 
+	    (pkt_dev->src_mac[4] == 0) && 
+	    (pkt_dev->src_mac[5] == 0)) {
+
+	       memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, 6);
+       }
+        /* Set up Dest MAC */
+        memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6);
+
+        /* Set up pkt size */
+        pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size;
+	
+	if(pkt_dev->flags & F_IPV6) {
+		/*
+		 * Skip this automatic address setting until locks or functions 
+		 * gets exported
+		 */
+
+		/* Everything below is compiled out unless NOTNOW is defined,
+		 * so for IPv6 no address set-up happens in this function.
+		 */
+#ifdef NOTNOW
+		int i, set = 0, err=1;
+		struct inet6_dev *idev;
+
+		for(i=0; i< IN6_ADDR_HSIZE; i++)
+			if(pkt_dev->cur_in6_saddr.s6_addr[i]) {
+				set = 1;
+				break;
+			}
+
+		if(!set) {
+			
+			/*
+			 * Use linklevel address if unconfigured.
+			 *
+			 * use ipv6_get_lladdr if/when it's get exported
+			 */
+
+
+			read_lock(&addrconf_lock);
+			if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
+				struct inet6_ifaddr *ifp;
+
+				read_lock_bh(&idev->lock);
+				for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+					if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+						ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &ifp->addr);
+						err = 0;
+						break;
+					}
+				}
+				read_unlock_bh(&idev->lock);
+			}
+			read_unlock(&addrconf_lock);
+			if(err)	printk("pktgen: ERROR: IPv6 link address not availble.\n");
+		}
+#endif
+	} 
+	else {
+		pkt_dev->saddr_min = 0;
+		pkt_dev->saddr_max = 0;
+		if (strlen(pkt_dev->src_min) == 0) {
+			
+			/* No source range configured: use the first address
+			 * on the interface's IPv4 address list, if any, as
+			 * both ends of the range.
+			 */
+			struct in_device *in_dev; 
+
+			rcu_read_lock();
+			in_dev = __in_dev_get(pkt_dev->odev);
+			if (in_dev) {
+				if (in_dev->ifa_list) {
+					pkt_dev->saddr_min = in_dev->ifa_list->ifa_address;
+					pkt_dev->saddr_max = pkt_dev->saddr_min;
+				}
+				/* NOTE(review): confirm that __in_dev_get()
+				 * takes a reference; if it does not, this put
+				 * unbalances the in_device refcount.
+				 */
+				__in_dev_put(in_dev);	
+			}
+			rcu_read_unlock();
+		}
+		else {
+			pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
+			pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
+		}
+
+		pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
+		pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
+	}
+        /* Initialize current values. */
+        pkt_dev->cur_dst_mac_offset = 0;
+        pkt_dev->cur_src_mac_offset = 0;
+        pkt_dev->cur_saddr = pkt_dev->saddr_min;
+        pkt_dev->cur_daddr = pkt_dev->daddr_min;
+        pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min;
+        pkt_dev->cur_udp_src = pkt_dev->udp_src_min;
+	pkt_dev->nflows = 0;
+}
+
+/*
+ * Wait until getCurUs() reaches spin_until_us.
+ *
+ * While more than one jiffy (1000000/HZ us) remains the task sleeps for one
+ * tick; while more than 100us remain it runs softirqs and yields if a
+ * reschedule is pending; the final stretch is a plain busy loop.
+ *
+ * The elapsed time is added to pkt_dev->idle_acc, except when the wait is
+ * abandoned early because pkt_dev->running was cleared (that path returns
+ * without accounting).
+ */
+static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
+{
+	__u64 start;
+	__u64 now;
+
+	start = now = getCurUs();
+	/* NOTE(review): logged at KERN_INFO on every call -- looks like
+	 * leftover debugging output; confirm it is wanted.
+	 */
+	printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now));
+	while (now < spin_until_us) {
+		/* TODO: optimise sleeping behavior */
+		if (spin_until_us - now > (1000000/HZ)+1) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(1);
+		} else if (spin_until_us - now > 100) {
+			do_softirq();
+			if (!pkt_dev->running)
+				return;
+			if (need_resched())
+				schedule();
+		}
+
+		now = getCurUs();
+	}
+
+	pkt_dev->idle_acc += now - start;
+}
+
+
+/* Increment/randomize headers according to flags and current values
+ * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
+ *
+ * Called once per freshly built packet.  Updates, in this order:
+ *   - the flow slot to use (when pkt_dev->cflows is set)
+ *   - source and destination MAC bytes in the header template pkt_dev->hh
+ *   - cur_udp_src / cur_udp_dst
+ *   - cur_saddr / cur_daddr (IPv4) or cur_in6_daddr (IPv6)
+ *   - cur_pkt_size
+ *   - the packet counter of the selected flow
+ *
+ * In sequential MAC mode the offset cycles through exactly
+ * src_mac_count / dst_mac_count values (0 .. count-1), the same set the
+ * random mode draws from.
+ */
+static void mod_cur_headers(struct pktgen_dev *pkt_dev) {        
+	__u32 imn;
+	__u32 imx;
+	int  flow = 0;
+
+	if (pkt_dev->cflows) {
+		flow = pktgen_random() % pkt_dev->cflows;
+
+		/* Flow has sent its quota: restart it so that a new
+		 * destination gets picked below.
+		 */
+		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
+			pkt_dev->flows[flow].count = 0;
+	}
+
+	/*  Deal with source MAC */
+	if (pkt_dev->src_mac_count > 1) {
+		__u32 mc;
+		__u32 tmp;
+
+		if (pkt_dev->flags & F_MACSRC_RND)
+			mc = pktgen_random() % (pkt_dev->src_mac_count);
+		else {
+			mc = pkt_dev->cur_src_mac_offset++;
+			/* Wrap with >= so offsets stay in 0 .. count-1;
+			 * a plain > would hand out count+1 addresses.
+			 */
+			if (pkt_dev->cur_src_mac_offset >= pkt_dev->src_mac_count)
+				pkt_dev->cur_src_mac_offset = 0;
+		}
+
+		/* Add the offset to the base MAC byte by byte, least
+		 * significant byte first, carrying via (tmp >> 8).
+		 */
+		tmp = pkt_dev->src_mac[5] + (mc & 0xFF);
+		pkt_dev->hh[11] = tmp;
+		tmp = (pkt_dev->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[10] = tmp;
+		tmp = (pkt_dev->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[9] = tmp;
+		tmp = (pkt_dev->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[8] = tmp;
+		tmp = (pkt_dev->src_mac[1] + (tmp >> 8));
+		pkt_dev->hh[7] = tmp;
+	}
+
+	/*  Deal with Destination MAC */
+	if (pkt_dev->dst_mac_count > 1) {
+		__u32 mc;
+		__u32 tmp;
+
+		if (pkt_dev->flags & F_MACDST_RND)
+			mc = pktgen_random() % (pkt_dev->dst_mac_count);
+		else {
+			mc = pkt_dev->cur_dst_mac_offset++;
+			/* Same wrap rule as for the source MAC. */
+			if (pkt_dev->cur_dst_mac_offset >= pkt_dev->dst_mac_count)
+				pkt_dev->cur_dst_mac_offset = 0;
+		}
+
+		tmp = pkt_dev->dst_mac[5] + (mc & 0xFF);
+		pkt_dev->hh[5] = tmp;
+		tmp = (pkt_dev->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[4] = tmp;
+		tmp = (pkt_dev->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[3] = tmp;
+		tmp = (pkt_dev->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[2] = tmp;
+		tmp = (pkt_dev->dst_mac[1] + (tmp >> 8));
+		pkt_dev->hh[1] = tmp;
+	}
+
+	/* UDP source port: random or incrementing within [min, max). */
+	if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
+		if (pkt_dev->flags & F_UDPSRC_RND)
+			pkt_dev->cur_udp_src = ((pktgen_random() % (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) + pkt_dev->udp_src_min);
+		else {
+			pkt_dev->cur_udp_src++;
+			if (pkt_dev->cur_udp_src >= pkt_dev->udp_src_max)
+				pkt_dev->cur_udp_src = pkt_dev->udp_src_min;
+		}
+	}
+
+	/* UDP destination port: same scheme. */
+	if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
+		if (pkt_dev->flags & F_UDPDST_RND) {
+			pkt_dev->cur_udp_dst = ((pktgen_random() % (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) + pkt_dev->udp_dst_min);
+		}
+		else {
+			pkt_dev->cur_udp_dst++;
+			if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
+				pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min;
+		}
+	}
+
+	if (!(pkt_dev->flags & F_IPV6)) {
+
+		/* IPv4 source address; arithmetic is done in host order. */
+		if ((imn = ntohl(pkt_dev->saddr_min)) < (imx = ntohl(pkt_dev->saddr_max))) {
+			__u32 t;
+			if (pkt_dev->flags & F_IPSRC_RND)
+				t = ((pktgen_random() % (imx - imn)) + imn);
+			else {
+				t = ntohl(pkt_dev->cur_saddr);
+				t++;
+				if (t > imx) {
+					t = imn;
+				}
+			}
+			pkt_dev->cur_saddr = htonl(t);
+		}
+
+		if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) {
+			/* Flow already active: keep its destination. */
+			pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr;
+		} else {
+
+			if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = ntohl(pkt_dev->daddr_max))) {
+				__u32 t;
+				if (pkt_dev->flags & F_IPDST_RND) {
+
+					t = ((pktgen_random() % (imx - imn)) + imn);
+					t = htonl(t);
+
+					/* Re-draw until the address is not in
+					 * one of the special ranges.
+					 */
+					while( LOOPBACK(t) || MULTICAST(t) || BADCLASS(t) || ZERONET(t) ||  LOCAL_MCAST(t) ) {
+						t = ((pktgen_random() % (imx - imn)) + imn);
+						t = htonl(t);
+					}
+					pkt_dev->cur_daddr = t;
+				}
+
+				else {
+					t = ntohl(pkt_dev->cur_daddr);
+					t++;
+					if (t > imx) {
+						t = imn;
+					}
+					pkt_dev->cur_daddr = htonl(t);
+				}
+			}
+			if(pkt_dev->cflows) {
+				/* Remember the destination for this flow. */
+				pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr;
+				pkt_dev->nflows++;
+			}
+		}
+	}
+	else /* IPV6 * */
+	{
+		/* An all-zero min_in6_daddr means "leave cur_in6_daddr
+		 * alone"; the empty statement after the condition is
+		 * intentional.
+		 */
+		if(pkt_dev->min_in6_daddr.s6_addr32[0] == 0 &&
+		   pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
+		   pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
+		   pkt_dev->min_in6_daddr.s6_addr32[3] == 0);
+		else {
+			int i;
+
+			/* Only random destinations yet */
+
+			for(i=0; i < 4; i++) {
+				pkt_dev->cur_in6_daddr.s6_addr32[i] =
+					((pktgen_random() |
+					  pkt_dev->min_in6_daddr.s6_addr32[i]) &
+					 pkt_dev->max_in6_daddr.s6_addr32[i]);
+			}
+		}
+	}
+
+	/* Packet size: random in [min, max) or incrementing up to max. */
+	if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
+		__u32 t;
+		if (pkt_dev->flags & F_TXSIZE_RND) {
+			t = ((pktgen_random() % (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size))
+			     + pkt_dev->min_pkt_size);
+		}
+		else {
+			t = pkt_dev->cur_pkt_size + 1;
+			if (t > pkt_dev->max_pkt_size)
+				t = pkt_dev->min_pkt_size;
+		}
+		pkt_dev->cur_pkt_size = t;
+	}
+
+	/* Counted even without flows configured (slot 0 is used then). */
+	pkt_dev->flows[flow].count++;
+}
+
+
+/*
+ * Build one Ethernet/IPv4/UDP test packet for pkt_dev.
+ *
+ * The header values are first advanced by mod_cur_headers(), then the
+ * Ethernet, IP and UDP headers are written into the linear part of a new
+ * skb.  The payload is either linear (nfrags <= 0) or spread over up to
+ * MAX_SKB_FRAGS page fragments.  The pktgen header (magic, sequence number,
+ * timestamp) is stamped right after the UDP header.
+ *
+ * The payload length is never smaller than sizeof(struct pktgen_hdr),
+ * including the case where cur_pkt_size is smaller than the headers and the
+ * raw computation goes negative.
+ *
+ * Returns the skb, or NULL (with "No memory" in pkt_dev->result) when the
+ * skb or one of the fragment pages cannot be allocated.
+ */
+static struct sk_buff *fill_packet_ipv4(struct net_device *odev, 
+				   struct pktgen_dev *pkt_dev)
+{
+	struct sk_buff *skb = NULL;
+	__u8 *eth;
+	struct udphdr *udph;
+	int datalen, iplen;
+	struct iphdr *iph;
+	struct pktgen_hdr *pgh = NULL;
+
+	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
+	if (!skb) {
+		sprintf(pkt_dev->result, "No memory");
+		return NULL;
+	}
+
+	skb_reserve(skb, 16);
+
+	/*  Reserve for ethernet and IP header  */
+	eth = (__u8 *) skb_push(skb, 14);
+	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
+	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+
+	/* Update any of the values, used when we're incrementing various
+	 * fields.
+	 */
+	mod_cur_headers(pkt_dev);
+
+	memcpy(eth, pkt_dev->hh, 12);
+	*(u16*)&eth[12] = __constant_htons(ETH_P_IP);
+
+	datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */
+	/* Compare as signed: against a bare sizeof a negative datalen would
+	 * be converted to a huge unsigned value and slip through.
+	 */
+	if (datalen < (int)sizeof(struct pktgen_hdr))
+		datalen = sizeof(struct pktgen_hdr);
+
+	udph->source = htons(pkt_dev->cur_udp_src);
+	udph->dest = htons(pkt_dev->cur_udp_dst);
+	udph->len = htons(datalen + 8); /* DATA + udphdr */
+	udph->check = 0;  /* No checksum */
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 32;
+	iph->tos = 0;
+	iph->protocol = IPPROTO_UDP; /* UDP */
+	iph->saddr = pkt_dev->cur_saddr;
+	iph->daddr = pkt_dev->cur_daddr;
+	iph->frag_off = 0;
+	iplen = 20 + 8 + datalen;
+	iph->tot_len = htons(iplen);
+	iph->check = 0;
+	iph->check = ip_fast_csum((void *) iph, iph->ihl);
+	skb->protocol = __constant_htons(ETH_P_IP);
+	skb->mac.raw = ((u8 *)iph) - 14;
+	skb->dev = odev;
+	skb->pkt_type = PACKET_HOST;
+
+	if (pkt_dev->nfrags <= 0)
+		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+	else {
+		int frags = pkt_dev->nfrags;
+		int i;
+
+		pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8);
+
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		/* Whatever does not fit into the pages stays linear. */
+		if (datalen > frags*PAGE_SIZE) {
+			skb_put(skb, datalen-frags*PAGE_SIZE);
+			datalen = frags*PAGE_SIZE;
+		}
+
+		/* Attach one page per (up to) PAGE_SIZE bytes of payload. */
+		i = 0;
+		while (datalen > 0) {
+			struct page *page = alloc_pages(GFP_KERNEL, 0);
+
+			if (!page) {
+				/* nr_frags only counts pages attached so
+				 * far, so kfree_skb() releases exactly those.
+				 */
+				kfree_skb(skb);
+				sprintf(pkt_dev->result, "No memory");
+				return NULL;
+			}
+			skb_shinfo(skb)->frags[i].page = page;
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+				(datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		/* Fewer pages than requested fragments: keep splitting the
+		 * last fragment in half (sharing its page) until the count
+		 * is reached or the fragment cannot be split any further.
+		 */
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+			/* The struct copy already carries the page pointer. */
+			skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+	/* Stamp the time, and sequence number, convert them to network byte order */
+
+	if (pgh) {
+		struct timeval timestamp;
+
+		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+		pgh->seq_num   = htonl(pkt_dev->seq_num);
+
+		do_gettimeofday(&timestamp);
+		pgh->tv_sec    = htonl(timestamp.tv_sec);
+		pgh->tv_usec   = htonl(timestamp.tv_usec);
+	}
+	pkt_dev->seq_num++;
+
+	return skb;
+}
+
+/*
+ * scan_ip6, fmt_ip taken from dietlibc-0.21 
+ * Author Felix von Leitner <felix-dietlibc@fefe.de>
+ *
+ * Slightly modified for kernel. 
+ * Should be candidate for net/ipv4/utils.c
+ * --ro
+ */
+
+/*
+ * Parse a textual IPv6 address from s into the 16 bytes at ip.
+ *
+ * Part 1 reads colon-separated 16-bit hex groups into ip[] until either all
+ * 16 bytes are filled or a "::" is met.  Part 2 collects the groups after
+ * the "::" into a temporary suffix buffer, which is finally copied to the
+ * end of ip[]; the bytes in between stay zero.  A trailing dotted-quad IPv4
+ * address is accepted for the last four bytes in either part.
+ *
+ * Returns len, the running count of input characters accounted for, or 0
+ * when part 1 meets a position where no hex digits can be read.
+ */
+static unsigned int scan_ip6(const char *s,char ip[16])
+{
+	unsigned int i;
+	unsigned int len=0;
+	unsigned long u;
+	char suffix[16];
+	unsigned int prefixlen=0;
+	unsigned int suffixlen=0;
+	__u32 tmp;
+
+	for (i=0; i<16; i++) ip[i]=0;
+
+	for (;;) {
+		if (*s == ':') {
+			len++;
+			if (s[1] == ':') {        /* Found "::", skip to part 2 */
+				s+=2;
+				len++;
+				break;
+			}
+			s++;
+		}
+		{
+			/* i = number of characters consumed by the hex group */
+			char *tmp;
+			u=simple_strtoul(s,&tmp,16);
+			i=tmp-s;
+		}
+
+		if (!i) return 0;
+		if (prefixlen==12 && s[i]=='.') {
+
+			/* the last 4 bytes may be written as IPv4 address */
+
+			tmp = in_aton(s);
+			memcpy((struct in_addr*)(ip+12), &tmp, sizeof(tmp));
+			return i+len;
+		}
+		/* store the group high byte first */
+		ip[prefixlen++] = (u >> 8);
+		ip[prefixlen++] = (u & 255);
+		s += i; len += i;
+		if (prefixlen==16)
+			return len;
+	}
+
+/* part 2, after "::" */
+	for (;;) {
+		if (*s == ':') {
+			/* a ':' directly after the "::" ends the parse */
+			if (suffixlen==0)
+				break;
+			s++;
+			len++;
+		} else if (suffixlen!=0)
+			break;
+		{
+			char *tmp;
+			u=simple_strtol(s,&tmp,16);
+			i=tmp-s;
+		}
+		if (!i) {
+			if (*s) len--;
+			break;
+		}
+		if (suffixlen+prefixlen<=12 && s[i]=='.') {
+			tmp = in_aton(s);
+			memcpy((struct in_addr*)(suffix+suffixlen), &tmp, sizeof(tmp));
+			suffixlen+=4;
+			len+=strlen(s);
+			break;
+		}
+		suffix[suffixlen++] = (u >> 8);
+		suffix[suffixlen++] = (u & 255);
+		s += i; len += i;
+		if (prefixlen+suffixlen==16)
+			break;
+	}
+	/* right-align the collected suffix in the 16-byte address */
+	for (i=0; i<suffixlen; i++)
+		ip[16-suffixlen+i] = suffix[i];
+	return len;
+}
+
+/* Map a nibble value 0..15 to its lower-case ASCII hex digit. */
+static char tohex(char hexdigit) {
+	if (hexdigit > 9)
+		return hexdigit + 'a' - 10;
+	return hexdigit + '0';
+}
+
+/*
+ * Write the low 16 bits of i to s as lower-case hex without leading zeros
+ * (at least one digit is always produced).  No terminating NUL is written.
+ * Returns the number of characters written.
+ */
+static int fmt_xlong(char* s,unsigned int i) {
+	char *out = s;
+	int shift;
+
+	/* Upper three nibbles: a '0' only advances once something non-zero
+	 * has been emitted; otherwise the next digit overwrites it.
+	 */
+	for (shift = 12; shift > 0; shift -= 4) {
+		*out = tohex((i >> shift) & 0xf);
+		if (out != s || *out != '0')
+			++out;
+	}
+
+	/* The lowest nibble is always kept. */
+	*out = tohex(i & 0xf);
+	return out - s + 1;
+}
+
+/*
+ * Format the 16-byte IPv6 address ip as text into s and NUL-terminate it.
+ *
+ * The address is emitted as eight 16-bit hex groups (via fmt_xlong());
+ * groups that are zero are dropped and replaced by a "::" style gap.
+ * Returns the number of characters written, not counting the NUL.
+ *
+ * NOTE(review): every separate run of zero groups is compressed, so an
+ * address with two zero runs is printed with two "::" gaps -- confirm that
+ * consumers of this text can live with that.
+ */
+static unsigned int fmt_ip6(char *s,const char ip[16]) {
+	unsigned int len;
+	unsigned int i;
+	unsigned int temp;
+	unsigned int compressing;
+	int j;
+
+	len = 0; compressing = 0;
+	for (j=0; j<16; j+=2) {
+
+#ifdef V4MAPPEDPREFIX
+		if (j==12 && !memcmp(ip,V4mappedprefix,12)) {
+			inet_ntoa_r(*(struct in_addr*)(ip+12),s);
+			temp=strlen(s);
+			return len+temp;
+		}
+#endif
+		/* temp = current 16-bit group, high byte first */
+		temp = ((unsigned long) (unsigned char) ip[j] << 8) +
+			(unsigned long) (unsigned char) ip[j+1];
+		if (temp == 0) {
+			if (!compressing) {
+				compressing=1;
+				/* a gap at the very start needs its own ':' */
+				if (j==0) {
+					*s++=':'; ++len;
+				}
+			}
+		} else {
+			if (compressing) {
+				/* close the gap: second ':' of the "::" */
+				compressing=0;
+				*s++=':'; ++len;
+			}
+			i = fmt_xlong(s,temp); len += i; s += i;
+			if (j<14) {
+				*s++ = ':';
+				++len;
+			}
+		}
+	}
+	/* address ended inside a gap */
+	if (compressing) {
+		*s++=':'; ++len;
+	}
+	*s=0;
+	return len;
+}
+
+/*
+ * Build one Ethernet/IPv6/UDP test packet for pkt_dev.
+ *
+ * The header values are first advanced by mod_cur_headers(), then the
+ * Ethernet, IPv6 and UDP headers are written into the linear part of a new
+ * skb.  The payload is either linear (nfrags <= 0) or spread over up to
+ * MAX_SKB_FRAGS page fragments.  The pktgen header (magic, sequence number,
+ * timestamp) is stamped right after the UDP header.
+ *
+ * The payload length is never smaller than sizeof(struct pktgen_hdr),
+ * including the case where cur_pkt_size is smaller than the headers and the
+ * raw computation goes negative; a rate-limited message reports the bump.
+ *
+ * Returns the skb, or NULL (with "No memory" in pkt_dev->result) when the
+ * skb or one of the fragment pages cannot be allocated.
+ */
+static struct sk_buff *fill_packet_ipv6(struct net_device *odev, 
+				   struct pktgen_dev *pkt_dev)
+{
+	struct sk_buff *skb = NULL;
+	__u8 *eth;
+	struct udphdr *udph;
+	int datalen;
+	struct ipv6hdr *iph;
+	struct pktgen_hdr *pgh = NULL;
+
+	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
+	if (!skb) {
+		sprintf(pkt_dev->result, "No memory");
+		return NULL;
+	}
+
+	skb_reserve(skb, 16);
+
+	/*  Reserve for ethernet and IP header  */
+	eth = (__u8 *) skb_push(skb, 14);
+	iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
+	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+
+	/* Update any of the values, used when we're incrementing various
+	 * fields.
+	 */
+	mod_cur_headers(pkt_dev);
+
+	memcpy(eth, pkt_dev->hh, 12);
+	*(u16*)&eth[12] = __constant_htons(ETH_P_IPV6);
+
+	datalen = pkt_dev->cur_pkt_size-14- 
+		sizeof(struct ipv6hdr)-sizeof(struct udphdr); /* Eth + IPh + UDPh */
+
+	/* Compare as signed: against a bare sizeof a negative datalen would
+	 * be converted to a huge unsigned value and slip through.
+	 */
+	if (datalen < (int)sizeof(struct pktgen_hdr)) {
+		datalen = sizeof(struct pktgen_hdr);
+		if (net_ratelimit())
+			printk(KERN_INFO "pktgen: increased datalen to %d\n", datalen);
+	}
+
+	udph->source = htons(pkt_dev->cur_udp_src);
+	udph->dest = htons(pkt_dev->cur_udp_dst);
+	udph->len = htons(datalen + sizeof(struct udphdr));
+	udph->check = 0;  /* No checksum */
+
+	*(u32*)iph = __constant_htonl(0x60000000); /* Version + flow */
+
+	iph->hop_limit = 32;
+
+	iph->payload_len = htons(sizeof(struct udphdr) + datalen);
+	iph->nexthdr = IPPROTO_UDP;
+
+	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
+	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
+
+	skb->mac.raw = ((u8 *)iph) - 14;
+	skb->protocol = __constant_htons(ETH_P_IPV6);
+	skb->dev = odev;
+	skb->pkt_type = PACKET_HOST;
+
+	if (pkt_dev->nfrags <= 0)
+		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+	else {
+		int frags = pkt_dev->nfrags;
+		int i;
+
+		pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8);
+
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		/* Whatever does not fit into the pages stays linear. */
+		if (datalen > frags*PAGE_SIZE) {
+			skb_put(skb, datalen-frags*PAGE_SIZE);
+			datalen = frags*PAGE_SIZE;
+		}
+
+		/* Attach one page per (up to) PAGE_SIZE bytes of payload. */
+		i = 0;
+		while (datalen > 0) {
+			struct page *page = alloc_pages(GFP_KERNEL, 0);
+
+			if (!page) {
+				/* nr_frags only counts pages attached so
+				 * far, so kfree_skb() releases exactly those.
+				 */
+				kfree_skb(skb);
+				sprintf(pkt_dev->result, "No memory");
+				return NULL;
+			}
+			skb_shinfo(skb)->frags[i].page = page;
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+				(datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		/* Fewer pages than requested fragments: keep splitting the
+		 * last fragment in half (sharing its page) until the count
+		 * is reached or the fragment cannot be split any further.
+		 */
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+			/* The struct copy already carries the page pointer. */
+			skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+	/* Stamp the time, and sequence number, convert them to network byte order */
+	/* should we update cloned packets too ? */
+	if (pgh) {
+		struct timeval timestamp;
+
+		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+		pgh->seq_num   = htonl(pkt_dev->seq_num);
+
+		do_gettimeofday(&timestamp);
+		pgh->tv_sec    = htonl(timestamp.tv_sec);
+		pgh->tv_usec   = htonl(timestamp.tv_usec);
+	}
+	pkt_dev->seq_num++;
+
+	return skb;
+}
+
+/*
+ * Build the next packet for pkt_dev, choosing the IPv6 or IPv4 builder
+ * according to the F_IPV6 flag.  Returns whatever the builder returns.
+ */
+static inline struct sk_buff *fill_packet(struct net_device *odev, 
+				   struct pktgen_dev *pkt_dev)
+{
+	return (pkt_dev->flags & F_IPV6) ?
+		fill_packet_ipv6(odev, pkt_dev) :
+		fill_packet_ipv4(odev, pkt_dev);
+}
+
+/*
+ * Reset the per-run statistics of pkt_dev.  The sequence number restarts
+ * at 1; every other counter restarts at 0.
+ */
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev) 
+{
+	/* traffic counters */
+	pkt_dev->sofar = 0;
+	pkt_dev->tx_bytes = 0;
+	pkt_dev->errors = 0;
+
+	/* timing and sequencing */
+	pkt_dev->idle_acc = 0;
+	pkt_dev->seq_num = 1;
+}
+
+/*
+ * Start every device of thread t.
+ *
+ * Under the thread's if_lock each device is (re)initialised through
+ * pktgen_setup_inject().  Devices that end up with a usable odev get their
+ * counters cleared, are marked running and are scheduled to transmit
+ * immediately; the others only get an error text in their result buffer.
+ * If at least one device was started, T_STOP is cleared in t->control.
+ */
+static void pktgen_run(struct pktgen_thread *t)
+{
+	struct pktgen_dev *dev;
+	int nr_started = 0;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t));
+
+	if_lock(t);
+	dev = t->if_list;
+	while (dev) {
+		/* setup odev and create initial packet. */
+		pktgen_setup_inject(dev);
+
+		if (!dev->odev) {
+			strcpy(dev->result, "Error starting");
+		} else {
+			pktgen_clear_counters(dev);
+			dev->running = 1; /* Cranke yeself! */
+			dev->skb = NULL;
+			dev->started_at = getCurUs();
+			dev->next_tx_us = getCurUs(); /* Transmit immediately */
+			dev->next_tx_ns = 0;
+
+			strcpy(dev->result, "Starting");
+			nr_started++;
+		}
+		dev = dev->next;
+	}
+	if_unlock(t);
+
+	if (nr_started)
+		t->control &= ~(T_STOP);
+}
+
+/*
+ * Stop all devices of all pktgen threads, holding thread_lock() while the
+ * thread list is walked.
+ */
+static void pktgen_stop_all_threads_ifs(void)
+{
+	struct pktgen_thread *thr = pktgen_threads;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads.\n"));
+
+	thread_lock();
+	for (; thr; thr = thr->next)
+		pktgen_stop(thr);
+	thread_unlock();
+}
+
+/*
+ * Returns 1 if at least one device on t's interface list is marked
+ * running, 0 otherwise.  No locking is done here.
+ */
+static int thread_is_running(struct pktgen_thread *t )
+{
+	struct pktgen_dev *dev = t->if_list;
+
+	while (dev) {
+		if (dev->running)
+			return 1;
+		dev = dev->next;
+	}
+	return 0;
+}
+
+/*
+ * Sleep in 100ms steps until no device of thread t is running any more.
+ *
+ * The thread's if_lock is held only while the device list is inspected,
+ * never across the sleep.  Returns 1 when the thread went idle, 0 when a
+ * signal became pending for the caller first (the lock is not held then).
+ */
+static int pktgen_wait_thread_run(struct pktgen_thread *t )
+{
+	if_lock(t);
+
+	while (thread_is_running(t)) {
+		if_unlock(t);
+
+		msleep_interruptible(100);
+
+		if (signal_pending(current))
+			return 0;
+
+		if_lock(t);
+	}
+
+	if_unlock(t);
+	return 1;
+}
+
+/*
+ * Wait for every pktgen thread, in list order, to finish its run.
+ *
+ * If the wait on some thread is cut short by a signal, T_STOP is set on
+ * that thread and on every thread after it in the list.
+ *
+ * Returns the result of the last pktgen_wait_thread_run() call: non-zero
+ * when all threads finished (or there are none), 0 when interrupted.
+ */
+static int pktgen_wait_all_threads_run(void)
+{
+	struct pktgen_thread *thr;
+	int sig = 1;
+
+	for (thr = pktgen_threads; thr != NULL; ) {
+		sig = pktgen_wait_thread_run(thr);
+		if (sig == 0)
+			break;
+
+		/* Step to the next thread under the list lock. */
+		thread_lock();
+		thr = thr->next;
+		thread_unlock();
+	}
+
+	if (sig != 0)
+		return sig;
+
+	/* Interrupted: ask the remaining threads to stop. */
+	thread_lock();
+	for (; thr != NULL; thr = thr->next)
+		thr->control |= (T_STOP);
+	thread_unlock();
+
+	return sig;
+}
+
+/*
+ * Request a run on every pktgen thread and wait for completion.
+ *
+ * T_RUN is set on each thread under thread_lock(); the caller then sleeps
+ * for HZ/8 ticks so the threads can pick the flag up, and finally blocks in
+ * pktgen_wait_all_threads_run().
+ */
+static void pktgen_run_all_threads(void)
+{
+	struct pktgen_thread *thr = pktgen_threads;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n"));
+
+	thread_lock();
+	for (; thr; thr = thr->next)
+		thr->control |= (T_RUN);
+	thread_unlock();
+
+	/* Propagate thread->control  */
+	current->state = TASK_INTERRUPTIBLE;
+	schedule_timeout(HZ/8);
+
+	pktgen_wait_all_threads_run();
+}
+
+
+/*
+ * Format the statistics of the finished run into pkt_dev->result.
+ *
+ * Line 1: total / busy / idle time in usec, packets sent, current packet
+ * size and nr_frags.  Line 2: packets per second, Mbit/s, bit/s and the
+ * error count.
+ *
+ * do_div() takes a 32-bit divisor, so numerator and elapsed time are
+ * shifted right together until the time fits.  A run with zero elapsed
+ * microseconds is reported as 0 pps instead of dividing by zero.
+ */
+static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
+{
+	__u64 total_us, bps, mbps, pps, idle;
+	char *p = pkt_dev->result;
+
+	total_us = pkt_dev->stopped_at - pkt_dev->started_at;
+
+	idle = pkt_dev->idle_acc;
+
+	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
+		     (unsigned long long) total_us,
+		     (unsigned long long)(total_us - idle),
+		     (unsigned long long) idle,
+		     (unsigned long long) pkt_dev->sofar,
+		     pkt_dev->cur_pkt_size, nr_frags);
+
+	pps = pkt_dev->sofar * USEC_PER_SEC;
+
+	/* Scale both operands until the divisor fits into 32 bits. */
+	while ((total_us >> 32) != 0) {
+		pps >>= 1;
+		total_us >>= 1;
+	}
+
+	if (total_us != 0) {
+		do_div(pps, total_us);
+	} else {
+		/* Started and stopped within the same microsecond. */
+		pps = 0;
+	}
+
+	bps = pps * 8 * pkt_dev->cur_pkt_size;
+
+	mbps = bps;
+	do_div(mbps, 1000000);
+	p += sprintf(p, "  %llupps %lluMb/sec (%llubps) errors: %llu",
+		     (unsigned long long) pps,
+		     (unsigned long long) mbps,
+		     (unsigned long long) bps,
+		     (unsigned long long) pkt_dev->errors);
+}
+ 
+
+/* Set stopped-at timer, remove from running list, do counters & statistics
+ *
+ * Returns -EINVAL (after logging) when the device is not running, else 0.
+ *
+ * The device may be stopped before any packet was built (pkt_dev->skb is
+ * NULL, e.g. right after pktgen_run()); the fragment count is then reported
+ * as -1 instead of dereferencing the missing skb.
+ */
+
+static int pktgen_stop_device(struct pktgen_dev *pkt_dev) 
+{
+	int nr_frags;
+
+	if (!pkt_dev->running) {
+		printk("pktgen: interface: %s is already stopped\n", pkt_dev->ifname);
+		return -EINVAL;
+	}
+
+	pkt_dev->stopped_at = getCurUs();
+	pkt_dev->running = 0;
+
+	/* Only look into the skb if there is one. */
+	nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
+	show_results(pkt_dev, nr_frags);
+
+	if (pkt_dev->skb)
+		kfree_skb(pkt_dev->skb);
+
+	pkt_dev->skb = NULL;
+
+	return 0;
+}
+
+/*
+ * Pick the running device of thread t with the smallest next_tx_us (the
+ * first such device wins ties).  Returns NULL when nothing is running.
+ * The interface list is scanned under the thread's if_lock.
+ */
+static struct pktgen_dev *next_to_run(struct pktgen_thread *t )
+{
+	struct pktgen_dev *cand;
+	struct pktgen_dev *earliest = NULL;
+
+	if_lock(t);
+	for (cand = t->if_list; cand; cand = cand->next) {
+		if (cand->running &&
+		    (!earliest || cand->next_tx_us < earliest->next_tx_us))
+			earliest = cand;
+	}
+	if_unlock(t);
+
+	return earliest;
+}
+
+/*
+ * Call pktgen_stop_device() on every device of thread t while holding the
+ * thread's if_lock.  Return values of the per-device calls are ignored.
+ */
+static void pktgen_stop(struct pktgen_thread *t) {
+	struct pktgen_dev *dev;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_stop.\n"));
+
+	if_lock(t);
+
+	dev = t->if_list;
+	while (dev) {
+		pktgen_stop_device(dev);
+		dev = dev->next;
+	}
+
+	if_unlock(t);
+}
+
+/*
+ * Remove every device from thread t (under the thread's if_lock).
+ */
+static void pktgen_rem_all_ifs(struct pktgen_thread *t) 
+{
+	struct pktgen_dev *dev;
+	struct pktgen_dev *following;
+
+	if_lock(t);
+
+	dev = t->if_list;
+	while (dev) {
+		/* Fetch the successor before the removal call. */
+		following = dev->next;
+		pktgen_remove_device(t, dev);
+		dev = following;
+	}
+
+	if_unlock(t);
+}
+
+/*
+ * Remove thread t's proc entry (if it has a name) and unlink t from the
+ * global pktgen_threads list.  The unlinking is done under thread_lock().
+ */
+static void pktgen_rem_thread(struct pktgen_thread *t) 
+{
+	struct pktgen_thread *prev = pktgen_threads;
+
+	if (strlen(t->fname))
+		remove_proc_entry(t->fname, NULL);
+
+	thread_lock();
+
+	if (prev == t) {
+		/* t is the list head. */
+		pktgen_threads = prev->next;
+	} else {
+		/* Find the predecessor of t and splice t out. */
+		for (; prev; prev = prev->next) {
+			if (prev->next != t)
+				continue;
+			prev->next = t->next;
+			t->next = NULL;
+			break;
+		}
+	}
+
+	thread_unlock();
+}
+
+/*
+ * Try to transmit one packet on pkt_dev.
+ *
+ * Steps:
+ *   1. If a delay is configured, wait (spin()) until next_tx_us.  The
+ *      special delay 0x7FFFFFFF means "never transmit".
+ *   2. If the device queue is stopped or a reschedule is due, account the
+ *      idle time, stop the pktgen device when the interface went down, and
+ *      give up for this round if the queue is still stopped.
+ *   3. Build a new skb when there is none yet, or when the previous send
+ *      succeeded and the skb has been re-sent clone_skb times.
+ *   4. Under odev->xmit_lock hand the skb to the driver's hard_start_xmit()
+ *      and update counters and the next transmit time.
+ *   5. When the configured packet count is reached, wait until the skb is
+ *      no longer referenced elsewhere and stop the device.
+ */
+static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
+{
+	struct net_device *odev = NULL;
+	__u64 idle_start = 0;
+	int ret;
+
+	odev = pkt_dev->odev;
+	
+	if (pkt_dev->delay_us || pkt_dev->delay_ns) {
+		u64 now;
+
+		now = getCurUs();
+		if (now < pkt_dev->next_tx_us)
+			spin(pkt_dev, pkt_dev->next_tx_us);
+
+		/* This is max DELAY, this has special meaning of
+		 * "never transmit"
+		 */
+		if (pkt_dev->delay_us == 0x7FFFFFFF) {
+			pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us;
+			pkt_dev->next_tx_ns = pkt_dev->delay_ns;
+			goto out;
+		}
+	}
+	
+	if (netif_queue_stopped(odev) || need_resched()) {
+		idle_start = getCurUs();
+		
+		/* interface went down: stop generating on it */
+		if (!netif_running(odev)) {
+			pktgen_stop_device(pkt_dev);
+			goto out;
+		}
+		if (need_resched()) 
+			schedule();
+		
+		pkt_dev->idle_acc += getCurUs() - idle_start;
+		
+		if (netif_queue_stopped(odev)) {
+			pkt_dev->next_tx_us = getCurUs(); /* TODO */
+			pkt_dev->next_tx_ns = 0;
+			goto out; /* Try the next interface */
+		}
+	}
+	
+	/* After a failed send the same skb is retried without counting it
+	 * as another clone.
+	 */
+	if (pkt_dev->last_ok || !pkt_dev->skb) {
+		if ((++pkt_dev->clone_count >= pkt_dev->clone_skb ) || (!pkt_dev->skb)) {
+			/* build a new pkt */
+			if (pkt_dev->skb) 
+				kfree_skb(pkt_dev->skb);
+			
+			pkt_dev->skb = fill_packet(odev, pkt_dev);
+			if (pkt_dev->skb == NULL) {
+				printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n");
+				schedule();
+				pkt_dev->clone_count--; /* back out increment, OOM */
+				goto out;
+			}
+			pkt_dev->allocated_skbs++;
+			pkt_dev->clone_count = 0; /* reset counter */
+		}
+	}
+	
+	spin_lock_bh(&odev->xmit_lock);
+	if (!netif_queue_stopped(odev)) {
+
+		/* Extra reference for the driver; pktgen keeps its own so
+		 * the skb can be sent again.
+		 */
+		atomic_inc(&(pkt_dev->skb->users));
+retry_now:
+		ret = odev->hard_start_xmit(pkt_dev->skb, odev);
+		if (likely(ret == NETDEV_TX_OK)) {
+			pkt_dev->last_ok = 1;    
+			pkt_dev->sofar++;
+			pkt_dev->seq_num++;
+			pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
+			
+		} else if (ret == NETDEV_TX_LOCKED 
+			   && (odev->features & NETIF_F_LLTX)) {
+			/* driver could not take its own lock: retry at once */
+			cpu_relax();
+			goto retry_now;
+		} else {  /* Retry it next time */
+			
+			/* give back the reference taken for the driver */
+			atomic_dec(&(pkt_dev->skb->users));
+			
+			if (debug && net_ratelimit())
+				printk(KERN_INFO "pktgen: Hard xmit error\n");
+			
+			pkt_dev->errors++;
+			pkt_dev->last_ok = 0;
+		}
+
+		/* next transmit time = now + configured delay */
+		pkt_dev->next_tx_us = getCurUs();
+		pkt_dev->next_tx_ns = 0;
+
+		pkt_dev->next_tx_us += pkt_dev->delay_us;
+		pkt_dev->next_tx_ns += pkt_dev->delay_ns;
+
+		/* carry surplus nanoseconds into the microsecond part */
+		if (pkt_dev->next_tx_ns > 1000) {
+			pkt_dev->next_tx_us++;
+			pkt_dev->next_tx_ns -= 1000;
+		}
+	} 
+
+	else {  /* Retry it next time */
+                pkt_dev->last_ok = 0;
+                pkt_dev->next_tx_us = getCurUs(); /* TODO */
+		pkt_dev->next_tx_ns = 0;
+        }
+
+	spin_unlock_bh(&odev->xmit_lock);
+	
+	/* If pkt_dev->count is zero, then run forever */
+	if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
+		/* Wait until ours is the only reference left on the skb, or
+		 * a signal is pending; the wait counts as idle time.
+		 */
+		if (atomic_read(&(pkt_dev->skb->users)) != 1) {
+			idle_start = getCurUs();
+			while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+				if (signal_pending(current)) {
+					break;
+				}
+				schedule();
+			}
+			pkt_dev->idle_acc += getCurUs() - idle_start;
+		}
+                
+		/* Done with this */
+		pktgen_stop_device(pkt_dev);
+	} 
+ out:;
+ }
+
+/* 
+ * Main loop of the thread goes here
+ *
+ * Body of one pktgen kernel thread.  It daemonizes, adjusts its signal
+ * mask, pins itself to t->cpu and then loops: transmit on the next
+ * runnable device (or sleep for up to HZ/10 when there is none), then
+ * act on the control bits posted in t->control.  The loop ends when
+ * T_TERMINATE is set or a signal is pending; the thread then stops and
+ * removes all of its devices and finally removes itself.
+ */
+
+static void pktgen_thread_worker(struct pktgen_thread *t) 
+{
+	DEFINE_WAIT(wait);
+        struct pktgen_dev *pkt_dev = NULL;
+	int cpu = t->cpu;
+	sigset_t tmpsig;	/* old blocked mask; saved below but never read again here */
+	u32 max_before_softirq;
+        u32 tx_since_softirq = 0;
+
+	daemonize("pktgen/%d", cpu);
+
+        /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */
+
+        spin_lock_irq(&current->sighand->siglock);
+        tmpsig = current->blocked;
+        siginitsetinv(&current->blocked, 
+                      sigmask(SIGKILL) | 
+                      sigmask(SIGSTOP)| 
+                      sigmask(SIGTERM));
+
+        recalc_sigpending();
+        spin_unlock_irq(&current->sighand->siglock);
+
+	/* Migrate to the right CPU */
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+        if (smp_processor_id() != cpu)
+                BUG();
+
+	init_waitqueue_head(&t->queue);
+
+	/* Start with a clean control word: no request is pending yet. */
+	t->control &= ~(T_TERMINATE);
+	t->control &= ~(T_RUN);
+	t->control &= ~(T_STOP);
+	t->control &= ~(T_REMDEV);
+
+        t->pid = current->pid;        
+
+        PG_DEBUG(printk("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid));
+
+	/*
+	 * Sampled once: later changes to t->max_before_softirq are not
+	 * picked up by this loop.
+	 */
+	max_before_softirq = t->max_before_softirq;
+        
+        /* NOTE(review): the loop below immediately switches back to TASK_RUNNING. */
+        __set_current_state(TASK_INTERRUPTIBLE);
+        mb();
+
+        while (1) {
+		
+		__set_current_state(TASK_RUNNING);
+
+		/*
+		 * Get next dev to xmit -- if any.
+		 */
+
+                pkt_dev = next_to_run(t);
+                
+                if (pkt_dev) {
+
+			pktgen_xmit(pkt_dev);
+
+			/*
+			 * We like to stay RUNNING but must also give
+			 * others fair share.
+			 */
+
+			/* last_ok is 1 after a successful transmit, 0 otherwise */
+			tx_since_softirq += pkt_dev->last_ok;
+
+			if (tx_since_softirq > max_before_softirq) {
+				if (local_softirq_pending())
+					do_softirq();
+				tx_since_softirq = 0;
+			}
+		} else {
+			/* Nothing to transmit: sleep on t->queue for at most HZ/10. */
+			prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE);
+			schedule_timeout(HZ/10);
+			finish_wait(&(t->queue), &wait);
+		}
+
+                /* 
+		 * Back from sleep, either due to the timeout or signal.
+		 * We check if we have any "posted" work for us.
+		 */
+
+                if (t->control & T_TERMINATE || signal_pending(current)) 
+                        /* we received a request to terminate ourself */
+                        break;
+		
+
+		/* Each request bit is cleared only after its handler has run. */
+		if(t->control & T_STOP) {
+			pktgen_stop(t);
+			t->control &= ~(T_STOP);
+		}
+
+		if(t->control & T_RUN) {
+			pktgen_run(t);
+			t->control &= ~(T_RUN);
+		}
+
+		if(t->control & T_REMDEV) {
+			pktgen_rem_all_ifs(t);
+			t->control &= ~(T_REMDEV);
+		}
+
+		if (need_resched()) 
+			schedule();
+        } 
+
+        /* Tear-down: stop traffic first, then drop the devices, then the thread. */
+        PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name));
+        pktgen_stop(t);
+
+        PG_DEBUG(printk("pktgen: %s removing all device\n", t->name));
+        pktgen_rem_all_ifs(t);
+
+        PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name));
+        pktgen_rem_thread(t);
+}
+
+/*
+ * Look up the device named @ifname on thread @t's interface list.
+ * The list is walked with the thread's if_lock held.  Returns the
+ * matching pktgen_dev, or NULL when the thread has no such device.
+ */
+static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* ifname) 
+{
+	struct pktgen_dev *match;
+
+	if_lock(t);
+
+	match = t->if_list;
+	while (match && strcmp(match->ifname, ifname) != 0)
+		match = match->next;
+
+	if_unlock(t);
+
+	PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, match));
+	return match;
+}
+
+/*
+ * Link @pkt_dev in at the head of @t's if_list and mark it as owned by
+ * @t and not running.  Done under the thread's if_lock.
+ *
+ * Returns 0 on success, or -EBUSY (list untouched) when the device
+ * already belongs to a thread.
+ */
+
+static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) 
+{
+	int rv;
+
+	if_lock(t);
+
+	if (!pkt_dev->pg_thread) {
+		/* push onto the front of the singly linked list */
+		pkt_dev->next = t->if_list;
+		t->if_list = pkt_dev;
+		pkt_dev->pg_thread = t;
+		pkt_dev->running = 0;
+		rv = 0;
+	} else {
+		printk("pktgen: ERROR:  already assigned to a thread.\n");
+		rv = -EBUSY;
+	}
+
+	if_unlock(t);
+	return rv;
+}
+
+/* Called under thread lock */
+
+/*
+ * Allocate a pktgen_dev for interface @ifname, give it default settings,
+ * bind it to the network device, create its /proc entry and attach it
+ * to thread @t.
+ *
+ * Returns 0 on success, -EBUSY if some thread already uses the
+ * interface, -ENOMEM on allocation failure, -ENODEV if
+ * pktgen_setup_dev() fails and -EINVAL if the /proc entry cannot be
+ * created.  On every failure path all resources acquired so far are
+ * released, including the reference held in pkt_dev->odev.
+ */
+static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) 
+{
+	struct pktgen_dev *pkt_dev;
+	int err;
+
+	/* We don't allow a device to be on several threads */
+	if (__pktgen_NN_threads(ifname, FIND) != NULL) {
+		printk("pktgen: ERROR: interface already used.\n");
+		return -EBUSY;
+	}
+
+	pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL);
+	if (!pkt_dev)
+		return -ENOMEM;
+
+	memset(pkt_dev, 0, sizeof(struct pktgen_dev));
+
+	pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state));
+	if (pkt_dev->flows == NULL) {
+		err = -ENOMEM;
+		goto out_free_dev;
+	}
+	memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state));
+
+	/* Defaults; the pg_*_d values come from the module parameters. */
+	pkt_dev->min_pkt_size = ETH_ZLEN;
+	pkt_dev->max_pkt_size = ETH_ZLEN;
+	pkt_dev->nfrags = 0;
+	pkt_dev->clone_skb = pg_clone_skb_d;
+	pkt_dev->delay_us = pg_delay_d / 1000;
+	pkt_dev->delay_ns = pg_delay_d % 1000;
+	pkt_dev->count = pg_count_d;
+	pkt_dev->sofar = 0;
+	pkt_dev->udp_src_min = 9; /* sink port */
+	pkt_dev->udp_src_max = 9;
+	pkt_dev->udp_dst_min = 9;
+	pkt_dev->udp_dst_max = 9;
+
+	/*
+	 * At most 31 bytes are copied into the zeroed structure, so the
+	 * name stays NUL-terminated (assumes ifname[] holds at least 32
+	 * bytes -- TODO confirm against the struct definition).
+	 */
+	strncpy(pkt_dev->ifname, ifname, 31);
+	sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname);
+
+	if (! pktgen_setup_dev(pkt_dev)) {
+		printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
+		err = -ENODEV;
+		goto out_free_flows;
+	}
+
+	pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL);
+	if (!pkt_dev->proc_ent) {
+		printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname);
+		err = -EINVAL;
+		goto out_put_odev;
+	}
+	pkt_dev->proc_ent->read_proc = proc_if_read;
+	pkt_dev->proc_ent->write_proc = proc_if_write;
+	pkt_dev->proc_ent->data = (void*)(pkt_dev);
+	pkt_dev->proc_ent->owner = THIS_MODULE;
+
+	return add_dev_to_thread(t, pkt_dev);
+
+out_put_odev:
+	/*
+	 * pktgen_remove_device() drops a reference on pkt_dev->odev, so a
+	 * successfully set-up device owns one; release it here too or the
+	 * net_device can never be unregistered.
+	 */
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+		pkt_dev->odev = NULL;
+	}
+out_free_flows:
+	vfree(pkt_dev->flows);
+out_free_dev:
+	kfree(pkt_dev);
+	return err;
+}
+
+/*
+ * Search the global pktgen_threads list, under thread_lock(), for the
+ * thread called @name.  Returns the thread or NULL if none matches.
+ */
+static struct pktgen_thread *pktgen_find_thread(const char* name) 
+{
+	struct pktgen_thread *cur;
+
+	thread_lock();
+
+	for (cur = pktgen_threads; cur; cur = cur->next) {
+		if (!strcmp(cur->name, name))
+			break;
+	}
+
+	thread_unlock();
+	return cur;
+}
+
+/*
+ * Create the pktgen thread @name bound to @cpu: allocate its control
+ * structure, publish its /proc entry, link it onto pktgen_threads and
+ * start the kernel thread running pktgen_thread_worker().
+ *
+ * Returns 0 on success, -EINVAL for an over-long or duplicate name or
+ * when the /proc entry cannot be created, -ENOMEM on allocation
+ * failure, or the negative value returned by kernel_thread().  When the
+ * worker cannot be started the half-created thread is unlinked and
+ * freed again, so pktgen_threads only ever holds threads that have a
+ * worker able to remove them.
+ */
+static int pktgen_create_thread(const char* name, int cpu) 
+{
+	struct pktgen_thread *t = NULL;
+	int pid;
+
+	/* Longer names are rejected before the strcpy() into t->name below. */
+	if (strlen(name) > 31) {
+		printk("pktgen: ERROR:  Thread name cannot be more than 31 characters.\n");
+		return -EINVAL;
+	}
+
+	if (pktgen_find_thread(name)) {
+		printk("pktgen: ERROR: thread: %s already exists\n", name);
+		return -EINVAL;
+	}
+
+	t = kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
+	if (!t) {
+		printk("pktgen: ERROR: out of memory, can't create new thread.\n");
+		return -ENOMEM;
+	}
+
+	memset(t, 0, sizeof(struct pktgen_thread));
+	strcpy(t->name, name);
+	spin_lock_init(&t->if_lock);
+	t->cpu = cpu;
+
+	sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name);
+	t->proc_ent = create_proc_entry(t->fname, 0600, NULL);
+	if (!t->proc_ent) {
+		printk("pktgen: cannot create %s procfs entry.\n", t->fname);
+		kfree(t);
+		return -EINVAL;
+	}
+	t->proc_ent->read_proc = proc_thread_read;
+	t->proc_ent->write_proc = proc_thread_write;
+	t->proc_ent->data = (void*)(t);
+	t->proc_ent->owner = THIS_MODULE;
+
+	t->next = pktgen_threads;
+	pktgen_threads = t;
+
+	pid = kernel_thread((void *) pktgen_thread_worker, (void *) t, 
+			    CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	if (pid < 0) {
+		struct pktgen_thread **link;
+
+		printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu);
+
+		/*
+		 * No worker exists to unlink this entry later, and
+		 * pg_cleanup() waits for every listed thread to remove
+		 * itself, so undo the registration here.
+		 */
+		thread_lock();
+		for (link = &pktgen_threads; *link; link = &(*link)->next) {
+			if (*link == t) {
+				*link = t->next;
+				break;
+			}
+		}
+		thread_unlock();
+
+		remove_proc_entry(t->fname, NULL);
+		kfree(t);
+		return pid;
+	}
+
+	return 0;
+}
+
+/* 
+ * Removes a device from the thread if_list. 
+ *
+ * Unlinks @pkt_dev from @t's singly linked if_list; does nothing when
+ * the device is not on the list.  Whichever link points at the device
+ * (the list head or a predecessor's ->next) is redirected to the
+ * device's successor, so removing the first element keeps the rest of
+ * the list instead of truncating it.
+ */
+static void _rem_dev_from_if_list(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) 
+{
+	struct pktgen_dev **link;
+
+	for (link = &t->if_list; *link; link = &(*link)->next) {
+		if (*link == pkt_dev) {
+			*link = pkt_dev->next;
+			break;
+		}
+	}
+}
+
+/*
+ * Detach @pkt_dev from thread @t and free it.  A device that is still
+ * running is stopped first (with a warning).  The net_device reference
+ * is dropped, the device is unlinked from the thread's if_list, its
+ * /proc entry is removed and its memory released.  Always returns 0.
+ */
+static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) 
+{
+	PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev));
+
+	if (pkt_dev->running) {
+		printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
+		pktgen_stop_device(pkt_dev);
+	}
+
+	/* Let go of the underlying network interface. */
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+		pkt_dev->odev = NULL;
+	}
+
+	/* Take it off the owning thread's list. */
+	_rem_dev_from_if_list(t, pkt_dev);
+
+	/* Drop the /proc file, if a name was ever assigned. */
+	if (strlen(pkt_dev->fname) != 0)
+		remove_proc_entry(pkt_dev->fname, NULL);
+
+	/* Finally release the flow table and the device itself. */
+	if (pkt_dev->flows != NULL)
+		vfree(pkt_dev->flows);
+	kfree(pkt_dev);
+
+	return 0;
+}
+
+/*
+ * Module initialisation: create the pktgen /proc directory and its
+ * "pgctrl" control file, register for netdevice notifications and
+ * start one "kpktgend_<cpu>" thread per online CPU.
+ *
+ * Returns 0 on success or -EINVAL when the pgctrl entry cannot be
+ * created, in which case the /proc directory is removed again.
+ */
+static int __init pg_init(void) 
+{
+	int cpu;
+
+	/* version is data, so never hand it to printk() as the format. */
+	printk("%s", version);
+
+	module_fname[0] = 0;
+
+	create_proc_dir();
+
+	sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR);
+	module_proc_ent = create_proc_entry(module_fname, 0600, NULL);
+	if (!module_proc_ent) {
+		printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname);
+		/*
+		 * Undo create_proc_dir(); pg_cleanup() does not run when
+		 * init fails.  (pg_cleanup() also calls remove_proc_dir()
+		 * unconditionally, so this is presumably harmless even if
+		 * the directory itself was never created -- verify.)
+		 */
+		remove_proc_dir();
+		return -EINVAL;
+	}
+
+	module_proc_ent->proc_fops =  &pktgen_fops;
+	module_proc_ent->data = NULL;
+
+	/* Register us to receive netdevice events */
+	register_netdevice_notifier(&pktgen_notifier_block);
+
+	for (cpu = 0; cpu < NR_CPUS ; cpu++) {
+		char buf[30];
+
+		if (!cpu_online(cpu))
+			continue;
+
+		sprintf(buf, "kpktgend_%i", cpu);
+		pktgen_create_thread(buf, cpu);
+	}
+	return 0;
+}
+
+/*
+ * Module exit: ask every pktgen thread to terminate and wait until each
+ * one has taken itself off the pktgen_threads list, then unregister the
+ * netdevice notifier and remove the pgctrl entry and the /proc directory.
+ */
+static void __exit pg_cleanup(void)
+{
+	/*
+	 * Private wait queue: nothing in this function's view ever wakes
+	 * it, so the wait below is effectively a poll with a HZ timeout.
+	 */
+	wait_queue_head_t queue;
+	init_waitqueue_head(&queue);
+
+        /* Stop all interfaces & threads */        
+
+        while (pktgen_threads) {
+                struct pktgen_thread *t = pktgen_threads;
+                pktgen_threads->control |= (T_TERMINATE);
+
+		/* Re-check until the list head is no longer this thread. */
+		wait_event_interruptible_timeout(queue, (t != pktgen_threads), HZ);
+        }
+
+        /* Un-register us from receiving netdevice events */
+	unregister_netdevice_notifier(&pktgen_notifier_block);
+
+        /* Clean up proc file system */
+
+        remove_proc_entry(module_fname, NULL);
+        
+	remove_proc_dir();
+}
+
+
+/* Module entry and exit points. */
+module_init(pg_init);
+module_exit(pg_cleanup);
+
+MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se>");
+MODULE_DESCRIPTION("Packet Generator tool");
+MODULE_LICENSE("GPL");
+/* Load-time parameters; permission 0 means they are not exported via sysfs. */
+module_param(pg_count_d, int, 0);
+module_param(pg_delay_d, int, 0);
+module_param(pg_clone_skb_d, int, 0);
+module_param(debug, int, 0);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
new file mode 100644
index 00000000000..d69ad90e581
--- /dev/null
+++ b/net/core/rtnetlink.c
@@ -0,0 +1,711 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Routing netlink socket interface: protocol independent part.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/security.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/string.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/* The RTNL semaphore; it is taken and released by the helpers below. */
+DECLARE_MUTEX(rtnl_sem);
+
+/* Acquire the RTNL lock; a function-call wrapper around rtnl_shlock(). */
+void rtnl_lock(void)
+{
+	rtnl_shlock();
+}
+
+/*
+ * Acquire rtnl_sem with down_interruptible() and hand its return value
+ * straight back to the caller.
+ */
+int rtnl_lock_interruptible(void)
+{
+	return down_interruptible(&rtnl_sem);
+}
+ 
+/*
+ * Release the RTNL lock and then, with the lock already dropped, call
+ * netdev_run_todo().
+ */
+void rtnl_unlock(void)
+{
+	rtnl_shunlock();
+
+	netdev_run_todo();
+}
+
+/*
+ * Index a run of attributes by type.
+ *
+ * @tb:      table of @maxattr pointers; cleared first, then tb[type - 1]
+ *           is pointed at each attribute of that type (the last one wins).
+ * @maxattr: highest attribute type that is recorded.
+ * @rta:     first attribute of the run.
+ * @len:     number of bytes available starting at @rta.
+ *
+ * Attributes of type 0 or above @maxattr are skipped.  Always returns 0.
+ */
+int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
+{
+	unsigned type;
+
+	memset(tb, 0, maxattr * sizeof(tb[0]));
+
+	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
+		type = rta->rta_type;
+		if (type == 0 || type > maxattr)
+			continue;
+		tb[type - 1] = rta;
+	}
+
+	return 0;
+}
+
+/* The kernel-side rtnetlink socket; created in rtnetlink_init(). */
+struct sock *rtnl;
+
+/*
+ * Per-family handler tables, indexed by protocol family; each table is
+ * indexed by (message type - RTM_BASE).
+ */
+struct rtnetlink_link * rtnetlink_links[NPROTO];
+
+/*
+ * Minimum total message length (netlink header plus fixed payload) for
+ * each group of four message types.  rtnetlink_rcv_msg() indexes this
+ * table with (type - RTM_BASE) >> 2.
+ */
+static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
+{
+	NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+	NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+	NLMSG_LENGTH(sizeof(struct rtmsg)),
+	NLMSG_LENGTH(sizeof(struct ndmsg)),
+	NLMSG_LENGTH(sizeof(struct rtmsg)),
+	NLMSG_LENGTH(sizeof(struct tcmsg)),
+	NLMSG_LENGTH(sizeof(struct tcmsg)),
+	NLMSG_LENGTH(sizeof(struct tcmsg)),
+	NLMSG_LENGTH(sizeof(struct tcamsg))
+};
+
+/*
+ * Highest attribute type accepted for each group of four message types;
+ * indexed like rtm_min.  rtnetlink_init() sizes rta_buf from the largest
+ * entry in this table.
+ */
+static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
+{
+	IFLA_MAX,
+	IFA_MAX,
+	RTA_MAX,
+	NDA_MAX,
+	RTA_MAX,
+	TCA_MAX,
+	TCA_MAX,
+	TCA_MAX,
+	TCAA_MAX
+};
+
+/*
+ * Append one attribute to @skb.
+ *
+ * @skb:      buffer to extend; the caller must already have checked that
+ *            RTA_ALIGN(RTA_LENGTH(@attrlen)) bytes of tailroom exist.
+ * @attrtype: value stored in rta_type.
+ * @attrlen:  payload length in bytes.
+ * @data:     payload to copy (not read when @attrlen is 0).
+ *
+ * rta_len records the unaligned length; the alignment padding that
+ * follows the payload is zeroed so that stale buffer contents are not
+ * sent to netlink listeners.
+ */
+void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+	struct rtattr *rta;
+	int size = RTA_LENGTH(attrlen);
+
+	rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
+	rta->rta_type = attrtype;
+	rta->rta_len = size;
+	memcpy(RTA_DATA(rta), data, attrlen);
+	/* clear the 0..3 padding bytes between payload end and alignment */
+	memset((char *)RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
+}
+
+/*
+ * Copy the string payload of attribute @rta into @dest, strlcpy-style.
+ *
+ * @dest: destination buffer of @size bytes; when @size is non-zero it is
+ *        fully zeroed first, so the result is always NUL-terminated.
+ * @rta:  attribute whose payload is the string; a single trailing NUL in
+ *        the payload is not counted as part of the string.
+ * @size: size of @dest.
+ *
+ * Returns the length of the source string.  A return value >= @size
+ * means the copy was truncated.
+ */
+size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
+{
+	const char *payload = RTA_DATA(rta);
+	size_t srclen = RTA_PAYLOAD(rta);
+	size_t copy;
+
+	if (srclen && !payload[srclen - 1])
+		srclen--;
+
+	if (!size)
+		return srclen;
+
+	copy = srclen;
+	if (copy >= size)
+		copy = size - 1;
+
+	memset(dest, 0, size);
+	memcpy(dest, payload, copy);
+
+	return srclen;
+}
+
+/*
+ * Broadcast @skb to multicast group mask @group on the rtnetlink socket
+ * and, when @echo is set, additionally unicast it to @pid.
+ *
+ * For the echo case an extra reference on the skb is taken before the
+ * broadcast so that the buffer is still available for the unicast.
+ * Returns 0 without echo, otherwise the result of netlink_unicast().
+ */
+int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+{
+	NETLINK_CB(skb).dst_groups = group;
+
+	if (!echo) {
+		netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
+		return 0;
+	}
+
+	atomic_inc(&skb->users);
+	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
+	return netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+}
+
+/*
+ * Append an RTA_METRICS attribute to @skb whose payload is one nested
+ * attribute per non-zero entry of @metrics (entry i is emitted with
+ * attribute type i + 1).
+ *
+ * Returns 0 on success; if every metric was zero, the empty RTA_METRICS
+ * header is trimmed off again.  Returns -1 on failure, with the skb
+ * trimmed back to its length on entry.
+ */
+int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
+{
+	/* Where the RTA_METRICS header will be written: the current tail. */
+	struct rtattr *mx = (struct rtattr*)skb->tail;
+	int i;
+
+	RTA_PUT(skb, RTA_METRICS, 0, NULL);
+	for (i=0; i<RTAX_MAX; i++) {
+		if (metrics[i])
+			RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+	}
+	/* Patch the outer length now that the nested attributes are in place. */
+	mx->rta_len = skb->tail - (u8*)mx;
+	if (mx->rta_len == RTA_LENGTH(0))
+		skb_trim(skb, (u8*)mx - skb->data);
+	return 0;
+
+/* Presumably reached from inside RTA_PUT when the skb lacks tailroom. */
+rtattr_failure:
+	skb_trim(skb, (u8*)mx - skb->data);
+	return -1;
+}
+
+
+/*
+ * Append one link message of type @type describing @dev to @skb: an
+ * ifinfomsg header followed by the device's attributes.
+ *
+ * @pid/@seq are stored in the netlink header; a non-zero @pid also marks
+ * the message NLM_F_MULTI.  @change is copied into ifi_change.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to
+ * its length on entry and -1 is returned.  The failure labels are only
+ * reached from inside the NLMSG_PUT/RTA_PUT/__RTA_PUT macros
+ * (presumably when the skb runs out of tailroom).
+ */
+static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+				 int type, u32 pid, u32 seq, u32 change)
+{
+	struct ifinfomsg *r;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;	/* start of this message, for length and rollback */
+
+	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	r = NLMSG_DATA(nlh);
+	r->ifi_family = AF_UNSPEC;
+	r->ifi_type = dev->type;
+	r->ifi_index = dev->ifindex;
+	r->ifi_flags = dev_get_flags(dev);
+	r->ifi_change = change;
+
+	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+
+	/* The "if (1)" blocks below only open a scope for a local variable. */
+	if (1) {
+		u32 txqlen = dev->tx_queue_len;
+		RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
+	}
+
+	if (1) {
+		u32 weight = dev->weight;
+		RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
+	}
+
+	if (1) {
+		struct rtnl_link_ifmap map = {
+			.mem_start   = dev->mem_start,
+			.mem_end     = dev->mem_end,
+			.base_addr   = dev->base_addr,
+			.irq         = dev->irq,
+			.dma         = dev->dma,
+			.port        = dev->if_port,
+		};
+		RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+	}
+
+	/* Hardware and broadcast address only for devices that have one. */
+	if (dev->addr_len) {
+		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+	}
+
+	if (1) {
+		u32 mtu = dev->mtu;
+		RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+	}
+
+	/* IFLA_LINK is emitted only when iflink differs from the device's own index. */
+	if (dev->ifindex != dev->iflink) {
+		u32 iflink = dev->iflink;
+		RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
+	}
+
+	if (dev->qdisc_sleeping)
+		RTA_PUT(skb, IFLA_QDISC,
+			strlen(dev->qdisc_sleeping->ops->id) + 1,
+			dev->qdisc_sleeping->ops->id);
+	
+	if (dev->master) {
+		u32 master = dev->master->ifindex;
+		RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+	}
+
+	if (dev->get_stats) {
+		/* The stats block is read as a plain array of unsigned long counters. */
+		unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+		if (stats) {
+			struct rtattr  *a;
+			__u32	       *s;
+			int		i;
+			int		n = sizeof(struct rtnl_link_stats)/4;
+
+			a = __RTA_PUT(skb, IFLA_STATS, n*4);
+			s = RTA_DATA(a);
+			/* Each counter is narrowed from unsigned long to 32 bits. */
+			for (i=0; i<n; i++)
+				s[i] = stats[i];
+		}
+	}
+	/* Final message length: everything appended since entry. */
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+/*
+ * Dump callback for links: walk dev_base under dev_base_lock and append
+ * an RTM_NEWLINK message to @skb for every device at or beyond the
+ * position saved in cb->args[0].  The walk stops at the first device
+ * that cannot be added; its position is stored back into cb->args[0]
+ * so that a later call resumes there.  Returns skb->len.
+ */
+static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net_device *dev;
+	int resume_at = cb->args[0];
+	int pos = 0;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next, pos++) {
+		if (pos >= resume_at &&
+		    rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					  NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, 0) <= 0)
+			break;
+	}
+	read_unlock(&dev_base_lock);
+
+	cb->args[0] = pos;
+	return skb->len;
+}
+
+/*
+ * Handler for RTM_SETLINK: apply the settings carried in the message to
+ * one network device.
+ *
+ * @nlh: the message; its payload starts with a struct ifinfomsg.
+ * @arg: table of attribute pointers indexed by (attribute type - 1).
+ *
+ * The device is selected by ifi_index when that is >= 0, otherwise by
+ * the IFLA_IFNAME attribute.  Attributes are applied in a fixed order
+ * and processing stops at the first failure; settings applied before
+ * the failure are not rolled back.
+ *
+ * Returns 0 on success, -ENODEV if the device is not found or not
+ * present, -EOPNOTSUPP if the driver lacks the needed hook, -EINVAL for
+ * a missing selector or a wrongly sized attribute, or the error
+ * returned by the underlying setter.
+ */
+static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ifinfomsg  *ifm = NLMSG_DATA(nlh);
+	struct rtattr    **ida = arg;
+	struct net_device *dev;
+	int err, send_addr_notify = 0;
+
+	if (ifm->ifi_index >= 0)
+		dev = dev_get_by_index(ifm->ifi_index);
+	else if (ida[IFLA_IFNAME - 1]) {
+		char ifname[IFNAMSIZ];
+
+		/* A return >= IFNAMSIZ means the name did not fit. */
+		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
+		                   IFNAMSIZ) >= IFNAMSIZ)
+			return -EINVAL;
+		dev = dev_get_by_name(ifname);
+	} else
+		return -EINVAL;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* Default for every "goto out" below that does not set err itself. */
+	err = -EINVAL;
+
+	/* NOTE(review): the result of dev_change_flags() is not checked. */
+	if (ifm->ifi_flags)
+		dev_change_flags(dev, ifm->ifi_flags);
+
+	if (ida[IFLA_MAP - 1]) {
+		struct rtnl_link_ifmap *u_map;
+		struct ifmap k_map;
+
+		if (!dev->set_config) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto out;
+		}
+		
+		if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
+			goto out;
+
+		u_map = RTA_DATA(ida[IFLA_MAP - 1]);
+
+		/* Convert the netlink layout into the struct ifmap the driver takes. */
+		k_map.mem_start = (unsigned long) u_map->mem_start;
+		k_map.mem_end = (unsigned long) u_map->mem_end;
+		k_map.base_addr = (unsigned short) u_map->base_addr;
+		k_map.irq = (unsigned char) u_map->irq;
+		k_map.dma = (unsigned char) u_map->dma;
+		k_map.port = (unsigned char) u_map->port;
+
+		err = dev->set_config(dev, &k_map);
+
+		if (err)
+			goto out;
+	}
+
+	if (ida[IFLA_ADDRESS - 1]) {
+		if (!dev->set_mac_address) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto out;
+		}
+		/* The payload must be exactly one hardware address long. */
+		if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
+			goto out;
+
+		err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+		if (err)
+			goto out;
+		send_addr_notify = 1;
+	}
+
+	if (ida[IFLA_BROADCAST - 1]) {
+		if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
+			goto out;
+		memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
+		       dev->addr_len);
+		send_addr_notify = 1;
+	}
+
+	if (ida[IFLA_MTU - 1]) {
+		if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
+			goto out;
+		err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
+
+		if (err)
+			goto out;
+
+	}
+
+	if (ida[IFLA_TXQLEN - 1]) {
+		if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
+			goto out;
+
+		dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
+	}
+
+	if (ida[IFLA_WEIGHT - 1]) {
+		if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
+			goto out;
+
+		dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
+	}
+
+	/*
+	 * When the device was selected by index, an IFLA_IFNAME attribute
+	 * is a rename request rather than a selector.
+	 */
+	if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
+		char ifname[IFNAMSIZ];
+
+		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
+		                   IFNAMSIZ) >= IFNAMSIZ)
+			goto out;
+		err = dev_change_name(dev, ifname);
+		if (err)
+			goto out;
+	}
+
+	err = 0;
+
+out:
+	/*
+	 * Sent whenever the hardware or broadcast address was changed,
+	 * even if a later attribute failed.
+	 */
+	if (send_addr_notify)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
+	/* Drop the reference taken by dev_get_by_index()/dev_get_by_name(). */
+	dev_put(dev);
+	return err;
+}
+
+/*
+ * Dump callback that fans a request out to every protocol family: for
+ * each family from cb->family onwards (never family 0, never PF_PACKET)
+ * that registered a dumpit handler for this message type, call it.
+ * cb->args is cleared whenever the walk moves past the family it was
+ * resumed at.  The walk stops at the first handler that returns
+ * non-zero; that family is saved in cb->family for the next call.
+ * Returns skb->len.
+ */
+static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int first = cb->family;
+	int fam;
+
+	if (first == 0)
+		first = 1;
+
+	for (fam = 1; fam < NPROTO; fam++) {
+		int type = cb->nlh->nlmsg_type-RTM_BASE;
+		struct rtnetlink_link *tab;
+
+		if (fam < first || fam == PF_PACKET)
+			continue;
+
+		tab = rtnetlink_links[fam];
+		if (!tab || !tab[type].dumpit)
+			continue;
+
+		/* a new family starts its dump from scratch */
+		if (fam > first)
+			memset(&cb->args[0], 0, sizeof(cb->args));
+
+		if (tab[type].dumpit(skb, cb))
+			break;
+	}
+
+	cb->family = fam;
+	return skb->len;
+}
+
+/*
+ * Build a link message of type @type for @dev (with @change as its
+ * ifi_change mask) and broadcast it to the RTMGRP_LINK group.  Failure
+ * to allocate or to fill the message is silent: nothing is sent.
+ */
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
+{
+	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
+			       sizeof(struct rtnl_link_ifmap) +
+			       sizeof(struct rtnl_link_stats) + 128);
+	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+
+	if (!skb)
+		return;
+
+	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) >= 0) {
+		NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
+		netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+	} else {
+		kfree_skb(skb);
+	}
+}
+
+/*
+ * Completion callback passed to netlink_dump_start(); there is nothing
+ * to clean up, so it just returns 0.
+ */
+static int rtnetlink_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+/*
+ * Protected by RTNL semaphore.
+ *
+ * rta_buf is the scratch table of attribute pointers filled in for each
+ * incoming message and handed to the doit handler; rtattr_max is its
+ * size in entries.  Both are set up in rtnetlink_init().
+ */
+static struct rtattr **rta_buf;
+static int rtattr_max;
+
+/*
+ * Process one rtnetlink message.
+ *
+ * Return value and *errp are interpreted together by rtnetlink_rcv_skb():
+ *   0             - message handled or ignored; *errp is not meaningful.
+ *   != 0, *errp != 0 - an error that should be acknowledged with *errp.
+ *   != 0, *errp == 0 - a dump was started; stop processing this skb.
+ */
+
+static __inline__ int
+rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+	struct rtnetlink_link *link;
+	struct rtnetlink_link *link_tab;
+	int sz_idx, kind;
+	int min_len;
+	int family;
+	int type;
+	int err;
+
+	/* Only requests are handled by kernel now */
+	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+		return 0;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < RTM_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > RTM_MAX)
+		goto err_inval;
+
+	type -= RTM_BASE;
+
+	/* All the messages must have at least 1 byte length */
+	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
+		return 0;
+
+	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
+	if (family >= NPROTO) {
+		*errp = -EAFNOSUPPORT;
+		return -1;
+	}
+
+	/* Families without a table of their own fall back to PF_UNSPEC. */
+	link_tab = rtnetlink_links[family];
+	if (link_tab == NULL)
+		link_tab = rtnetlink_links[PF_UNSPEC];
+	link = &link_tab[type];
+
+	/* sz_idx selects the group of four types; kind is the position inside it. */
+	sz_idx = type>>2;
+	kind = type&3;
+
+	/* Everything except kind 2 must pass the security_netlink_recv() check. */
+	if (kind != 2 && security_netlink_recv(skb)) {
+		*errp = -EPERM;
+		return -1;
+	}
+
+	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+		u32 rlen;
+
+		if (link->dumpit == NULL)
+			link = &(rtnetlink_links[PF_UNSPEC][type]);
+
+		if (link->dumpit == NULL)
+			goto err_inval;
+
+		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
+						link->dumpit,
+						rtnetlink_done)) != 0) {
+			return -1;
+		}
+		/*
+		 * Dump started (*errp is 0 here): consume this message
+		 * ourselves and return non-zero so the caller stops.
+		 */
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return -1;
+	}
+
+	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
+
+	min_len = rtm_min[sz_idx];
+	if (nlh->nlmsg_len < min_len)
+		goto err_inval;
+
+	/* Anything beyond the fixed header is a run of attributes: index them. */
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				/* An out-of-range type rejects the whole message. */
+				if (flavor > rta_max[sz_idx])
+					goto err_inval;
+				rta_buf[flavor-1] = attr;
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		link = &(rtnetlink_links[PF_UNSPEC][type]);
+	if (link->doit == NULL)
+		goto err_inval;
+	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+
+	*errp = err;
+	return err;
+
+err_inval:
+	*errp = -EINVAL;
+	return -1;
+}
+
+/* 
+ * Process one packet of messages.
+ * Malformed skbs with wrong lengths of messages are discarded silently.
+ *
+ * Each message is consumed from the front of the skb after it has been
+ * handled.  Returns 0 when the skb is finished with (fully processed or
+ * malformed), or -1 when processing has to stop early because
+ * rtnetlink_rcv_msg() returned non-zero with a zero error code.
+ */
+
+static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr * nlh;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *)skb->data;
+		/* Header length must be sane and the message must fit in the skb. */
+		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+			return 0;
+		/* Bytes to consume: the aligned length, capped at what is left. */
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		if (rtnetlink_rcv_msg(skb, nlh, &err)) {
+			/* Not error, but we must interrupt processing here:
+			 *   Note, that in this case we do not pull message
+			 *   from skb, it will be processed later.
+			 */
+			if (err == 0)
+				return -1;
+			netlink_ack(skb, nlh, err);
+		} else if (nlh->nlmsg_flags&NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);
+	}
+
+	return 0;
+}
+
+/*
+ *  rtnetlink input queue processing routine:
+ *	- try to acquire shared lock. If it is failed, defer processing.
+ *	- feed skbs to rtnetlink_rcv_skb, until it refuse a message,
+ *	  that will occur, when a dump started and/or acquisition of
+ *	  exclusive lock failed.
+ *
+ *  The outer loop repeats while the rtnetlink socket still has queued
+ *  skbs after the lock has been released.
+ */
+
+static void rtnetlink_rcv(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		/* Non-zero means the lock could not be taken: give up for now. */
+		if (rtnl_shlock_nowait())
+			return;
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			if (rtnetlink_rcv_skb(skb)) {
+				/*
+				 * Processing was interrupted: put back what
+				 * is left of the skb, or free it if it was
+				 * fully consumed.
+				 */
+				if (skb->len)
+					skb_queue_head(&sk->sk_receive_queue,
+						       skb);
+				else
+					kfree_skb(skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+
+		up(&rtnl_sem);
+
+		/* Runs with rtnl_sem already released. */
+		netdev_run_todo();
+	} while (rtnl && rtnl->sk_receive_queue.qlen);
+}
+
+/*
+ * Protocol-independent handler table, indexed by (message type -
+ * RTM_BASE).  rtnetlink_init() installs it for both PF_UNSPEC and
+ * PF_PACKET; entries not listed here have neither a doit nor a dumpit
+ * handler.
+ */
+static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+	[RTM_GETLINK  - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
+	[RTM_SETLINK  - RTM_BASE] = { .doit   = do_setlink	      },
+	[RTM_GETADDR  - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
+	[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
+	[RTM_NEWNEIGH - RTM_BASE] = { .doit   = neigh_add	      },
+	[RTM_DELNEIGH - RTM_BASE] = { .doit   = neigh_delete	      },
+	[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info	      }
+};
+
+/*
+ * Netdevice notifier: translate device events into link messages.
+ *
+ *   NETDEV_UNREGISTER       -> RTM_DELLINK, change mask ~0
+ *   NETDEV_REGISTER         -> RTM_NEWLINK, change mask ~0
+ *   NETDEV_UP / NETDEV_DOWN -> RTM_NEWLINK, change mask IFF_UP|IFF_RUNNING
+ *   NETDEV_CHANGE / NETDEV_GOING_DOWN -> no message
+ *   anything else           -> RTM_NEWLINK, change mask 0
+ *
+ * Always returns NOTIFY_DONE.
+ */
+static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	int msg_type = RTM_NEWLINK;
+	unsigned changed = 0;
+
+	switch (event) {
+	case NETDEV_CHANGE:
+	case NETDEV_GOING_DOWN:
+		/* deliberately not reported */
+		return NOTIFY_DONE;
+	case NETDEV_UNREGISTER:
+		msg_type = RTM_DELLINK;
+		changed = ~0U;
+		break;
+	case NETDEV_REGISTER:
+		changed = ~0U;
+		break;
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+		changed = IFF_UP|IFF_RUNNING;
+		break;
+	default:
+		break;
+	}
+
+	rtmsg_ifinfo(msg_type, dev, changed);
+	return NOTIFY_DONE;
+}
+
+/* Notifier block registered by rtnetlink_init(); routes netdevice events to rtnetlink_event(). */
+static struct notifier_block rtnetlink_dev_notifier = {
+	.notifier_call	= rtnetlink_event,
+};
+
+/*
+ * Boot-time setup of rtnetlink: size and allocate the shared attribute
+ * table from the largest entry of rta_max, create the NETLINK_ROUTE
+ * kernel socket, allow non-root receivers, hook into netdevice
+ * notifications and install the generic handler table for PF_UNSPEC
+ * and PF_PACKET.  Failure to allocate the table or to create the
+ * socket panics.
+ */
+void __init rtnetlink_init(void)
+{
+	int widest = 0;
+	int i = 0;
+
+	while (i < ARRAY_SIZE(rta_max)) {
+		if (rta_max[i] > widest)
+			widest = rta_max[i];
+		i++;
+	}
+	rtattr_max = widest;
+
+	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
+	if (rta_buf == NULL)
+		panic("rtnetlink_init: cannot allocate rta_buf\n");
+
+	rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+	if (!rtnl)
+		panic("rtnetlink_init: cannot initialize rtnetlink\n");
+
+	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+	register_netdevice_notifier(&rtnetlink_dev_notifier);
+
+	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
+	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+}
+
+/* Symbols from this file that are exported for use by other kernel code. */
+EXPORT_SYMBOL(__rta_fill);
+EXPORT_SYMBOL(rtattr_strlcpy);
+EXPORT_SYMBOL(rtattr_parse);
+EXPORT_SYMBOL(rtnetlink_links);
+EXPORT_SYMBOL(rtnetlink_put_metrics);
+EXPORT_SYMBOL(rtnl);
+EXPORT_SYMBOL(rtnl_lock);
+EXPORT_SYMBOL(rtnl_lock_interruptible);
+EXPORT_SYMBOL(rtnl_sem);
+EXPORT_SYMBOL(rtnl_unlock);
diff --git a/net/core/scm.c b/net/core/scm.c
new file mode 100644
index 00000000000..a2ebf30f6aa
--- /dev/null
+++ b/net/core/scm.c
@@ -0,0 +1,291 @@
+/* scm.c - Socket level control messages processing.
+ *
+ * Author:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Alignment and value checking mods by Craig Metz
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/security.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/compat.h>
+#include <net/scm.h>
+
+
+/*
+ *	Only allow a user to send credentials, that they could set with 
+ *	setu(g)id.
+ */
+
+static __inline__ int scm_check_creds(struct ucred *creds)
+{
+	/* The pid must be the caller's thread group id, unless CAP_SYS_ADMIN. */
+	if (creds->pid != current->tgid && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* The uid must be the caller's real, effective or saved uid, unless CAP_SETUID. */
+	if (creds->uid != current->uid && creds->uid != current->euid &&
+	    creds->uid != current->suid && !capable(CAP_SETUID))
+		return -EPERM;
+
+	/* The gid must be the caller's real, effective or saved gid, unless CAP_SETGID. */
+	if (creds->gid != current->gid && creds->gid != current->egid &&
+	    creds->gid != current->sgid && !capable(CAP_SETGID))
+		return -EPERM;
+
+	return 0;
+}
+
+/*
+ * Append the descriptors carried by an SCM_RIGHTS control message to the
+ * list at *fplp, allocating the list on first use.  Returns the number of
+ * descriptors in the message (0 if it carries none) or a negative errno.
+ * On -EBADF the files taken so far stay on the list, counted in ->count.
+ */
+static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+{
+	struct scm_fp_list *list = *fplp;
+	int *fds = (int*)CMSG_DATA(cmsg);
+	int nfds, idx;
+
+	/* Payload length after the aligned header, in units of int. */
+	nfds = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+
+	if (nfds <= 0)
+		return 0;
+	if (nfds > SCM_MAX_FD)
+		return -EINVAL;
+
+	if (list == NULL) {
+		list = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		if (list == NULL)
+			return -ENOMEM;
+		*fplp = list;
+		list->count = 0;
+	}
+
+	/* The combined total over all messages is capped as well. */
+	if (list->count + nfds > SCM_MAX_FD)
+		return -EINVAL;
+
+	/* Look up each descriptor; fget() takes a reference on success. */
+	for (idx = 0; idx < nfds; idx++) {
+		struct file *file;
+		int fd = fds[idx];
+
+		if (fd < 0)
+			return -EBADF;
+		file = fget(fd);
+		if (!file)
+			return -EBADF;
+		list->fp[list->count] = file;
+		list->count++;
+	}
+	return nfds;
+}
+
+/*
+ * Drop every file reference held in scm->fp and free the list.
+ * Does nothing when the cookie carries no list.
+ */
+void __scm_destroy(struct scm_cookie *scm)
+{
+	struct scm_fp_list *list = scm->fp;
+	int idx;
+
+	if (!list)
+		return;
+
+	/* Detach the list from the cookie before releasing its contents. */
+	scm->fp = NULL;
+
+	/* Release in reverse order, last file first. */
+	idx = list->count;
+	while (--idx >= 0)
+		fput(list->fp[idx]);
+	kfree(list);
+}
+
+/*
+ * Walk the control messages of @msg and collect SOL_SOCKET ancillary
+ * data into @p: passed files (SCM_RIGHTS) and credentials
+ * (SCM_CREDENTIALS).  Returns 0 on success.  On any error the cookie is
+ * torn down with scm_destroy() and a negative errno is returned.
+ */
+int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
+{
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+	int err;
+
+	while (cmsg) {
+		err = -EINVAL;
+
+		/*
+		 * Validate the header of every control message, including
+		 * those of other levels which are otherwise skipped below;
+		 * without this check a too-short non-SOL_SOCKET object
+		 * would never be rejected.
+		 */
+		if (!CMSG_OK(msg, cmsg))
+			goto error;
+
+		if (cmsg->cmsg_level == SOL_SOCKET) {
+			if (cmsg->cmsg_type == SCM_RIGHTS) {
+				err = scm_fp_copy(cmsg, &p->fp);
+				if (err < 0)
+					goto error;
+			} else if (cmsg->cmsg_type == SCM_CREDENTIALS) {
+				/* The payload must be exactly one struct ucred. */
+				if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
+					goto error;
+				memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+				err = scm_check_creds(&p->creds);
+				if (err)
+					goto error;
+			} else {
+				/* Unknown SOL_SOCKET type: -EINVAL. */
+				goto error;
+			}
+		}
+
+		cmsg = CMSG_NXTHDR(msg, cmsg);
+	}
+
+	/* A list that ended up holding no files is not kept. */
+	if (p->fp && p->fp->count == 0) {
+		kfree(p->fp);
+		p->fp = NULL;
+	}
+	return 0;
+
+error:
+	scm_destroy(p);
+	return err;
+}
+
+/*
+ * Append one control message (@level, @type, @len bytes at @data) to the
+ * user control buffer described by @msg.
+ *
+ * If the buffer cannot hold even a header, MSG_CTRUNC is set and 0 is
+ * returned without writing anything.  If only the payload does not fit,
+ * MSG_CTRUNC is set and a shortened message is written.  Returns 0 on
+ * success or -EFAULT if the copy to user space fails.  Compat callers
+ * (MSG_CMSG_COMPAT) are forwarded to put_cmsg_compat().
+ */
+int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
+{
+	struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
+	struct cmsghdr cmhdr;
+	int cmlen = CMSG_LEN(len);
+	int err;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags)
+		return put_cmsg_compat(msg, level, type, len, data);
+
+	if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
+		msg->msg_flags |= MSG_CTRUNC;
+		return 0; /* XXX: return error? check spec. */
+	}
+	if (msg->msg_controllen < cmlen) {
+		msg->msg_flags |= MSG_CTRUNC;
+		cmlen = msg->msg_controllen;
+	}
+	cmhdr.cmsg_level = level;
+	cmhdr.cmsg_type = type;
+	cmhdr.cmsg_len = cmlen;
+
+	err = -EFAULT;
+	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+		goto out; 
+	if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+		goto out;
+
+	/*
+	 * Advance past the message including its alignment padding, but
+	 * never by more than what is left in the buffer: the padded size
+	 * can exceed msg_controllen (it always does after a truncation),
+	 * and subtracting it from the unsigned length would wrap around
+	 * and make the remaining buffer look huge to later callers.
+	 */
+	cmlen = CMSG_SPACE(len);
+	if (msg->msg_controllen < cmlen)
+		cmlen = msg->msg_controllen;
+	msg->msg_control += cmlen;
+	msg->msg_controllen -= cmlen;
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * Install the files held in scm->fp into the current process and report
+ * the new descriptor numbers to user space as one SCM_RIGHTS control
+ * message in @msg's control buffer.  Files that do not fit, or that
+ * cannot be installed, are not delivered and MSG_CTRUNC is set.  The
+ * cookie's file list is always released before returning.  Compat
+ * callers (MSG_CMSG_COMPAT) are forwarded to scm_detach_fds_compat().
+ */
+void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
+{
+	struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
+
+	int fdmax = 0;
+	int fdnum = scm->fp->count;
+	struct file **fp = scm->fp->fp;
+	int __user *cmfptr;
+	int err = 0, i;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags) {
+		scm_detach_fds_compat(msg, scm);
+		return;
+	}
+
+	/* How many descriptors fit behind one header in the user buffer. */
+	if (msg->msg_controllen > sizeof(struct cmsghdr))
+		fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
+			 / sizeof(int));
+
+	if (fdnum < fdmax)
+		fdmax = fdnum;
+
+	for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
+	{
+		int new_fd;
+		err = security_file_receive(fp[i]);
+		if (err)
+			break;
+		err = get_unused_fd();
+		if (err < 0)
+			break;
+		new_fd = err;
+		err = put_user(new_fd, cmfptr);
+		if (err) {
+			/* Give the reserved slot back; nothing was installed. */
+			put_unused_fd(new_fd);
+			break;
+		}
+		/* Bump the usage count and install the file. */
+		get_file(fp[i]);
+		fd_install(new_fd, fp[i]);
+	}
+
+	if (i > 0)
+	{
+		int cmlen = CMSG_LEN(i*sizeof(int));
+		if (!err)
+			err = put_user(SOL_SOCKET, &cm->cmsg_level);
+		if (!err)
+			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+		if (!err)
+			err = put_user(cmlen, &cm->cmsg_len);
+		if (!err) {
+			/*
+			 * fdmax was sized from the unpadded length, so the
+			 * padded size may exceed what is left in the buffer.
+			 * Clamp the advance so the unsigned msg_controllen
+			 * cannot wrap around.
+			 */
+			cmlen = CMSG_SPACE(i*sizeof(int));
+			if (msg->msg_controllen < cmlen)
+				cmlen = msg->msg_controllen;
+			msg->msg_control += cmlen;
+			msg->msg_controllen -= cmlen;
+		}
+	}
+	if (i < fdnum || (fdnum && fdmax <= 0))
+		msg->msg_flags |= MSG_CTRUNC;
+
+	/*
+	 * All of the files that fit in the message have had their
+	 * usage counts incremented, so we just free the list.
+	 */
+	__scm_destroy(scm);
+}
+
+/*
+ * Duplicate a passed-file list, taking one extra reference on every file
+ * it holds.  Returns NULL for a NULL list or when the allocation fails.
+ */
+struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
+{
+	struct scm_fp_list *copy;
+	int idx;
+
+	if (fpl == NULL)
+		return NULL;
+
+	copy = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	if (copy == NULL)
+		return NULL;
+
+	/* Walk from the last file down to the first. */
+	idx = fpl->count;
+	while (idx-- > 0)
+		get_file(fpl->fp[idx]);
+
+	memcpy(copy, fpl, sizeof(*fpl));
+	return copy;
+}
+
+/* Socket control message helpers exported for use outside this file. */
+EXPORT_SYMBOL(__scm_destroy);
+EXPORT_SYMBOL(__scm_send);
+EXPORT_SYMBOL(put_cmsg);
+EXPORT_SYMBOL(scm_detach_fds);
+EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
new file mode 100644
index 00000000000..bf02ca9f80a
--- /dev/null
+++ b/net/core/skbuff.c
@@ -0,0 +1,1460 @@
+/*
+ *	Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
+ *			Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
+ *
+ *	Fixes:
+ *		Alan Cox	:	Fixed the worst of the load
+ *					balancer bugs.
+ *		Dave Platt	:	Interrupt stacking fix.
+ *	Richard Kooijman	:	Timestamp fixes.
+ *		Alan Cox	:	Changed buffer format.
+ *		Alan Cox	:	destructor hook for AF_UNIX etc.
+ *		Linus Torvalds	:	Better skb_clone.
+ *		Alan Cox	:	Added skb_copy.
+ *		Alan Cox	:	Added all the changed routines Linus
+ *					only put in the headers
+ *		Ray VanTassle	:	Fixed --skb->lock in free
+ *		Alan Cox	:	skb_copy copy arp field
+ *		Andi Kleen	:	slabified it.
+ *		Robert Olsson	:	Removed skb_head_pool
+ *
+ *	NOTE:
+ *		The __skb_ routines should be called with interrupts
+ *	disabled, or you better be *real* sure that the operation is atomic
+ *	with respect to whatever list is being frobbed (e.g. via lock_sock()
+ *	or via disabling bottom half handlers, etc).
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_NET_CLS_ACT
+#include <net/pkt_sched.h>
+#endif
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static kmem_cache_t *skbuff_head_cache;
+
+/*
+ *	Keep out-of-line to prevent kernel bloat.
+ *	__builtin_return_address is not used because it is not always
+ *	reliable.
+ */
+
+/**
+ *	skb_over_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_put(). Not user callable.
+ */
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+	/*
+	 * Report the caller address, current length, requested size and
+	 * device name.  The message ends with a newline so that the BUG()
+	 * report that follows starts on its own log line.
+	 */
+	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s\n",
+		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/**
+ *	skb_under_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_push(). Not user callable.
+ */
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+	/*
+	 * Report the caller address, current length, requested size and
+	 * device name.  The message ends with a newline so that the BUG()
+	 * report that follows starts on its own log line.
+	 */
+	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s\n",
+               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
+ *	'private' fields and also do memory statistics to find all the
+ *	[BEEP] leaks.
+ *
+ */
+
+/**
+ *	alloc_skb	-	allocate a network buffer
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	u8 *data;
+
+	/* The head comes from the skb head cache, never with __GFP_DMA. */
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		return NULL;
+
+	/*
+	 * The data area is followed by the shared info block.
+	 * Size must match skb_add_mtu().
+	 */
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data) {
+		kmem_cache_free(skbuff_head_cache, skb);
+		return NULL;
+	}
+
+	/* Zero every field that precedes truesize, then fill in the rest. */
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end  = data + size;
+
+	/* skb->end is set by now, so the shared info can be located. */
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags  = 0;
+	shinfo->tso_size = 0;
+	shinfo->tso_segs = 0;
+	shinfo->frag_list = NULL;
+
+	return skb;
+}
+
+/**
+ *	alloc_skb_from_cache	-	allocate a network buffer
+ *	@cp: kmem_cache from which to allocate the data area
+ *           (object size must be big enough for @size bytes + skb overheads)
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+				     unsigned int size, int gfp_mask)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	u8 *data;
+
+	/* The head comes from the skb head cache, never with __GFP_DMA. */
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		return NULL;
+
+	/* The data area is a single object taken from the caller's cache. */
+	size = SKB_DATA_ALIGN(size);
+	data = kmem_cache_alloc(cp, gfp_mask);
+	if (!data) {
+		kmem_cache_free(skbuff_head_cache, skb);
+		return NULL;
+	}
+
+	/* Zero every field that precedes truesize, then fill in the rest. */
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end  = data + size;
+
+	/* skb->end is set by now, so the shared info can be located. */
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags  = 0;
+	shinfo->tso_size = 0;
+	shinfo->tso_segs = 0;
+	shinfo->frag_list = NULL;
+
+	return skb;
+}
+
+
+/*
+ * Detach the fragment list from @skb and release every buffer on it.
+ * The list is dereferenced unconditionally, so it must not be empty.
+ */
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *next = skb_shinfo(skb)->frag_list;
+	struct sk_buff *victim;
+
+	skb_shinfo(skb)->frag_list = NULL;
+
+	do {
+		/* Fetch the successor before the current buffer goes away. */
+		victim = next;
+		next = victim->next;
+		kfree_skb(victim);
+	} while (next != NULL);
+}
+
+/* Take one additional reference on every buffer of @skb's fragment list. */
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *frag = skb_shinfo(skb)->frag_list;
+
+	while (frag) {
+		skb_get(frag);
+		frag = frag->next;
+	}
+}
+
+/*
+ * Drop this skb's hold on its data area.  The page fragments, the
+ * fragment list and the data buffer itself are released only when the
+ * skb is not cloned or when the dataref count drops to zero.
+ */
+void skb_release_data(struct sk_buff *skb)
+{
+	int i;
+
+	/* A cloned skb only frees the data if it held the last reference. */
+	if (skb->cloned &&
+	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+			      &skb_shinfo(skb)->dataref))
+		return;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		put_page(skb_shinfo(skb)->frags[i].page);
+
+	if (skb_shinfo(skb)->frag_list)
+		skb_drop_fraglist(skb);
+
+	kfree(skb->head);
+}
+
+/*
+ *	Free an skbuff by memory without cleaning the state: release the
+ *	data area through skb_release_data() and hand the head back to
+ *	skbuff_head_cache.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+	skb_release_data(skb);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
+/**
+ *	__kfree_skb - private function
+ *	@skb: buffer
+ *
+ *	Free an sk_buff. Release anything attached to the buffer.
+ *	Clean the state. This is an internal helper function. Users should
+ *	always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+	/* Freeing a buffer that is still linked on a list is a fatal bug. */
+	if (skb->list) {
+	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+		       "on a list (from %p).\n", NET_CALLER(skb));
+		BUG();
+	}
+
+	/* Drop the references this buffer holds on attached objects. */
+	dst_release(skb->dst);
+#ifdef CONFIG_XFRM
+	secpath_put(skb->sp);
+#endif
+	/* Run the destructor, if any; warn when that happens in hard IRQ. */
+	if(skb->destructor) {
+		if (in_irq())
+			printk(KERN_WARNING "Warning: kfree_skb on "
+					    "hard IRQ %p\n", NET_CALLER(skb));
+		skb->destructor(skb);
+	}
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(skb->nf_bridge);
+#endif
+#endif
+/* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+	skb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = 0;
+	skb->tc_classid = 0;
+#endif
+#endif
+
+	/* Finally release the data area and the head itself. */
+	kfree_skbmem(skb);
+}
+
+/**
+ *	skb_clone	-	duplicate an sk_buff
+ *	@skb: buffer to clone
+ *	@gfp_mask: allocation priority
+ *
+ *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ *	copies share the same packet data but not structure. The new
+ *	buffer has a reference count of 1. If the allocation fails the
+ *	function returns %NULL otherwise the new buffer is returned.
+ *
+ *	If this function is called from an interrupt gfp_mask() must be
+ *	%GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+
+	if (!n) 
+		return NULL;
+
+/* Copy one field verbatim from the original into the clone. */
+#define C(x) n->x = skb->x
+
+	/* The clone is on no list and is not owned by any socket. */
+	n->next = n->prev = NULL;
+	n->list = NULL;
+	n->sk = NULL;
+	C(stamp);
+	C(dev);
+	C(real_dev);
+	C(h);
+	C(nh);
+	C(mac);
+	/* Shared pointers are copied and an extra reference is taken. */
+	C(dst);
+	dst_clone(skb->dst);
+	C(sp);
+#ifdef CONFIG_INET
+	secpath_get(skb->sp);
+#endif
+	memcpy(n->cb, skb->cb, sizeof(skb->cb));
+	C(len);
+	C(data_len);
+	C(csum);
+	C(local_df);
+	n->cloned = 1;
+	n->nohdr = 0;
+	C(pkt_type);
+	C(ip_summed);
+	C(priority);
+	C(protocol);
+	C(security);
+	n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+	C(nfmark);
+	C(nfcache);
+	C(nfct);
+	nf_conntrack_get(skb->nfct);
+	C(nfctinfo);
+#ifdef CONFIG_NETFILTER_DEBUG
+	C(nf_debug);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	C(nf_bridge);
+	nf_bridge_get(skb->nf_bridge);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+	C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+	C(tc_index);
+#ifdef CONFIG_NET_CLS_ACT
+	/*
+	 * Derive the clone's verdict in three steps, each one applied to
+	 * the result of the previous step.  Restarting from skb->tc_verd
+	 * every time would throw away all but the last adjustment.
+	 */
+	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
+	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
+	C(input_dev);
+	C(tc_classid);
+#endif
+
+#endif
+	C(truesize);
+	atomic_set(&n->users, 1);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
+
+	/* Both buffers now share the data area: count it and mark both. */
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+
+	return n;
+}
+
+/*
+ * Copy the header state of @old into @new, which already has its own
+ * data area.  Pointers into the packet (h, nh, mac) are rebased by the
+ * distance between the two data areas; reference counted objects (dst,
+ * sec path, conntrack, bridge info) are shared with an extra reference.
+ * The new buffer ends up on no list, with no owning socket, no
+ * destructor and a user count of one.
+ */
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	/*
+	 *	Shift between the two data areas in bytes
+	 */
+	unsigned long offset = new->data - old->data;
+
+	new->list	= NULL;
+	new->sk		= NULL;
+	new->dev	= old->dev;
+	new->real_dev	= old->real_dev;
+	new->priority	= old->priority;
+	new->protocol	= old->protocol;
+	new->dst	= dst_clone(old->dst);
+#ifdef CONFIG_INET
+	new->sp		= secpath_get(old->sp);
+#endif
+	/* Rebase the header pointers onto the new data area. */
+	new->h.raw	= old->h.raw + offset;
+	new->nh.raw	= old->nh.raw + offset;
+	new->mac.raw	= old->mac.raw + offset;
+	memcpy(new->cb, old->cb, sizeof(old->cb));
+	new->local_df	= old->local_df;
+	new->pkt_type	= old->pkt_type;
+	new->stamp	= old->stamp;
+	new->destructor = NULL;
+	new->security	= old->security;
+#ifdef CONFIG_NETFILTER
+	new->nfmark	= old->nfmark;
+	new->nfcache	= old->nfcache;
+	new->nfct	= old->nfct;
+	nf_conntrack_get(old->nfct);
+	new->nfctinfo	= old->nfctinfo;
+#ifdef CONFIG_NETFILTER_DEBUG
+	new->nf_debug	= old->nf_debug;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	new->nf_bridge	= old->nf_bridge;
+	nf_bridge_get(old->nf_bridge);
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+#ifdef CONFIG_NET_CLS_ACT
+	new->tc_verd = old->tc_verd;
+#endif
+	new->tc_index	= old->tc_index;
+#endif
+	atomic_set(&new->users, 1);
+	/* TSO parameters live in the shared info of each data area. */
+	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
+	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+}
+
+/**
+ *	skb_copy	-	create private copy of an sk_buff
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data. This is used when the
+ *	caller wishes to modify the data and needs a private copy of the
+ *	data to alter. Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	As by-product this function converts non-linear &sk_buff to linear
+ *	one, so that &sk_buff becomes completely private and caller is allowed
+ *	to modify all the data of returned buffer. This means that this
+ *	function is not recommended for use in circumstances when only
+ *	header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *copy;
+	int headroom = skb->data - skb->head;
+
+	/*
+	 *	The new linear area holds the whole old linear area plus
+	 *	the data_len bytes of non-linear data.
+	 */
+	copy = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+	if (copy == NULL)
+		return NULL;
+
+	/* Same headroom as the original, then room for all of the data. */
+	skb_reserve(copy, headroom);
+	skb_put(copy, skb->len);
+	copy->ip_summed = skb->ip_summed;
+	copy->csum	= skb->csum;
+
+	/* A negative offset makes the copy start at the old skb->head. */
+	if (skb_copy_bits(skb, -headroom, copy->head, headroom + skb->len) != 0)
+		BUG();
+
+	copy_skb_header(copy, skb);
+	return copy;
+}
+
+
+/**
+ *	pskb_copy	-	create copy of an sk_buff with private head.
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and part of its data, located
+ *	in header. Fragmented data remain shared. This is used when
+ *	the caller wishes to modify only header of &sk_buff and needs
+ *	private copy of the header to alter. Returns %NULL on failure
+ *	or the pointer to the buffer on success.
+ *	The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *copy;
+	int i;
+
+	/*
+	 *	Only the linear area gets a private buffer
+	 */
+	copy = alloc_skb(skb->end - skb->head, gfp_mask);
+	if (copy == NULL)
+		return NULL;
+
+	/* Same headroom, then duplicate the bytes of the linear part. */
+	skb_reserve(copy, skb->data - skb->head);
+	skb_put(copy, skb_headlen(skb));
+	memcpy(copy->data, skb->data, copy->len);
+
+	copy->ip_summed = skb->ip_summed;
+	copy->csum	= skb->csum;
+
+	/* The lengths also cover the shared, non-linear data. */
+	copy->len	= skb->len;
+	copy->data_len	= skb->data_len;
+
+	/* Page fragments are shared: copy descriptors, pin the pages. */
+	if (skb_shinfo(skb)->nr_frags) {
+		i = 0;
+		while (i < skb_shinfo(skb)->nr_frags) {
+			skb_shinfo(copy)->frags[i] = skb_shinfo(skb)->frags[i];
+			get_page(skb_shinfo(copy)->frags[i].page);
+			i++;
+		}
+		skb_shinfo(copy)->nr_frags = i;
+	}
+
+	/* The fragment list is shared too; each member gets a reference. */
+	if (skb_shinfo(skb)->frag_list) {
+		skb_shinfo(copy)->frag_list = skb_shinfo(skb)->frag_list;
+		skb_clone_fraglist(copy);
+	}
+
+	copy_skb_header(copy, skb);
+	return copy;
+}
+
+/**
+ *	pskb_expand_head - reallocate header of &sk_buff
+ *	@skb: buffer to reallocate
+ *	@nhead: room to add at head
+ *	@ntail: room to add at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Expands (or creates identical copy, if &nhead and &ntail are zero)
+ *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	reference count of 1. Returns zero in the case of success or error,
+ *	if expansion failed. In the last case, &sk_buff is not changed.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+	int i;
+	u8 *data;
+	/* New linear area: old area plus the requested extra head and tail. */
+	int size = nhead + (skb->end - skb->head) + ntail;
+	long off;
+
+	if (skb_shared(skb))
+		BUG();
+
+	size = SKB_DATA_ALIGN(size);
+
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		goto nodata;
+
+	/* Copy only real data... and, alas, header. This should be
+	 * optimized for the cases when header is void. */
+	memcpy(data + nhead, skb->head, skb->tail - skb->head);
+	/* The shared info block is duplicated behind the new data area. */
+	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+
+	/*
+	 * The copied shared info refers to the same pages and fragment
+	 * list, so take references for it before the old data area is
+	 * released below.
+	 */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		get_page(skb_shinfo(skb)->frags[i].page);
+
+	if (skb_shinfo(skb)->frag_list)
+		skb_clone_fraglist(skb);
+
+	skb_release_data(skb);
+
+	/* Distance by which every pointer into the old area must move. */
+	off = (data + nhead) - skb->head;
+
+	skb->head     = data;
+	skb->end      = data + size;
+	skb->data    += off;
+	skb->tail    += off;
+	skb->mac.raw += off;
+	skb->h.raw   += off;
+	skb->nh.raw  += off;
+	/* The skb is now the only user of the freshly allocated area. */
+	skb->cloned   = 0;
+	skb->nohdr    = 0;
+	atomic_set(&skb_shinfo(skb)->dataref, 1);
+	return 0;
+
+nodata:
+	return -ENOMEM;
+}
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+	struct sk_buff *result;
+	int missing = headroom - skb_headroom(skb);
+
+	/* Enough headroom already: a copy with a private head will do. */
+	if (missing <= 0)
+		return pskb_copy(skb, GFP_ATOMIC);
+
+	/* Otherwise clone the buffer and grow the head of the clone. */
+	result = skb_clone(skb, GFP_ATOMIC);
+	if (!result)
+		return NULL;
+
+	if (pskb_expand_head(result, SKB_DATA_ALIGN(missing), 0,
+			     GFP_ATOMIC)) {
+		kfree_skb(result);
+		return NULL;
+	}
+	return result;
+}
+
+
+/**
+ *	skb_copy_expand	-	copy and expand sk_buff
+ *	@skb: buffer to copy
+ *	@newheadroom: new free bytes at head
+ *	@newtailroom: new free bytes at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data and while doing so
+ *	allocate additional space.
+ *
+ *	This is used when the caller wishes to modify the data and needs a
+ *	private copy of the data to alter as well as more space for new fields.
+ *	Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	You must pass %GFP_ATOMIC as the allocation priority if this function
+ *	is called from an interrupt.
+ *
+ *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
+ *	only by netfilter in the cases when checksum is recalculated? --ANK
+ */
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+				int newheadroom, int newtailroom, int gfp_mask)
+{
+	struct sk_buff *copy;
+	int old_headroom;
+	int head_copy_len, head_copy_off;
+
+	/*
+	 *	Room for the requested headroom, all data and the tailroom
+	 */
+	copy = alloc_skb(newheadroom + skb->len + newtailroom, gfp_mask);
+	if (copy == NULL)
+		return NULL;
+
+	skb_reserve(copy, newheadroom);
+
+	/* Set the tail pointer and length */
+	skb_put(copy, skb->len);
+
+	/*
+	 * Take over as much of the old headroom as fits into the new one.
+	 * If the new headroom is larger, the old headroom contents are
+	 * placed right in front of the data and the start is left as is.
+	 */
+	old_headroom = skb_headroom(skb);
+	if (newheadroom <= old_headroom) {
+		head_copy_len = newheadroom;
+		head_copy_off = 0;
+	} else {
+		head_copy_len = old_headroom;
+		head_copy_off = newheadroom - old_headroom;
+	}
+
+	/* Copy the retained headroom bytes together with the data. */
+	if (skb_copy_bits(skb, -head_copy_len, copy->head + head_copy_off,
+			  skb->len + head_copy_len) != 0)
+		BUG();
+
+	copy_skb_header(copy, skb);
+
+	return copy;
+}
+
+/**
+ *	skb_pad			-	zero pad the tail of an skb
+ *	@skb: buffer to pad
+ *	@pad: space to pad
+ *
+ *	Ensure that a buffer is followed by a padding area that is zero
+ *	filled. Used by network drivers which may DMA or transfer data
+ *	beyond the buffer end onto the wire.
+ *
+ *	May return NULL in out of memory cases.
+ */
+ 
+struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+{
+	struct sk_buff *padded = skb;
+
+	/* If the skbuff is non linear tailroom is always zero.. */
+	if (skb_tailroom(skb) < pad) {
+		/*
+		 * Not enough room behind the data: move to a larger copy.
+		 * The original is released whether or not the copy succeeds.
+		 */
+		padded = skb_copy_expand(skb, skb_headroom(skb),
+					 skb_tailroom(skb) + pad, GFP_ATOMIC);
+		kfree_skb(skb);
+		if (!padded)
+			return NULL;
+	}
+
+	/* Zero the pad bytes that follow the data. */
+	memset(padded->data + padded->len, 0, pad);
+	return padded;
+}	
+ 
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without change of data,
+ * it is BUG().
+ *
+ * Returns 0 on success, or -ENOMEM if a cloned skb had to get a private
+ * head (pskb_expand_head()) and that allocation failed.
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+	/* offset: number of bytes that precede the current page fragment. */
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	for (i = 0; i < nfrags; i++) {
+		int end = offset + skb_shinfo(skb)->frags[i].size;
+		if (end > len) {
+			/* This fragment reaches past the new length. */
+			if (skb_cloned(skb)) {
+				if (!realloc)
+					BUG();
+				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+					return -ENOMEM;
+			}
+			if (len <= offset) {
+				/* Entirely beyond the new end: drop it. */
+				put_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb)->nr_frags--;
+			} else {
+				/* Straddles the new end: shorten it. */
+				skb_shinfo(skb)->frags[i].size = len - offset;
+			}
+		}
+		offset = end;
+	}
+
+	/*
+	 * offset now equals the linear length plus the original sizes of
+	 * all page fragments.
+	 */
+	if (offset < len) {
+		skb->data_len -= skb->len - len;
+		skb->len       = len;
+	} else {
+		if (len <= skb_headlen(skb)) {
+			/* Everything that remains lives in the linear part. */
+			skb->len      = len;
+			skb->data_len = 0;
+			skb->tail     = skb->data + len;
+			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+				skb_drop_fraglist(skb);
+		} else {
+			skb->data_len -= skb->len - len;
+			skb->len       = len;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	__pskb_pull_tail - advance tail of skb header
+ *	@skb: buffer to reallocate
+ *	@delta: number of bytes to advance tail
+ *
+ *	The function makes a sense only on a fragmented &sk_buff,
+ *	it expands header moving its tail forward and copying necessary
+ *	data from fragmented part.
+ *
+ *	&sk_buff MUST have reference count of 1.
+ *
+ *	Returns %NULL (and &sk_buff does not change) if pull failed
+ *	or value of new tail of skb in the case of success.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	int i, k, eat = (skb->tail + delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
+				     GFP_ATOMIC))
+			return NULL;
+	}
+
+	/* Copy the next delta bytes of non-linear data to the linear tail. */
+	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (!skb_shinfo(skb)->frag_list)
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size >= eat)
+			goto pull_pages;
+		eat -= skb_shinfo(skb)->frags[i].size;
+	}
+
+	/* If we need to update the frag list, we are in trouble.
+	 * Certainly, it is possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be a very rare operation, it is worth fighting against
+	 * further bloating of the skb head and crucifying ourselves
+	 * here instead. Pure masochism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		/* insp: first list member that stays on the fragment list. */
+		struct sk_buff *insp = NULL;
+
+		do {
+			if (!list)
+				BUG();
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (!clone)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems. */
+					insp = list;
+				}
+				if (!pskb_pull(list, eat)) {
+					if (clone)
+						kfree_skb(clone);
+					return NULL;
+				}
+				break;
+			}
+		} while (eat);
+
+		/* Free pulled out fragments. */
+		while ((list = skb_shinfo(skb)->frag_list) != insp) {
+			skb_shinfo(skb)->frag_list = list->next;
+			kfree_skb(list);
+		}
+		/* And insert new clone at head. */
+		if (clone) {
+			clone->next = list;
+			skb_shinfo(skb)->frag_list = clone;
+		}
+	}
+	/* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+	/*
+	 * Drop page fragments that were consumed completely, trim the one
+	 * consumed partially, and compact the rest to the front (index k).
+	 */
+	eat = delta;
+	k = 0;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size <= eat) {
+			put_page(skb_shinfo(skb)->frags[i].page);
+			eat -= skb_shinfo(skb)->frags[i].size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_shinfo(skb)->frags[k].size -= eat;
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	/* delta bytes moved from the non-linear part into the linear part. */
+	skb->tail     += delta;
+	skb->data_len -= delta;
+
+	return skb->tail;
+}
+
+/* Copy some data bits from skb to kernel buffer.
+ *
+ * Copies len bytes, starting offset bytes after skb->data, into to.
+ * The source range may span the linear part, the page fragments and the
+ * buffers on the fragment list.  A negative offset reaches back in front
+ * of skb->data (skb_copy() uses this to copy the headroom as well).
+ * Returns 0 when everything was copied and -EFAULT when the requested
+ * range is not fully covered by the skb.
+ */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+	int i, copy;
+	/* start: position in the skb at which the current piece begins. */
+	int start = skb_headlen(skb);
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+		memcpy(to, skb->data + offset, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to     += copy;
+	}
+
+	/* Then the page fragments, each mapped only while it is copied. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+			memcpy(to,
+			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
+			       offset - start, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+
+	/* Finally the fragment list; each member is handled recursively. */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_bits(list, offset - start,
+						  to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to     += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/*
+ * Checksum skb data.
+ *
+ * Checksums @len bytes of @skb starting at @offset and returns the result,
+ * using @csum as the initial value. The linear header is folded straight
+ * into @csum with csum_partial(); each page fragment and each frag_list
+ * buffer is checksummed separately from 0 and merged in with
+ * csum_block_add(), passing the number of bytes already checksummed (pos).
+ *
+ * Calls BUG() if the buffer is exhausted while @len is still non-zero.
+ */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+			  int len, unsigned int csum)
+{
+	int start = skb_headlen(skb);	/* skb offset where the next piece begins */
+	int i, copy = start - offset;
+	int pos = 0;			/* bytes checksummed so far */
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial(skb->data + offset, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;	/* skb offset just past this fragment */
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial(vaddr + frag->page_offset +
+					     offset - start, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;	/* skb offset just past this chained buffer */
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				unsigned int csum2;
+				if (copy > len)
+					copy = len;
+				/* offset - start is the offset relative to 'list' */
+				csum2 = skb_checksum(list, offset - start,
+						     copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				pos    += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+
+	return csum;
+}
+
+/*
+ * Both of above in one bottle.
+ *
+ * Copies @len bytes of @skb starting at @offset to @to while computing
+ * their checksum, and returns that checksum with @csum as the initial
+ * value. The walk is header, then page fragments, then frag_list buffers
+ * (handled recursively); each piece after the header is summed from 0 and
+ * merged with csum_block_add() at the running byte position (pos).
+ *
+ * Calls BUG() if the buffer is exhausted while @len is still non-zero.
+ */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+				    u8 *to, int len, unsigned int csum)
+{
+	int start = skb_headlen(skb);	/* skb offset where the next piece begins */
+	int i, copy = start - offset;
+	int pos = 0;			/* bytes copied and checksummed so far */
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial_copy_nocheck(skb->data + offset, to,
+						 copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		to     += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;	/* skb offset just past this fragment */
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial_copy_nocheck(vaddr +
+							  frag->page_offset +
+							  offset - start, to,
+							  copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			to     += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			unsigned int csum2;
+			int end;	/* skb offset just past this chained buffer */
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				/* offset - start is the offset relative to 'list' */
+				csum2 = skb_copy_and_csum_bits(list,
+							       offset - start,
+							       to, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				to     += copy;
+				pos    += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+	return csum;
+}
+
+/*
+ * Copy the whole of @skb to @to. Bytes before the checksum start are copied
+ * verbatim; the remainder is copied and checksummed in one pass. For
+ * CHECKSUM_HW buffers the checksum start is skb->h.raw and the folded sum
+ * is stored in the copy, skb->csum bytes past that start. Otherwise the
+ * checksum start is the end of the linear header and nothing is stored.
+ */
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+	unsigned int sum = 0;
+	long hdr_bytes = skb_headlen(skb);
+
+	if (skb->ip_summed == CHECKSUM_HW)
+		hdr_bytes = skb->h.raw - skb->data;
+
+	/* The checksum start must lie inside the linear header. */
+	if (hdr_bytes > skb_headlen(skb))
+		BUG();
+
+	memcpy(to, skb->data, hdr_bytes);
+
+	if (hdr_bytes != skb->len)
+		sum = skb_copy_and_csum_bits(skb, hdr_bytes, to + hdr_bytes,
+					     skb->len - hdr_bytes, 0);
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		long store_at = hdr_bytes + skb->csum;
+
+		*((unsigned short *)(to + store_at)) = csum_fold(sum);
+	}
+}
+
+/**
+ *	skb_dequeue - remove from the head of the queue
+ *	@list: list to dequeue from
+ *
+ *	Detach and return the first buffer on @list, or %NULL when the list
+ *	is empty. The list lock is held (with interrupts saved and restored)
+ *	around the removal, so this can be mixed with the other locking
+ *	list functions.
+ */
+
+struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+	struct sk_buff *head;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&list->lock, irqflags);
+	head = __skb_dequeue(list);
+	spin_unlock_irqrestore(&list->lock, irqflags);
+
+	return head;
+}
+
+/**
+ *	skb_dequeue_tail - remove from the tail of the queue
+ *	@list: list to dequeue from
+ *
+ *	Detach and return the last buffer on @list, or %NULL when the list
+ *	is empty. The list lock is held (with interrupts saved and restored)
+ *	around the removal, so this can be mixed with the other locking
+ *	list functions.
+ */
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+	struct sk_buff *last;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&list->lock, irqflags);
+	last = __skb_dequeue_tail(list);
+	spin_unlock_irqrestore(&list->lock, irqflags);
+
+	return last;
+}
+
+/**
+ *	skb_queue_purge - empty a list
+ *	@list: list to empty
+ *
+ *	Repeatedly dequeue the head of @list with skb_dequeue() and hand each
+ *	buffer to kfree_skb() until the list is empty. The list lock is
+ *	taken per buffer by skb_dequeue(), not across the whole purge.
+ */
+void skb_queue_purge(struct sk_buff_head *list)
+{
+	for (;;) {
+		struct sk_buff *victim = skb_dequeue(list);
+
+		if (victim == NULL)
+			break;
+		kfree_skb(victim);
+	}
+}
+
+/**
+ *	skb_queue_head - queue a buffer at the list head
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Insert @newsk at the front of @list. The list lock is held (with
+ *	interrupts saved and restored) around the insertion, so this can be
+ *	used safely with the other locking &sk_buff functions.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&list->lock, irqflags);
+	__skb_queue_head(list, newsk);
+	spin_unlock_irqrestore(&list->lock, irqflags);
+}
+
+/**
+ *	skb_queue_tail - queue a buffer at the list tail
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Insert @newsk at the end of @list. The list lock is held (with
+ *	interrupts saved and restored) around the insertion, so this can be
+ *	used safely with the other locking &sk_buff functions.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&list->lock, irqflags);
+	__skb_queue_tail(list, newsk);
+	spin_unlock_irqrestore(&list->lock, irqflags);
+}
+/**
+ *	skb_unlink	-	remove a buffer from a list
+ *	@skb: buffer to remove
+ *
+ *	Remove @skb from the list it is currently on, if any. The list is
+ *	found through skb->list, its lock is taken, and the buffer is only
+ *	unlinked if it is still on that same list once the lock is held.
+ *
+ *	Works even without knowing the list it is sitting on, which can be
+ *	handy at times. It also means that THE LIST MUST EXIST when you
+ *	unlink. Thus a list must have its contents unlinked before it is
+ *	destroyed.
+ */
+void skb_unlink(struct sk_buff *skb)
+{
+	struct sk_buff_head *owner = skb->list;
+	unsigned long irqflags;
+
+	/* Not queued anywhere: nothing to do. */
+	if (!owner)
+		return;
+
+	spin_lock_irqsave(&owner->lock, irqflags);
+	/* Re-check under the lock; the buffer may have left this list. */
+	if (skb->list == owner)
+		__skb_unlink(skb, skb->list);
+	spin_unlock_irqrestore(&owner->lock, irqflags);
+}
+
+
+/**
+ *	skb_append	-	append a buffer
+ *	@old: buffer to insert after
+ *	@newsk: buffer to insert
+ *
+ *	Link @newsk into the list that @old is on, directly after @old.
+ *	The lock of that list (old->list) is held, with interrupts saved and
+ *	restored, around the insertion.
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&old->list->lock, irqflags);
+	__skb_append(old, newsk);
+	spin_unlock_irqrestore(&old->list->lock, irqflags);
+}
+
+
+/**
+ *	skb_insert	-	insert a buffer
+ *	@old: buffer to insert before
+ *	@newsk: buffer to insert
+ *
+ *	Link @newsk into the list that @old is on, directly before @old
+ *	(between old->prev and @old). The lock of that list (old->list) is
+ *	held, with interrupts saved and restored, around the insertion.
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&old->list->lock, irqflags);
+	__skb_insert(newsk, old->prev, old, old->list);
+	spin_unlock_irqrestore(&old->list->lock, irqflags);
+}
+
+#if 0
+/*
+ * 	Tune the memory allocator for a new MTU size.
+ *
+ *	This function is compiled out by the surrounding #if 0. It rounds
+ *	@mtu with SKB_DATA_ALIGN(), adds the size of struct skb_shared_info
+ *	and passes the result to kmem_add_cache_size().
+ */
+void skb_add_mtu(int mtu)
+{
+	/* Must match allocation in alloc_skb */
+	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+	kmem_add_cache_size(mtu);
+}
+#endif
+
+/*
+ * Split @skb at @len when the split point lies inside the linear header
+ * (@pos is the header length). The header bytes from @len up to @pos are
+ * copied into @skb1, every page fragment descriptor is handed over to
+ * @skb1 unchanged, and @skb is cut back to exactly @len linear bytes.
+ */
+static inline void skb_split_inside_header(struct sk_buff *skb,
+					   struct sk_buff* skb1,
+					   const u32 len, const int pos)
+{
+	struct skb_shared_info *from = skb_shinfo(skb);
+	struct skb_shared_info *into = skb_shinfo(skb1);
+	const u32 moved = pos - len;
+	int idx;
+
+	/* Header bytes past the split point move into skb1's linear area. */
+	memcpy(skb_put(skb1, moved), skb->data + len, moved);
+
+	/* The paged data is transferred as is. */
+	for (idx = 0; idx < from->nr_frags; idx++)
+		into->frags[idx] = from->frags[idx];
+	into->nr_frags = from->nr_frags;
+	from->nr_frags = 0;
+
+	/* Fix up the length accounting on both halves. */
+	skb1->data_len = skb->data_len;
+	skb1->len += skb1->data_len;
+	skb->data_len = 0;
+	skb->len = len;
+	skb->tail = skb->data + len;
+}
+
+static inline void skb_split_no_header(struct sk_buff *skb,
+				       struct sk_buff* skb1,
+				       const u32 len, int pos)
+{
+	int i, k = 0;
+	const int nfrags = skb_shinfo(skb)->nr_frags;
+	/*
+	 * Split @skb at @len when the split point is at or beyond the end of
+	 * the linear header (@pos is the header length on entry and is then
+	 * advanced past each fragment). @skb keeps the first @len bytes;
+	 * @skb1 receives only paged data. i walks @skb's original fragments,
+	 * k counts the fragments placed in @skb1, and @skb's nr_frags is
+	 * rebuilt from zero as fragments are kept.
+	 */
+	skb_shinfo(skb)->nr_frags = 0;
+	skb1->len		  = skb1->data_len = skb->len - len;
+	skb->len		  = len;
+	skb->data_len		  = len - pos;
+
+	for (i = 0; i < nfrags; i++) {
+		int size = skb_shinfo(skb)->frags[i].size;
+
+		if (pos + size > len) {
+			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+			if (pos < len) {
+				/* Split frag.
+				 * We have two variants in this case:
+				 * 1. Move the whole frag to the second
+				 *    part, if it is possible. E.g.
+				 *    this approach is mandatory for TUX,
+				 *    where splitting is expensive.
+				 * 2. Split it accurately. This is what we do:
+				 *    both buffers reference the page (hence
+				 *    the extra get_page()), skb keeping the
+				 *    first len - pos bytes and skb1 the rest.
+				 *    The straddling frag is always the first
+				 *    one given to skb1, so k is 0 here.
+				 */
+				get_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+				skb_shinfo(skb1)->frags[0].size -= len - pos;
+				skb_shinfo(skb)->frags[i].size	= len - pos;
+				skb_shinfo(skb)->nr_frags++;
+			}
+			k++;
+		} else
+			skb_shinfo(skb)->nr_frags++;	/* frag lies wholly before the split */
+		pos += size;
+	}
+	skb_shinfo(skb1)->nr_frags = k;
+}
+
+/**
+ * skb_split - Split fragmented skb to two parts at length len.
+ * @skb: the buffer to split
+ * @skb1: the buffer to receive the second part
+ * @len: new length for skb
+ *
+ * Dispatches on whether @len falls inside the linear header of @skb or
+ * at/after its end.
+ */
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
+{
+	int headlen = skb_headlen(skb);
+
+	if (len >= headlen) {
+		/* Second chunk has no header, nothing to copy. */
+		skb_split_no_header(skb, skb1, len, headlen);
+		return;
+	}
+
+	/* Split line is inside header. */
+	skb_split_inside_header(skb, skb1, len, headlen);
+}
+
+/*
+ * Create the slab cache that struct sk_buff heads are allocated from.
+ * Panics if the cache cannot be created.
+ */
+void __init skb_init(void)
+{
+	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+					      sizeof(struct sk_buff), 0,
+					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (skbuff_head_cache)
+		return;
+
+	panic("cannot create skbuff cache");
+}
+
+EXPORT_SYMBOL(___pskb_trim);
+EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(__pskb_pull_tail);
+EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(pskb_expand_head);
+EXPORT_SYMBOL(skb_checksum);
+EXPORT_SYMBOL(skb_clone);
+EXPORT_SYMBOL(skb_clone_fraglist);
+EXPORT_SYMBOL(skb_copy);
+EXPORT_SYMBOL(skb_copy_and_csum_bits);
+EXPORT_SYMBOL(skb_copy_and_csum_dev);
+EXPORT_SYMBOL(skb_copy_bits);
+EXPORT_SYMBOL(skb_copy_expand);
+EXPORT_SYMBOL(skb_over_panic);
+EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(skb_realloc_headroom);
+EXPORT_SYMBOL(skb_under_panic);
+EXPORT_SYMBOL(skb_dequeue);
+EXPORT_SYMBOL(skb_dequeue_tail);
+EXPORT_SYMBOL(skb_insert);
+EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_head);
+EXPORT_SYMBOL(skb_queue_tail);
+EXPORT_SYMBOL(skb_unlink);
+EXPORT_SYMBOL(skb_append);
+EXPORT_SYMBOL(skb_split);
diff --git a/net/core/sock.c b/net/core/sock.c
new file mode 100644
index 00000000000..629ab4a5b45
--- /dev/null
+++ b/net/core/sock.c
@@ -0,0 +1,1565 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Generic socket support routines. Memory allocators, socket lock/release
+ *		handler for protocols to use and generic option handler.
+ *
+ *
+ * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Alan Cox, <A.Cox@swansea.ac.uk>
+ *
+ * Fixes:
+ *		Alan Cox	: 	Numerous verify_area() problems
+ *		Alan Cox	:	Connecting on a connecting socket
+ *					now returns an error for tcp.
+ *		Alan Cox	:	sock->protocol is set correctly.
+ *					and is not sometimes left as 0.
+ *		Alan Cox	:	connect handles icmp errors on a
+ *					connect properly. Unfortunately there
+ *					is a restart syscall nasty there. I
+ *					can't match BSD without hacking the C
+ *					library. Ideas urgently sought!
+ *		Alan Cox	:	Disallow bind() to addresses that are
+ *					not ours - especially broadcast ones!!
+ *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
+ *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
+ *					instead they leave that for the DESTROY timer.
+ *		Alan Cox	:	Clean up error flag in accept
+ *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
+ *					was buggy. Put a remove_sock() in the handler
+ *					for memory when we hit 0. Also altered the timer
+ *					code. The ACK stuff can wait and needs major 
+ *					TCP layer surgery.
+ *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
+ *					and fixed timer/inet_bh race.
+ *		Alan Cox	:	Added zapped flag for TCP
+ *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
+ *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
+ *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
+ *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
+ *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
+ *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
+ *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
+ *	Pauline Middelink	:	identd support
+ *		Alan Cox	:	Fixed connect() taking signals I think.
+ *		Alan Cox	:	SO_LINGER supported
+ *		Alan Cox	:	Error reporting fixes
+ *		Anonymous	:	inet_create tidied up (sk->reuse setting)
+ *		Alan Cox	:	inet sockets don't set sk->type!
+ *		Alan Cox	:	Split socket option code
+ *		Alan Cox	:	Callbacks
+ *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
+ *		Alex		:	Removed restriction on inet fioctl
+ *		Alan Cox	:	Splitting INET from NET core
+ *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
+ *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
+ *		Alan Cox	:	Split IP from generic code
+ *		Alan Cox	:	New kfree_skbmem()
+ *		Alan Cox	:	Make SO_DEBUG superuser only.
+ *		Alan Cox	:	Allow anyone to clear SO_DEBUG
+ *					(compatibility fix)
+ *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
+ *		Alan Cox	:	Allocator for a socket is settable.
+ *		Alan Cox	:	SO_ERROR includes soft errors.
+ *		Alan Cox	:	Allow NULL arguments on some SO_ opts
+ *		Alan Cox	: 	Generic socket allocation to make hooks
+ *					easier (suggested by Craig Metz).
+ *		Michael Pall	:	SO_ERROR returns positive errno again
+ *              Steve Whitehouse:       Added default destructor to free
+ *                                      protocol private data.
+ *              Steve Whitehouse:       Added various other default routines
+ *                                      common to several socket families.
+ *              Chris Evans     :       Call suser() check last on F_SETOWN
+ *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
+ *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
+ *		Andi Kleen	:	Fix write_space callback
+ *		Chris Evans	:	Security fixes - signedness again
+ *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
+ *
+ * To Fix:
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/poll.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <linux/ipsec.h>
+
+#include <linux/filter.h>
+
+#ifdef CONFIG_INET
+#include <net/tcp.h>
+#endif
+
+/* Take into consideration the size of the struct sk_buff overhead in the
+ * determination of these values, since that is non-constant across
+ * platforms.  This makes socket queueing behavior and performance
+ * not depend upon such differences.
+ *
+ * Both limits work out to _SK_MEM_PACKETS buffers of
+ * sizeof(struct sk_buff) + 256 bytes each.
+ */
+#define _SK_MEM_PACKETS		256
+#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
+#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+
+/* Run time adjustable parameters. sk_init() overrides these compile-time
+ * values on machines with few or many physical pages.
+ */
+__u32 sysctl_wmem_max = SK_WMEM_MAX;
+__u32 sysctl_rmem_max = SK_RMEM_MAX;
+__u32 sysctl_wmem_default = SK_WMEM_MAX;
+__u32 sysctl_rmem_default = SK_RMEM_MAX;
+
+/* Maximal space eaten by iovec or ancillary data plus some space */
+int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+
+/*
+ * Parse a user-supplied struct timeval into a scheduler timeout in jiffies.
+ *
+ * @timeo_p: where the resulting timeout is stored
+ * @optval:  user pointer to a struct timeval
+ * @optlen:  size of the user buffer
+ *
+ * Returns 0 on success, -EINVAL if the buffer is smaller than a
+ * struct timeval, -EFAULT if it cannot be read, and -EDOM if tv_usec is
+ * outside [0, 1000000).
+ *
+ * A zero timeval means "no timeout" and yields MAX_SCHEDULE_TIMEOUT, as
+ * does a value too large to express in jiffies. A negative tv_sec yields
+ * a timeout of 0; previously it was multiplied by HZ and stored as a
+ * negative number of jiffies.
+ */
+static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+{
+	struct timeval tv;
+
+	if (optlen < sizeof(tv))
+		return -EINVAL;
+	if (copy_from_user(&tv, optval, sizeof(tv)))
+		return -EFAULT;
+
+	/* Reject denormalised timevals instead of computing garbage. */
+	if (tv.tv_usec < 0 || tv.tv_usec >= 1000000)
+		return -EDOM;
+
+	/* A timeout in the past expires immediately. */
+	if (tv.tv_sec < 0) {
+		*timeo_p = 0;
+		return 0;
+	}
+
+	*timeo_p = MAX_SCHEDULE_TIMEOUT;
+	if (tv.tv_sec == 0 && tv.tv_usec == 0)
+		return 0;
+	/* Round the microseconds up to a whole number of ticks. */
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
+		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
+	return 0;
+}
+
+/*
+ * Log a warning that the current process used the obsolete SO_BSDCOMPAT
+ * option through @name ("setsockopt" or "getsockopt" in this file).
+ * At most five warnings are ever printed, and none when the process name
+ * matches the one most recently warned about.
+ */
+static void sock_warn_obsolete_bsdism(const char *name)
+{
+	static int warned;
+	static char warncomm[TASK_COMM_LEN];
+
+	if (!strcmp(warncomm, current->comm) || warned >= 5)
+		return;
+
+	strcpy(warncomm, current->comm);
+	printk(KERN_WARNING "process `%s' is using obsolete "
+	       "%s SO_BSDCOMPAT\n", warncomm, name);
+	warned++;
+}
+
+/*
+ * If @sk has SOCK_TIMESTAMP set, clear the flag and call
+ * net_disable_timestamp(); otherwise do nothing.
+ */
+static void sock_disable_timestamp(struct sock *sk)
+{
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		return;
+
+	sock_reset_flag(sk, SOCK_TIMESTAMP);
+	net_disable_timestamp();
+}
+
+
+/*
+ *	This is meant for all protocols to use and covers goings on
+ *	at the socket level. Everything here is generic.
+ *
+ *	Sets the socket-level option @optname on @sock from the user buffer
+ *	@optval of @optlen bytes. Every option needs at least an int in
+ *	@optval (-EINVAL otherwise, -EFAULT if it cannot be read); options
+ *	taking a larger structure re-read @optval themselves. The socket is
+ *	locked with lock_sock() while the option is applied.
+ *
+ *	Returns 0 on success or a negative errno; unknown or read-only
+ *	options give -ENOPROTOOPT.
+ */
+
+int sock_setsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	struct sock *sk=sock->sk;
+	struct sk_filter *filter;
+	int val;
+	int valbool;
+	struct linger ling;
+	int ret = 0;
+
+	/*
+	 *	Options without arguments
+	 */
+
+#ifdef SO_DONTLINGER		/* Compatibility item... */
+	switch (optname) {
+		case SO_DONTLINGER:
+			sock_reset_flag(sk, SOCK_LINGER);
+			return 0;
+	}
+#endif
+
+	if(optlen<sizeof(int))
+		return(-EINVAL);
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	valbool = val?1:0;
+
+	lock_sock(sk);
+
+	switch(optname)
+	{
+		case SO_DEBUG:
+			/* Anyone may clear the flag; setting it needs privilege. */
+			if(val && !capable(CAP_NET_ADMIN))
+			{
+				ret = -EACCES;
+			}
+			else if (valbool)
+				sock_set_flag(sk, SOCK_DBG);
+			else
+				sock_reset_flag(sk, SOCK_DBG);
+			break;
+		case SO_REUSEADDR:
+			sk->sk_reuse = valbool;
+			break;
+		case SO_TYPE:
+		case SO_ERROR:
+			/* Read-only options. */
+			ret = -ENOPROTOOPT;
+			break;
+		case SO_DONTROUTE:
+			if (valbool)
+				sock_set_flag(sk, SOCK_LOCALROUTE);
+			else
+				sock_reset_flag(sk, SOCK_LOCALROUTE);
+			break;
+		case SO_BROADCAST:
+			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+			break;
+		case SO_SNDBUF:
+			/* Don't return an error for an oversized request: BSD
+			   doesn't, and if you think about it this is right.
+			   Otherwise apps have to play 'guess the biggest
+			   size' games. RCVBUF/SNDBUF are treated in BSD as
+			   hints. */
+
+			if (val > sysctl_wmem_max)
+				val = sysctl_wmem_max;
+
+			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+			if ((val * 2) < SOCK_MIN_SNDBUF)
+				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+			else
+				sk->sk_sndbuf = val * 2;
+
+			/*
+			 *	Wake up sending tasks if we
+			 *	upped the value.
+			 */
+			sk->sk_write_space(sk);
+			break;
+
+		case SO_RCVBUF:
+			/* Same hint semantics as SO_SNDBUF above: clamp
+			   rather than fail. */
+
+			if (val > sysctl_rmem_max)
+				val = sysctl_rmem_max;
+
+			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+			/* FIXME: is this lower bound the right one? */
+			if ((val * 2) < SOCK_MIN_RCVBUF)
+				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+			else
+				sk->sk_rcvbuf = val * 2;
+			break;
+
+		case SO_KEEPALIVE:
+#ifdef CONFIG_INET
+			if (sk->sk_protocol == IPPROTO_TCP)
+				tcp_set_keepalive(sk, valbool);
+#endif
+			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+			break;
+
+		case SO_OOBINLINE:
+			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+			break;
+
+		case SO_NO_CHECK:
+			sk->sk_no_check = valbool;
+			break;
+
+		case SO_PRIORITY:
+			/* Priorities 0..6 are open to everyone. */
+			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+				sk->sk_priority = val;
+			else
+				ret = -EPERM;
+			break;
+
+		case SO_LINGER:
+			if(optlen<sizeof(ling)) {
+				ret = -EINVAL;	/* 1003.1g */
+				break;
+			}
+			if (copy_from_user(&ling,optval,sizeof(ling))) {
+				ret = -EFAULT;
+				break;
+			}
+			if (!ling.l_onoff)
+				sock_reset_flag(sk, SOCK_LINGER);
+			else {
+#if (BITS_PER_LONG == 32)
+				/* l_linger * HZ would not fit in a long. */
+				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+				else
+#endif
+					sk->sk_lingertime = ling.l_linger * HZ;
+				sock_set_flag(sk, SOCK_LINGER);
+			}
+			break;
+
+		case SO_BSDCOMPAT:
+			sock_warn_obsolete_bsdism("setsockopt");
+			break;
+
+		case SO_PASSCRED:
+			if (valbool)
+				set_bit(SOCK_PASSCRED, &sock->flags);
+			else
+				clear_bit(SOCK_PASSCRED, &sock->flags);
+			break;
+
+		case SO_TIMESTAMP:
+			if (valbool)  {
+				sock_set_flag(sk, SOCK_RCVTSTAMP);
+				sock_enable_timestamp(sk);
+			} else
+				sock_reset_flag(sk, SOCK_RCVTSTAMP);
+			break;
+
+		case SO_RCVLOWAT:
+			if (val < 0)
+				val = INT_MAX;
+			sk->sk_rcvlowat = val ? : 1;
+			break;
+
+		case SO_RCVTIMEO:
+			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+			break;
+
+		case SO_SNDTIMEO:
+			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+			break;
+
+#ifdef CONFIG_NETDEVICES
+		case SO_BINDTODEVICE:
+		{
+			char devname[IFNAMSIZ];
+
+			/* Sorry... */
+			if (!capable(CAP_NET_RAW)) {
+				ret = -EPERM;
+				break;
+			}
+
+			/* Bind this socket to a particular device like "eth0",
+			 * as specified in the passed interface name. If the
+			 * name is "" the socket is not bound.
+			 *
+			 * NOTE(review): the first unbind test below uses
+			 * valbool, i.e. the leading sizeof(int) bytes of
+			 * optval read as an int, not the option length.
+			 */
+
+			if (!valbool) {
+				sk->sk_bound_dev_if = 0;
+			} else {
+				/* Copy at most IFNAMSIZ - 1 bytes into a
+				 * zeroed buffer so devname is always
+				 * NUL-terminated before it is used as a
+				 * string below.
+				 */
+				if (optlen > IFNAMSIZ - 1)
+					optlen = IFNAMSIZ - 1;
+				memset(devname, 0, sizeof(devname));
+				if (copy_from_user(devname, optval, optlen)) {
+					ret = -EFAULT;
+					break;
+				}
+
+				/* Remove any cached route for this socket. */
+				sk_dst_reset(sk);
+
+				if (devname[0] == '\0') {
+					sk->sk_bound_dev_if = 0;
+				} else {
+					struct net_device *dev = dev_get_by_name(devname);
+					if (!dev) {
+						ret = -ENODEV;
+						break;
+					}
+					sk->sk_bound_dev_if = dev->ifindex;
+					dev_put(dev);
+				}
+			}
+			break;
+		}
+#endif
+
+
+		case SO_ATTACH_FILTER:
+			ret = -EINVAL;
+			if (optlen == sizeof(struct sock_fprog)) {
+				struct sock_fprog fprog;
+
+				ret = -EFAULT;
+				if (copy_from_user(&fprog, optval, sizeof(fprog)))
+					break;
+
+				ret = sk_attach_filter(&fprog, sk);
+			}
+			break;
+
+		case SO_DETACH_FILTER:
+			/* Detach under the socket spinlock, release outside it. */
+			spin_lock_bh(&sk->sk_lock.slock);
+			filter = sk->sk_filter;
+			if (filter) {
+				sk->sk_filter = NULL;
+				spin_unlock_bh(&sk->sk_lock.slock);
+				sk_filter_release(sk, filter);
+				break;
+			}
+			spin_unlock_bh(&sk->sk_lock.slock);
+			ret = -ENONET;
+			break;
+
+		/* We implement the SO_SNDLOWAT etc to
+		   not be settable (1003.1g 5.3) */
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	release_sock(sk);
+	return ret;
+}
+
+
+/*
+ *	Read the socket-level option @optname of @sock into the user buffer
+ *	@optval. *@optlen holds the buffer size on entry and is updated with
+ *	the number of bytes written. The value is truncated when the buffer
+ *	is smaller than the option.
+ *
+ *	Returns 0 on success, -EFAULT on a bad user pointer, -EINVAL on a
+ *	negative length, -ENOPROTOOPT for unknown options, or whatever the
+ *	option-specific handler returns.
+ */
+int sock_getsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+
+	union
+	{
+		int val;
+		struct linger ling;
+		struct timeval tm;
+	} v;
+
+	unsigned int lv = sizeof(int);
+	int len;
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+	if(len < 0)
+		return -EINVAL;
+
+	/* Some cases (SO_BSDCOMPAT) never fill in v, yet it is copied to
+	 * user space below; zero it so no kernel stack contents leak.
+	 */
+	memset(&v, 0, sizeof(v));
+
+	switch(optname)
+	{
+		case SO_DEBUG:
+			v.val = sock_flag(sk, SOCK_DBG);
+			break;
+
+		case SO_DONTROUTE:
+			v.val = sock_flag(sk, SOCK_LOCALROUTE);
+			break;
+
+		case SO_BROADCAST:
+			v.val = !!sock_flag(sk, SOCK_BROADCAST);
+			break;
+
+		case SO_SNDBUF:
+			v.val = sk->sk_sndbuf;
+			break;
+
+		case SO_RCVBUF:
+			v.val = sk->sk_rcvbuf;
+			break;
+
+		case SO_REUSEADDR:
+			v.val = sk->sk_reuse;
+			break;
+
+		case SO_KEEPALIVE:
+			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+			break;
+
+		case SO_TYPE:
+			v.val = sk->sk_type;
+			break;
+
+		case SO_ERROR:
+			/* Report the pending error as a positive errno; fall
+			 * back to (and clear) the soft error if there is none.
+			 */
+			v.val = -sock_error(sk);
+			if(v.val==0)
+				v.val = xchg(&sk->sk_err_soft, 0);
+			break;
+
+		case SO_OOBINLINE:
+			v.val = !!sock_flag(sk, SOCK_URGINLINE);
+			break;
+
+		case SO_NO_CHECK:
+			v.val = sk->sk_no_check;
+			break;
+
+		case SO_PRIORITY:
+			v.val = sk->sk_priority;
+			break;
+
+		case SO_LINGER:
+			lv		= sizeof(v.ling);
+			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
+			v.ling.l_linger	= sk->sk_lingertime / HZ;
+			break;
+
+		case SO_BSDCOMPAT:
+			/* No value is stored; the zeroed v is returned. */
+			sock_warn_obsolete_bsdism("getsockopt");
+			break;
+
+		case SO_TIMESTAMP:
+			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
+			break;
+
+		case SO_RCVTIMEO:
+			lv=sizeof(struct timeval);
+			/* "No timeout" is reported as a zero timeval. */
+			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+				v.tm.tv_sec = 0;
+				v.tm.tv_usec = 0;
+			} else {
+				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+			}
+			break;
+
+		case SO_SNDTIMEO:
+			lv=sizeof(struct timeval);
+			/* "No timeout" is reported as a zero timeval. */
+			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+				v.tm.tv_sec = 0;
+				v.tm.tv_usec = 0;
+			} else {
+				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+			}
+			break;
+
+		case SO_RCVLOWAT:
+			v.val = sk->sk_rcvlowat;
+			break;
+
+		case SO_SNDLOWAT:
+			v.val=1;
+			break;
+
+		case SO_PASSCRED:
+			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+			break;
+
+		case SO_PEERCRED:
+			/* Copied straight from the sock, bypassing v. */
+			if (len > sizeof(sk->sk_peercred))
+				len = sizeof(sk->sk_peercred);
+			if (copy_to_user(optval, &sk->sk_peercred, len))
+				return -EFAULT;
+			goto lenout;
+
+		case SO_PEERNAME:
+		{
+			char address[128];
+
+			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+				return -ENOTCONN;
+			/* The user asked for more bytes than the address has. */
+			if (lv < len)
+				return -EINVAL;
+			if (copy_to_user(optval, address, len))
+				return -EFAULT;
+			goto lenout;
+		}
+
+		/* Dubious BSD thing... Probably nobody even uses it, but
+		 * the UNIX standard wants it for whatever reason... -DaveM
+		 */
+		case SO_ACCEPTCONN:
+			v.val = sk->sk_state == TCP_LISTEN;
+			break;
+
+		case SO_PEERSEC:
+			return security_socket_getpeersec(sock, optval, optlen, len);
+
+		default:
+			return(-ENOPROTOOPT);
+	}
+	/* Never copy more than the option actually occupies. */
+	if (len > lv)
+		len = lv;
+	if (copy_to_user(optval, &v, len))
+		return -EFAULT;
+lenout:
+	if (put_user(len, optlen))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ *	sk_alloc - All socket objects are allocated here
+ *	@family: protocol family
+ *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *	@prot: struct proto associated with this new sock instance
+ *	@zero_it: if we should zero the newly allocated sock
+ *
+ *	The sock comes from @prot's slab cache when it has one, otherwise
+ *	from kmalloc() using prot->obj_size. When @zero_it is set the object
+ *	is cleared and its family, proto pointer and lock are initialised.
+ *	On success a reference is taken on the module owning @prot.
+ *
+ *	Returns the new sock, or %NULL if the allocation or the
+ *	security_sk_alloc() hook fails.
+ */
+struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
+{
+	struct sock *sk = NULL;
+	kmem_cache_t *slab = prot->slab;
+
+	if (slab != NULL)
+		sk = kmem_cache_alloc(slab, priority);
+	else
+		sk = kmalloc(prot->obj_size, priority);
+
+	if (sk) {
+		if (zero_it) {
+			memset(sk, 0, prot->obj_size);
+			sk->sk_family = family;
+			sk->sk_prot = prot;
+			sock_lock_init(sk);
+		}
+
+		if (security_sk_alloc(sk, family, priority)) {
+			/* Release with the allocator that produced sk: it
+			 * came from kmalloc() when there is no slab cache.
+			 */
+			if (slab != NULL)
+				kmem_cache_free(slab, sk);
+			else
+				kfree(sk);
+			sk = NULL;
+		} else
+			__module_get(prot->owner);
+	}
+	return sk;
+}
+
+/*
+ * Tear down and free @sk: run its destructor if it has one, release any
+ * attached filter, disable timestamping, report leaked option memory,
+ * call the security hook, free the object with the allocator matching
+ * its proto, and finally drop the reference on the proto's module.
+ */
+void sk_free(struct sock *sk)
+{
+	/* Read the owner up front; sk is gone by the time it is needed. */
+	struct module *owner = sk->sk_prot->owner;
+	struct sk_filter *filter;
+
+	if (sk->sk_destruct)
+		sk->sk_destruct(sk);
+
+	filter = sk->sk_filter;
+	if (filter) {
+		sk_filter_release(sk, filter);
+		sk->sk_filter = NULL;
+	}
+
+	sock_disable_timestamp(sk);
+
+	if (atomic_read(&sk->sk_omem_alloc))
+		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
+		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+
+	security_sk_free(sk);
+
+	if (sk->sk_prot->slab == NULL)
+		kfree(sk);
+	else
+		kmem_cache_free(sk->sk_prot->slab, sk);
+
+	module_put(owner);
+}
+
+/*
+ * Scale the socket buffer sysctls to the amount of physical memory:
+ * with at most 4096 pages all four limits drop to 32767; with 131072
+ * pages or more the two maxima become 131071. Anything in between keeps
+ * the compile-time values.
+ */
+void __init sk_init(void)
+{
+	if (num_physpages <= 4096) {
+		sysctl_wmem_max = 32767;
+		sysctl_rmem_max = 32767;
+		sysctl_wmem_default = 32767;
+		sysctl_rmem_default = 32767;
+		return;
+	}
+
+	if (num_physpages >= 131072) {
+		sysctl_wmem_max = 131071;
+		sysctl_rmem_max = 131071;
+	}
+}
+
+/*
+ *	Simple resource managers for sockets.
+ */
+
+
+/*
+ * Write buffer destructor automatically called from kfree_skb.
+ *
+ * Returns the buffer's truesize to the owning socket's write allocation,
+ * calls the socket's write_space callback unless SOCK_USE_WRITE_QUEUE is
+ * set, and drops a reference on the socket.
+ */
+void sock_wfree(struct sk_buff *skb)
+{
+	struct sock *owner = skb->sk;
+
+	/* In case it might be waiting for more memory. */
+	atomic_sub(skb->truesize, &owner->sk_wmem_alloc);
+
+	if (!sock_flag(owner, SOCK_USE_WRITE_QUEUE))
+		owner->sk_write_space(owner);
+
+	sock_put(owner);
+}
+
+/*
+ * Read buffer destructor automatically called from kfree_skb.
+ *
+ * Returns the buffer's truesize to the owning socket's read allocation.
+ */
+void sock_rfree(struct sk_buff *skb)
+{
+	struct sock *owner = skb->sk;
+
+	atomic_sub(skb->truesize, &owner->sk_rmem_alloc);
+}
+
+
+/*
+ * Return the uid of the inode behind @sk's socket, or 0 when @sk has no
+ * socket attached. sk_callback_lock is read-held during the lookup.
+ */
+int sock_i_uid(struct sock *sk)
+{
+	int uid = 0;
+
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_socket)
+		uid = SOCK_INODE(sk->sk_socket)->i_uid;
+	read_unlock(&sk->sk_callback_lock);
+
+	return uid;
+}
+
+/*
+ * Return the inode number behind @sk's socket, or 0 when @sk has no
+ * socket attached. sk_callback_lock is read-held during the lookup.
+ */
+unsigned long sock_i_ino(struct sock *sk)
+{
+	unsigned long ino = 0;
+
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_socket)
+		ino = SOCK_INODE(sk->sk_socket)->i_ino;
+	read_unlock(&sk->sk_callback_lock);
+
+	return ino;
+}
+
+/*
+ * Allocate a skb from the socket's send buffer.
+ *
+ * Unless @force is set, the allocation is refused once the socket's write
+ * allocation has reached sk_sndbuf. A successfully allocated buffer is
+ * charged to @sk as a write buffer. Returns the skb or NULL.
+ */
+struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
+{
+	struct sk_buff *skb;
+
+	if (!force && atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf)
+		return NULL;
+
+	skb = alloc_skb(size, priority);
+	if (!skb)
+		return NULL;
+
+	skb_set_owner_w(skb, sk);
+	return skb;
+}
+
+/*
+ * Allocate a skb from the socket's receive buffer.
+ *
+ * Unless @force is set, the allocation is refused once the socket's read
+ * allocation has reached sk_rcvbuf. A successfully allocated buffer is
+ * charged to @sk as a read buffer. Returns the skb or NULL.
+ */
+struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
+{
+	struct sk_buff *skb;
+
+	if (!force && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+		return NULL;
+
+	skb = alloc_skb(size, priority);
+	if (!skb)
+		return NULL;
+
+	skb_set_owner_r(skb, sk);
+	return skb;
+}
+
+/*
+ * Allocate a memory block from the socket's option memory buffer.
+ *
+ * Fails (NULL) when @size alone exceeds sysctl_optmem_max, when adding
+ * it to sk_omem_alloc would reach that limit, or when kmalloc() fails.
+ */
+void *sock_kmalloc(struct sock *sk, int size, int priority)
+{
+	void *mem;
+
+	if ((unsigned)size > sysctl_optmem_max)
+		return NULL;
+	if (atomic_read(&sk->sk_omem_alloc) + size >= sysctl_optmem_max)
+		return NULL;
+
+	/* First do the add, to avoid the race if kmalloc might sleep. */
+	atomic_add(size, &sk->sk_omem_alloc);
+	mem = kmalloc(size, priority);
+	if (!mem)
+		atomic_sub(size, &sk->sk_omem_alloc);	/* undo the charge */
+
+	return mem;
+}
+
+/*
+ * Free an option memory block.
+ *
+ * Counterpart of sock_kmalloc(): frees @mem and subtracts @size from the
+ * socket's sk_omem_alloc counter, so @size should be the value that was
+ * charged when the block was allocated.
+ */
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+	kfree(mem);
+	atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+/*
+ * Sleep on sk->sk_sleep until sk_wmem_alloc drops below sk_sndbuf, the
+ * send side is shut down, sk_err is set, a signal is pending or @timeo
+ * runs out.  Returns the remaining timeout.
+ *
+ * It is almost wait_for_tcp_memory minus release_sock/lock_sock.
+ * I think these locks should be removed for datagram sockets.
+ */
+static long sock_wait_for_wmem(struct sock * sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	while (timeo && !signal_pending(current)) {
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		/* Re-check the wake-up conditions once we are queued. */
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf ||
+		    (sk->sk_shutdown & SEND_SHUTDOWN) ||
+		    sk->sk_err)
+			break;
+
+		timeo = schedule_timeout(timeo);
+	}
+
+	finish_wait(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+
+/*
+ *	Generic send/receive buffer handlers
+ */
+
+/*
+ * Allocate an skb for sending on @sk and charge it to the socket.
+ *
+ * @header_len bytes are allocated as the skb's linear area; @data_len
+ * bytes (may be 0) are backed by freshly allocated pages attached as
+ * fragments.  While sk_wmem_alloc is at or above sk_sndbuf the caller
+ * waits in sock_wait_for_wmem(), bounded by sock_sndtimeo(sk, noblock).
+ *
+ * Returns the skb (owned via skb_set_owner_w()), or NULL with *@errcode
+ * set to: a pending socket error, -EPIPE after SEND_SHUTDOWN, -ENOBUFS on
+ * allocation failure, -EAGAIN when the timeout is exhausted, or
+ * sock_intr_errno() when interrupted by a signal.
+ */
+static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
+					    unsigned long header_len,
+					    unsigned long data_len,
+					    int noblock, int *errcode)
+{
+	struct sk_buff *skb;
+	unsigned int gfp_mask;
+	long timeo;
+	int err;
+
+	/* If the allocation is allowed to wait, also let it retry. */
+	gfp_mask = sk->sk_allocation;
+	if (gfp_mask & __GFP_WAIT)
+		gfp_mask |= __GFP_REPEAT;
+
+	timeo = sock_sndtimeo(sk, noblock);
+	while (1) {
+		err = sock_error(sk);
+		if (err != 0)
+			goto failure;
+
+		err = -EPIPE;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			goto failure;
+
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			/*
+			 * Allocate with the mask computed above so that
+			 * __GFP_REPEAT actually takes effect for the skb
+			 * head.
+			 */
+			skb = alloc_skb(header_len, gfp_mask);
+			if (skb) {
+				int npages;
+				int i;
+
+				/* No pages, we're done... */
+				if (!data_len)
+					break;
+
+				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+				skb->truesize += data_len;
+				skb_shinfo(skb)->nr_frags = npages;
+				for (i = 0; i < npages; i++) {
+					struct page *page;
+					skb_frag_t *frag;
+
+					page = alloc_pages(sk->sk_allocation, 0);
+					if (!page) {
+						err = -ENOBUFS;
+						/*
+						 * Only the first i frags
+						 * hold pages; tell
+						 * kfree_skb() so.
+						 */
+						skb_shinfo(skb)->nr_frags = i;
+						kfree_skb(skb);
+						goto failure;
+					}
+
+					frag = &skb_shinfo(skb)->frags[i];
+					frag->page = page;
+					frag->page_offset = 0;
+					/* Last fragment may be partial. */
+					frag->size = (data_len >= PAGE_SIZE ?
+						      PAGE_SIZE :
+						      data_len);
+					data_len -= PAGE_SIZE;
+				}
+
+				/* Full success... */
+				break;
+			}
+			err = -ENOBUFS;
+			goto failure;
+		}
+
+		/* Send buffer is full: flag it and wait, if allowed to. */
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		err = -EAGAIN;
+		if (!timeo)
+			goto failure;
+		if (signal_pending(current))
+			goto interrupted;
+		timeo = sock_wait_for_wmem(sk, timeo);
+	}
+
+	skb_set_owner_w(skb, sk);
+	return skb;
+
+interrupted:
+	err = sock_intr_errno(timeo);
+failure:
+	*errcode = err;
+	return NULL;
+}
+
+/*
+ * Allocate a purely linear send skb of @size bytes: thin wrapper around
+ * sock_alloc_send_pskb() with a paged data length of 0.  On failure NULL
+ * is returned and *@errcode is set by sock_alloc_send_pskb().
+ */
+struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 
+				    int noblock, int *errcode)
+{
+	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+}
+
+/*
+ * Slow path of lock_sock(): sleep (uninterruptibly, as an exclusive
+ * waiter on sk_lock.wq) until the socket is no longer owned by a user.
+ *
+ * lock_sock() calls this with sk_lock.slock held; the spinlock is dropped
+ * around schedule() and re-taken before the ownership check, so it is
+ * held again on return.
+ */
+static void __lock_sock(struct sock *sk)
+{
+	DEFINE_WAIT(wait);
+
+	for(;;) {
+		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
+					TASK_UNINTERRUPTIBLE);
+		spin_unlock_bh(&sk->sk_lock.slock);
+		schedule();
+		spin_lock_bh(&sk->sk_lock.slock);
+		if(!sock_owned_by_user(sk))
+			break;
+	}
+	finish_wait(&sk->sk_lock.wq, &wait);
+}
+
+/*
+ * Feed every skb on the socket's backlog queue to sk_backlog_rcv().
+ *
+ * release_sock() calls this with sk_lock.slock held and a non-empty
+ * backlog.  Each pass detaches the whole list, drops the lock with
+ * bh_unlock_sock() while the detached skbs are processed, then re-takes
+ * it and repeats if new skbs were queued in the meantime.  The lock is
+ * held again on return.
+ */
+static void __release_sock(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_backlog.head;
+
+	do {
+		/* Take the current list private before unlocking. */
+		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+		bh_unlock_sock(sk);
+
+		do {
+			struct sk_buff *next = skb->next;
+
+			skb->next = NULL;
+			sk->sk_backlog_rcv(sk, skb);
+
+			/*
+			 * We are in process context here with softirqs
+			 * disabled, use cond_resched_softirq() to preempt.
+			 * This is safe to do because we've taken the backlog
+			 * queue private:
+			 */
+			cond_resched_softirq();
+
+			skb = next;
+		} while (skb != NULL);
+
+		bh_lock_sock(sk);
+	} while((skb = sk->sk_backlog.head) != NULL);
+}
+
+/**
+ * sk_wait_data - wait for data to arrive at sk_receive_queue
+ * @sk: sock to wait on
+ * @timeo: for how long; passed by pointer to sk_wait_event()
+ *
+ * Now socket state including sk->sk_err is changed only under lock,
+ * hence we may omit checks after joining wait queue.
+ * We check receive queue before schedule() only as optimization;
+ * it is very likely that release_sock() added new data.
+ *
+ * Returns the result of sk_wait_event() for the condition
+ * "sk_receive_queue is not empty".  SOCK_ASYNC_WAITDATA is set in the
+ * socket flags for the duration of the wait.
+ */
+int sk_wait_data(struct sock *sk, long *timeo)
+{
+	int rc;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	finish_wait(sk->sk_sleep, &wait);
+	return rc;
+}
+
+EXPORT_SYMBOL(sk_wait_data);
+
+/*
+ * Set of default routines for initialising struct proto_ops when
+ * the protocol does not support a particular function. In certain
+ * cases where it makes no sense for a protocol to have a "do nothing"
+ * function, some default processing is provided.
+ */
+
+/*
+ * Default stubs, one per operation named after the sock_no_ prefix.
+ * Unless noted otherwise each one ignores its arguments and returns
+ * -EOPNOTSUPP.
+ */
+
+/* bind: not supported. */
+int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	return -EOPNOTSUPP;
+}
+
+/* connect: not supported. */
+int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 
+		    int len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+/* socketpair: not supported. */
+int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
+{
+	return -EOPNOTSUPP;
+}
+
+/* accept: not supported. */
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+/* getname: not supported. */
+int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 
+		    int *len, int peer)
+{
+	return -EOPNOTSUPP;
+}
+
+/* poll: returns an empty event mask (0) rather than an error code. */
+unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
+{
+	return 0;
+}
+
+/* ioctl: not supported. */
+int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+/* listen: not supported. */
+int sock_no_listen(struct socket *sock, int backlog)
+{
+	return -EOPNOTSUPP;
+}
+
+/* shutdown: not supported. */
+int sock_no_shutdown(struct socket *sock, int how)
+{
+	return -EOPNOTSUPP;
+}
+
+/* setsockopt: not supported. */
+int sock_no_setsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+/* getsockopt: not supported. */
+int sock_no_getsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+/* sendmsg: not supported. */
+int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		    size_t len)
+{
+	return -EOPNOTSUPP;
+}
+
+/* recvmsg: not supported. */
+int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		    size_t len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+/* mmap: returns -ENODEV instead of -EOPNOTSUPP (see comment below). */
+int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+{
+	/* Mirror missing mmap method error code */
+	return -ENODEV;
+}
+
+/*
+ * Fallback sendpage: maps @page with kmap(), sends @size bytes starting
+ * at @offset through kernel_sendmsg() as a single kvec, then unmaps the
+ * page.  @flags become msg_flags.  Returns kernel_sendmsg()'s result.
+ */
+ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+{
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov;
+	ssize_t sent;
+	char *mapped;
+
+	mapped = kmap(page);
+	iov.iov_base = mapped + offset;
+	iov.iov_len = size;
+
+	sent = kernel_sendmsg(sock, &msg, &iov, 1, size);
+
+	kunmap(page);
+	return sent;
+}
+
+/*
+ *	Default Socket Callbacks
+ */
+
+/*
+ * Default sk_state_change callback: wake every interruptible sleeper on
+ * sk->sk_sleep, under sk_callback_lock held for reading.
+ */
+static void sock_def_wakeup(struct sock *sk)
+{
+	wait_queue_head_t *wq;
+
+	read_lock(&sk->sk_callback_lock);
+	wq = sk->sk_sleep;
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible_all(wq);
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * Default sk_error_report callback: wake interruptible sleepers on
+ * sk->sk_sleep and send POLL_ERR through sk_wake_async(), under
+ * sk_callback_lock held for reading.
+ */
+static void sock_def_error_report(struct sock *sk)
+{
+	wait_queue_head_t *wq;
+
+	read_lock(&sk->sk_callback_lock);
+	wq = sk->sk_sleep;
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
+	sk_wake_async(sk, 0, POLL_ERR);
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * Default sk_data_ready callback: wake interruptible sleepers on
+ * sk->sk_sleep and send POLL_IN through sk_wake_async(), under
+ * sk_callback_lock held for reading.  @len is not used.
+ */
+static void sock_def_readable(struct sock *sk, int len)
+{
+	wait_queue_head_t *wq;
+
+	read_lock(&sk->sk_callback_lock);
+	wq = sk->sk_sleep;
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
+	sk_wake_async(sk, 1, POLL_IN);
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * Default sk_write_space callback.  Does nothing until no more than half
+ * of sk_sndbuf is in use; then wakes interruptible sleepers and, if the
+ * socket is writeable, sends POLL_OUT through sk_wake_async().
+ */
+static void sock_def_write_space(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+
+	/* Do not wake up a writer until he can make "significant"
+	 * progress.  --DaveM
+	 */
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) > sk->sk_sndbuf)
+		goto out;
+
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+
+	/* Should agree with poll, otherwise some programs break */
+	if (sock_writeable(sk))
+		sk_wake_async(sk, 2, POLL_OUT);
+
+out:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * Default sk_destruct callback: release the protocol-private data hung
+ * off sk->sk_protinfo.  kfree() accepts NULL, so no guard is needed.
+ */
+static void sock_def_destruct(struct sock *sk)
+{
+	kfree(sk->sk_protinfo);
+}
+
+/*
+ * Deliver SIGURG to the owner of the socket's file, if the sock has a
+ * socket with a file attached; when send_sigurg() returns non-zero also
+ * send POLL_PRI through sk_wake_async().
+ */
+void sk_send_sigurg(struct sock *sk)
+{
+	if (!sk->sk_socket || !sk->sk_socket->file)
+		return;
+
+	if (send_sigurg(&sk->sk_socket->file->f_owner))
+		sk_wake_async(sk, 3, POLL_PRI);
+}
+
+/*
+ * (Re)arm @timer to fire at @expires.  A socket reference is taken with
+ * sock_hold() only when mod_timer() returns 0.
+ */
+void sk_reset_timer(struct sock *sk, struct timer_list* timer,
+		    unsigned long expires)
+{
+	if (mod_timer(timer, expires))
+		return;
+
+	sock_hold(sk);
+}
+
+EXPORT_SYMBOL(sk_reset_timer);
+
+/*
+ * Cancel @timer.  If it was pending and del_timer() removed it, drop a
+ * socket reference with __sock_put().
+ */
+void sk_stop_timer(struct sock *sk, struct timer_list* timer)
+{
+	if (!timer_pending(timer))
+		return;
+	if (!del_timer(timer))
+		return;
+
+	__sock_put(sk);
+}
+
+EXPORT_SYMBOL(sk_stop_timer);
+
+/*
+ * Initialise the generic part of @sk and, when @sock is not NULL, tie
+ * the two together (sock->sk, sk->sk_socket, sk->sk_sleep, sk->sk_type).
+ *
+ * Sets up the three skb queues, the timer, buffer sizes from the sysctl
+ * defaults, the default callbacks, "no timestamp yet" (-1) in sk_stamp
+ * and a reference count of 1.
+ */
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+	/* Queues and timer. */
+	skb_queue_head_init(&sk->sk_receive_queue);
+	skb_queue_head_init(&sk->sk_write_queue);
+	skb_queue_head_init(&sk->sk_error_queue);
+	sk->sk_send_head = NULL;
+	init_timer(&sk->sk_timer);
+
+	/* Allocation policy, buffer sizes and initial state. */
+	sk->sk_allocation = GFP_KERNEL;
+	sk->sk_rcvbuf = sysctl_rmem_default;
+	sk->sk_sndbuf = sysctl_wmem_default;
+	sk->sk_state = TCP_CLOSE;
+	sk->sk_socket = sock;
+
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	/* Link to the struct socket, if there is one. */
+	if (sock) {
+		sk->sk_type = sock->type;
+		sk->sk_sleep = &sock->wait;
+		sock->sk = sk;
+	} else {
+		sk->sk_sleep = NULL;
+	}
+
+	rwlock_init(&sk->sk_dst_lock);
+	rwlock_init(&sk->sk_callback_lock);
+
+	/* Default callbacks. */
+	sk->sk_state_change = sock_def_wakeup;
+	sk->sk_data_ready = sock_def_readable;
+	sk->sk_write_space = sock_def_write_space;
+	sk->sk_error_report = sock_def_error_report;
+	sk->sk_destruct = sock_def_destruct;
+
+	sk->sk_sndmsg_page = NULL;
+	sk->sk_sndmsg_off = 0;
+
+	/* No peer credentials known yet. */
+	sk->sk_peercred.pid = 0;
+	sk->sk_peercred.uid = -1;
+	sk->sk_peercred.gid = -1;
+
+	sk->sk_write_pending = 0;
+	sk->sk_rcvlowat = 1;
+	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+
+	sk->sk_stamp.tv_sec = -1L;
+	sk->sk_stamp.tv_usec = -1L;
+
+	atomic_set(&sk->sk_refcnt, 1);
+}
+
+/*
+ * Take user-context ownership of @sk.  May sleep: if the socket already
+ * has an owner, __lock_sock() waits until it is released.  Ownership is
+ * recorded in sk_lock.owner under sk_lock.slock.
+ */
+void fastcall lock_sock(struct sock *sk)
+{
+	might_sleep();
+
+	spin_lock_bh(&sk->sk_lock.slock);
+	if (sk->sk_lock.owner != NULL)
+		__lock_sock(sk);
+	sk->sk_lock.owner = (void *)1;
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
+EXPORT_SYMBOL(lock_sock);
+
+/*
+ * Give up ownership of @sk: process the backlog queue if it is not
+ * empty, clear sk_lock.owner and wake anybody sleeping on sk_lock.wq.
+ */
+void fastcall release_sock(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (sk->sk_backlog.tail != NULL)
+		__release_sock(sk);
+
+	sk->sk_lock.owner = NULL;
+	if (waitqueue_active(&sk->sk_lock.wq))
+		wake_up(&sk->sk_lock.wq);
+
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+EXPORT_SYMBOL(release_sock);
+
+/*
+ * Copy the socket's sk_stamp to user space, enabling timestamping on the
+ * socket first if necessary.
+ *
+ * Returns -ENOENT while tv_sec still holds -1 (the value set by
+ * sock_init_data()), -EFAULT if the copy fails, 0 otherwise.  A tv_sec
+ * of 0 is replaced by the current time before copying.
+ */
+int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+{
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+
+	if (sk->sk_stamp.tv_sec == -1)
+		return -ENOENT;
+
+	if (sk->sk_stamp.tv_sec == 0)
+		do_gettimeofday(&sk->sk_stamp);
+
+	if (copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(sock_get_timestamp);
+
+/*
+ * Set SOCK_TIMESTAMP on @sk and call net_enable_timestamp(), unless the
+ * flag is already set.
+ */
+void sock_enable_timestamp(struct sock *sk)
+{
+	if (sock_flag(sk, SOCK_TIMESTAMP))
+		return;
+
+	sock_set_flag(sk, SOCK_TIMESTAMP);
+	net_enable_timestamp();
+}
+EXPORT_SYMBOL(sock_enable_timestamp); 
+
+/*
+ *	Get a socket option on a socket.
+ *
+ *	FIX: POSIX 1003.1g is very ambiguous here. It states that
+ *	asynchronous errors should be reported by getsockopt. We assume
+ *	this means if you specify SO_ERROR (otherwise what's the point of it).
+ *
+ *	Simply forwards to the getsockopt method of the sock's protocol.
+ */
+int sock_common_getsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct proto *prot = sk->sk_prot;
+
+	return prot->getsockopt(sk, level, optname, optval, optlen);
+}
+
+EXPORT_SYMBOL(sock_common_getsockopt);
+
+/*
+ * Generic recvmsg: forwards to the protocol's recvmsg method, passing
+ * MSG_DONTWAIT separately from the remaining @flags.  When the call does
+ * not fail, msg->msg_namelen is set to the address length the protocol
+ * reported.  Returns the protocol's result unchanged.
+ */
+int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int namelen = 0;
+	int ret;
+
+	ret = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &namelen);
+	if (ret < 0)
+		return ret;
+
+	msg->msg_namelen = namelen;
+	return ret;
+}
+
+EXPORT_SYMBOL(sock_common_recvmsg);
+
+/*
+ *	Set socket options on an inet socket.
+ *
+ *	Simply forwards to the setsockopt method of the sock's protocol.
+ */
+int sock_common_setsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct proto *prot = sk->sk_prot;
+
+	return prot->setsockopt(sk, level, optname, optval, optlen);
+}
+
+EXPORT_SYMBOL(sock_common_setsockopt);
+
+/*
+ * Common release path: run the protocol's optional destroy hook, unhash
+ * the sock, orphan it, free its xfrm policy and drop one reference.
+ */
+void sk_common_release(struct sock *sk)
+{
+	if (sk->sk_prot->destroy)
+		sk->sk_prot->destroy(sk);
+
+	/*
+	 * Observation: when sk_common_release is called, processes have
+	 * no access to socket. But net still has.
+	 * Step one, detach it from networking:
+	 *
+	 * A. Remove from hash tables.
+	 */
+
+	sk->sk_prot->unhash(sk);
+
+	/*
+	 * At this point the socket cannot receive new packets, but it is
+	 * possible that some packets are in flight because some CPU runs
+	 * the receiver and did the hash table lookup before we unhashed
+	 * the socket. They will reach the receive queue and will be purged
+	 * by the socket destructor.
+	 *
+	 * Also we still have packets pending on receive queue and probably,
+	 * our own packets waiting in device queues. sock_destroy will drain
+	 * receive queue, but transmitted packets will delay socket destruction
+	 * until the last reference is released.
+	 */
+
+	sock_orphan(sk);
+
+	xfrm_sk_free_policy(sk);
+
+#ifdef INET_REFCNT_DEBUG
+	if (atomic_read(&sk->sk_refcnt) != 1)
+		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
+		       sk, atomic_read(&sk->sk_refcnt));
+#endif
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(sk_common_release);
+
+static DEFINE_RWLOCK(proto_list_lock);
+static LIST_HEAD(proto_list);
+
+/*
+ * Register @prot on the global protocol list.
+ *
+ * When @alloc_slab is non-zero a dedicated slab cache named prot->name
+ * with objects of prot->obj_size bytes is created first.  The cache is
+ * created before proto_list_lock is taken: kmem_cache_create() may
+ * sleep, which is not allowed while holding a spinning rwlock.
+ *
+ * Returns 0 on success, -ENOBUFS if the slab cache cannot be created (in
+ * which case the protocol is not added to the list).
+ */
+int proto_register(struct proto *prot, int alloc_slab)
+{
+	if (alloc_slab) {
+		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
+					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+		if (prot->slab == NULL) {
+			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
+			       prot->name);
+			return -ENOBUFS;
+		}
+	}
+
+	/* Only the list manipulation needs the lock. */
+	write_lock(&proto_list_lock);
+	list_add(&prot->node, &proto_list);
+	write_unlock(&proto_list_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(proto_register);
+
+/*
+ * Remove @prot from the global protocol list and destroy its slab cache,
+ * if it has one.
+ *
+ * The protocol is unlinked under proto_list_lock; the slab cache is torn
+ * down afterwards, outside the lock, because kmem_cache_destroy() may
+ * sleep.
+ */
+void proto_unregister(struct proto *prot)
+{
+	write_lock(&proto_list_lock);
+	list_del(&prot->node);
+	write_unlock(&proto_list_lock);
+
+	if (prot->slab != NULL) {
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
+}
+
+EXPORT_SYMBOL(proto_unregister);
+
+#ifdef CONFIG_PROC_FS
+/*
+ * First entry of proto_list.  Does not check for an empty list; use
+ * proto_head() when the list may be empty.
+ */
+static inline struct proto *__proto_head(void)
+{
+	return list_entry(proto_list.next, struct proto, node);
+}
+
+/* First entry of proto_list, or NULL when the list is empty. */
+static inline struct proto *proto_head(void)
+{
+	if (list_empty(&proto_list))
+		return NULL;
+
+	return __proto_head();
+}
+
+/* Entry following @proto on proto_list, or NULL if @proto is the last. */
+static inline struct proto *proto_next(struct proto *proto)
+{
+	struct list_head *next = proto->node.next;
+
+	if (next == &proto_list)
+		return NULL;
+
+	return list_entry(next, struct proto, node);
+}
+
+/*
+ * Return the entry at zero-based position @pos on proto_list, or NULL if
+ * the list has fewer than @pos + 1 entries.
+ */
+static inline struct proto *proto_get_idx(loff_t pos)
+{
+	struct proto *proto;
+	loff_t i = 0;
+
+	list_for_each_entry(proto, &proto_list, node)
+		if (i++ == pos)
+			return proto;
+
+	return NULL;
+}
+
+/*
+ * seq_file start: takes proto_list_lock for reading (released in
+ * proto_seq_stop()).  Position 0 yields SEQ_START_TOKEN (the header
+ * line); position n > 0 yields list entry n - 1.
+ */
+static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&proto_list_lock);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return proto_get_idx(*pos - 1);
+}
+
+/*
+ * seq_file next: advance *@pos and return the entry after @v; the first
+ * list entry follows SEQ_START_TOKEN.
+ */
+static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	if (v == SEQ_START_TOKEN)
+		return proto_head();
+
+	return proto_next(v);
+}
+
+/* seq_file stop: drop the read lock taken in proto_seq_start(). */
+static void proto_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&proto_list_lock);
+}
+
+/* 'y' if @method is a non-NULL pointer, 'n' otherwise. */
+static char proto_method_implemented(const void *method)
+{
+	if (method != NULL)
+		return 'y';
+
+	return 'n';
+}
+
+/*
+ * Emit one /proc/net/protocols line for @proto: name, object size,
+ * socket and memory counters (-1 when the protocol has no counter),
+ * memory pressure ("NI" when the protocol has no pressure flag), max
+ * header, slab usage, owning module, and one y/n column per method.
+ */
+static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
+{
+	int sockets = -1;
+	int memory = -1;
+	const char *pressure = "NI";
+
+	if (proto->sockets_allocated != NULL)
+		sockets = atomic_read(proto->sockets_allocated);
+	if (proto->memory_allocated != NULL)
+		memory = atomic_read(proto->memory_allocated);
+	if (proto->memory_pressure != NULL)
+		pressure = *proto->memory_pressure ? "yes" : "no";
+
+	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
+			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
+		   proto->name,
+		   proto->obj_size,
+		   sockets,
+		   memory,
+		   pressure,
+		   proto->max_header,
+		   proto->slab == NULL ? "no" : "yes",
+		   module_name(proto->owner),
+		   proto_method_implemented(proto->close),
+		   proto_method_implemented(proto->connect),
+		   proto_method_implemented(proto->disconnect),
+		   proto_method_implemented(proto->accept),
+		   proto_method_implemented(proto->ioctl),
+		   proto_method_implemented(proto->init),
+		   proto_method_implemented(proto->destroy),
+		   proto_method_implemented(proto->shutdown),
+		   proto_method_implemented(proto->setsockopt),
+		   proto_method_implemented(proto->getsockopt),
+		   proto_method_implemented(proto->sendmsg),
+		   proto_method_implemented(proto->recvmsg),
+		   proto_method_implemented(proto->sendpage),
+		   proto_method_implemented(proto->bind),
+		   proto_method_implemented(proto->backlog_rcv),
+		   proto_method_implemented(proto->hash),
+		   proto_method_implemented(proto->unhash),
+		   proto_method_implemented(proto->get_port),
+		   proto_method_implemented(proto->enter_memory_pressure));
+}
+
+/*
+ * seq_file show: print the column header for SEQ_START_TOKEN, otherwise
+ * the line for the protocol @v points to.  Always returns 0.
+ */
+static int proto_seq_show(struct seq_file *seq, void *v)
+{
+	if (v != SEQ_START_TOKEN) {
+		proto_seq_printf(seq, v);
+		return 0;
+	}
+
+	seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
+		   "protocol",
+		   "size",
+		   "sockets",
+		   "memory",
+		   "press",
+		   "maxhdr",
+		   "slab",
+		   "module",
+		   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
+	return 0;
+}
+
+/* seq_file iterator over proto_list, used by proto_seq_open(). */
+static struct seq_operations proto_seq_ops = {
+	.start  = proto_seq_start,
+	.next   = proto_seq_next,
+	.stop   = proto_seq_stop,
+	.show   = proto_seq_show,
+};
+
+/* open(): attach the proto_seq_ops iterator to @file via seq_open(). */
+static int proto_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &proto_seq_ops);
+}
+
+/*
+ * File operations for the "protocols" proc entry registered in
+ * proto_init(): a custom open plus the generic seq_file helpers.
+ */
+static struct file_operations proto_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= proto_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * Register /proc/net/protocols.  Returns 0 on success, -ENOBUFS if the
+ * proc entry cannot be created.
+ */
+static int __init proto_init(void)
+{
+	if (proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL)
+		return -ENOBUFS;
+
+	return 0;
+}
+
+subsys_initcall(proto_init);
+
+#endif /* PROC_FS */
+
+EXPORT_SYMBOL(sk_alloc);
+EXPORT_SYMBOL(sk_free);
+EXPORT_SYMBOL(sk_send_sigurg);
+EXPORT_SYMBOL(sock_alloc_send_skb);
+EXPORT_SYMBOL(sock_init_data);
+EXPORT_SYMBOL(sock_kfree_s);
+EXPORT_SYMBOL(sock_kmalloc);
+EXPORT_SYMBOL(sock_no_accept);
+EXPORT_SYMBOL(sock_no_bind);
+EXPORT_SYMBOL(sock_no_connect);
+EXPORT_SYMBOL(sock_no_getname);
+EXPORT_SYMBOL(sock_no_getsockopt);
+EXPORT_SYMBOL(sock_no_ioctl);
+EXPORT_SYMBOL(sock_no_listen);
+EXPORT_SYMBOL(sock_no_mmap);
+EXPORT_SYMBOL(sock_no_poll);
+EXPORT_SYMBOL(sock_no_recvmsg);
+EXPORT_SYMBOL(sock_no_sendmsg);
+EXPORT_SYMBOL(sock_no_sendpage);
+EXPORT_SYMBOL(sock_no_setsockopt);
+EXPORT_SYMBOL(sock_no_shutdown);
+EXPORT_SYMBOL(sock_no_socketpair);
+EXPORT_SYMBOL(sock_rfree);
+EXPORT_SYMBOL(sock_setsockopt);
+EXPORT_SYMBOL(sock_wfree);
+EXPORT_SYMBOL(sock_wmalloc);
+EXPORT_SYMBOL(sock_i_uid);
+EXPORT_SYMBOL(sock_i_ino);
+#ifdef CONFIG_SYSCTL
+EXPORT_SYMBOL(sysctl_optmem_max);
+EXPORT_SYMBOL(sysctl_rmem_max);
+EXPORT_SYMBOL(sysctl_wmem_max);
+#endif
diff --git a/net/core/stream.c b/net/core/stream.c
new file mode 100644
index 00000000000..1e27a57b5a9
--- /dev/null
+++ b/net/core/stream.c
@@ -0,0 +1,287 @@
+/*
+ *     SUCS NET3:
+ *
+ *     Generic stream handling routines. These are generic for most
+ *     protocols. Even IP. Tonight 8-).
+ *     This is used because TCP, LLC (others too) layer all have mostly
+ *     identical sendmsg() and recvmsg() code.
+ *     So we (will) share it here.
+ *
+ *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *                     (from old tcp.c code)
+ *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/signal.h>
+#include <linux/tcp.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+/**
+ * sk_stream_write_space - stream socket write_space callback.
+ * @sk: socket
+ *
+ * Once the write space reported by sk_stream_wspace() reaches
+ * sk_stream_min_wspace() and the sock has a struct socket: clear
+ * SOCK_NOSPACE, wake interruptible sleepers on sk_sleep and, if there
+ * are fasync listeners and the send side is not shut down, send POLL_OUT.
+ */
+void sk_stream_write_space(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk) || !sock)
+		return;
+
+	clear_bit(SOCK_NOSPACE, &sock->flags);
+
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+
+	if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+		sock_wake_async(sock, 2, POLL_OUT);
+}
+
+EXPORT_SYMBOL(sk_stream_write_space);
+
+/**
+ * sk_stream_wait_connect - Wait for a socket to get into the connected state
+ * @sk: sock to wait on
+ * @timeo_p: for how long to wait
+ *
+ * Must be called with the socket locked.
+ *
+ * Returns 0 once the socket is in ESTABLISHED or CLOSE_WAIT state, the
+ * pending socket error if sk_err is set, -EPIPE if the socket is in
+ * neither SYN_SENT nor SYN_RECV, -EAGAIN when the timeout is exhausted,
+ * or sock_intr_errno() when a signal is pending.
+ *
+ * The wait entry is always removed with finish_wait() and
+ * sk_write_pending is always decremented after each wait, including the
+ * successful one, so neither a queued on-stack wait entry nor an
+ * elevated counter is left behind on return.
+ */
+int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
+{
+	struct task_struct *tsk = current;
+	DEFINE_WAIT(wait);
+	int done;
+
+	do {
+		if (sk->sk_err)
+			return sock_error(sk);
+		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
+			return -EPIPE;
+		if (!*timeo_p)
+			return -EAGAIN;
+		if (signal_pending(tsk))
+			return sock_intr_errno(*timeo_p);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		sk->sk_write_pending++;
+		done = sk_wait_event(sk, timeo_p,
+				     !((1 << sk->sk_state) &
+				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
+		finish_wait(sk->sk_sleep, &wait);
+		sk->sk_write_pending--;
+	} while (!done);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_wait_connect);
+
+/**
+ * sk_stream_closing - Return non-zero if we still have things to send in our buffers.
+ * @sk: socket to verify
+ *
+ * The test is purely state based: non-zero while the socket is in
+ * FIN_WAIT1, CLOSING or LAST_ACK.
+ */
+static inline int sk_stream_closing(struct sock *sk)
+{
+	const int closing_states = TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK;
+
+	return (1 << sk->sk_state) & closing_states;
+}
+
+/*
+ * Wait up to @timeout for the socket to leave the states tested by
+ * sk_stream_closing().  Gives up early on a pending signal.  Does
+ * nothing when @timeout is 0.
+ */
+void sk_stream_wait_close(struct sock *sk, long timeout)
+{
+	DEFINE_WAIT(wait);
+
+	if (!timeout)
+		return;
+
+	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
+			break;
+		if (signal_pending(current) || !timeout)
+			break;
+	}
+
+	finish_wait(sk->sk_sleep, &wait);
+}
+
+EXPORT_SYMBOL(sk_stream_wait_close);
+
+/**
+ * sk_stream_wait_memory - Wait for more memory for a socket
+ * @sk: socket to wait for memory
+ * @timeo_p: for how long; updated with the remaining time
+ *
+ * Returns 0 when sk_stream_memory_free() reports room, -EPIPE on a
+ * socket error or send shutdown, -EAGAIN when the timeout is exhausted,
+ * or sock_intr_errno() when a signal is pending.
+ *
+ * If the socket already has room on entry, a short random back-off
+ * (vm_wait) is slept first instead of the caller's timeout.  While that
+ * back-off is pending the wait condition must stay false, hence the
+ * "!vm_wait" below; otherwise sk_wait_event() would return immediately
+ * and the back-off would never actually sleep.
+ */
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+{
+	int err = 0;
+	long vm_wait = 0;
+	long current_timeo = *timeo_p;
+	DEFINE_WAIT(wait);
+
+	/*
+	 * Socket has room already: presumably the caller failed on some
+	 * other memory limit, so only back off briefly.
+	 */
+	if (sk_stream_memory_free(sk))
+		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
+
+	while (1) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+			goto do_error;
+		if (!*timeo_p)
+			goto do_nonblock;
+		if (signal_pending(current))
+			goto do_interrupted;
+		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		if (sk_stream_memory_free(sk) && !vm_wait)
+			break;
+
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		sk->sk_write_pending++;
+		sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
+						  !vm_wait);
+		sk->sk_write_pending--;
+
+		if (vm_wait) {
+			/*
+			 * Charge the time actually spent in the back-off
+			 * against the caller's timeout, then fall back to
+			 * normal waiting.
+			 */
+			vm_wait -= current_timeo;
+			current_timeo = *timeo_p;
+			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
+			    (current_timeo -= vm_wait) < 0)
+				current_timeo = 0;
+			vm_wait = 0;
+		}
+		*timeo_p = current_timeo;
+	}
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return err;
+
+do_error:
+	err = -EPIPE;
+	goto out;
+do_nonblock:
+	err = -EAGAIN;
+	goto out;
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto out;
+}
+
+EXPORT_SYMBOL(sk_stream_wait_memory);
+
+/*
+ * Read-side skb destructor for stream sockets: moves skb->truesize from
+ * the owning socket's sk_rmem_alloc to its sk_forward_alloc.
+ */
+void sk_stream_rfree(struct sk_buff *skb)
+{
+	struct sock *owner = skb->sk;
+
+	atomic_sub(skb->truesize, &owner->sk_rmem_alloc);
+	owner->sk_forward_alloc += skb->truesize;
+}
+
+EXPORT_SYMBOL(sk_stream_rfree);
+
+/*
+ * Post-process a send error.  For -EPIPE a pending socket error, if any,
+ * takes precedence.  If the result is still -EPIPE and MSG_NOSIGNAL is
+ * not set in @flags, SIGPIPE is sent to the current task.  Returns the
+ * resulting error code.
+ */
+int sk_stream_error(struct sock *sk, int flags, int err)
+{
+	if (err == -EPIPE) {
+		int pending = sock_error(sk);
+
+		if (pending)
+			err = pending;
+	}
+
+	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+
+	return err;
+}
+
+EXPORT_SYMBOL(sk_stream_error);
+
+/*
+ * Return whole SK_STREAM_MEM_QUANTUM units of sk_forward_alloc to the
+ * protocol's memory_allocated counter, keeping only the remainder.  If
+ * the protocol is under memory pressure and the counter is now below
+ * sysctl_mem[0], the pressure flag is cleared.
+ */
+void __sk_stream_mem_reclaim(struct sock *sk)
+{
+	struct proto *prot = sk->sk_prot;
+
+	if (sk->sk_forward_alloc < SK_STREAM_MEM_QUANTUM)
+		return;
+
+	atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+		   prot->memory_allocated);
+	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
+
+	if (*prot->memory_pressure &&
+	    atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])
+		*prot->memory_pressure = 0;
+}
+
+EXPORT_SYMBOL(__sk_stream_mem_reclaim);
+
+/*
+ * Try to reserve memory for @size more bytes on @sk.
+ *
+ * sk_stream_pages(@size) quanta are added to sk_forward_alloc and to the
+ * protocol's memory_allocated counter up front; the result is then
+ * checked against the three protocol-wide thresholds in sysctl_mem[].
+ * @kind selects which per-socket minimum applies: non-zero compares
+ * sk_rmem_alloc with sysctl_rmem[0], zero compares sk_wmem_queued with
+ * sysctl_wmem[0].
+ *
+ * Returns 1 if the reservation stands, 0 if it was refused (in which
+ * case the accounting changes are undone).
+ */
+int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
+{
+	int amt = sk_stream_pages(size);
+
+	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
+	atomic_add(amt, sk->sk_prot->memory_allocated);
+
+	/* Under limit. */
+	if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
+		if (*sk->sk_prot->memory_pressure)
+			*sk->sk_prot->memory_pressure = 0;
+		return 1;
+	}
+
+	/* Over hard limit. */
+	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
+		sk->sk_prot->enter_memory_pressure();
+		goto suppress_allocation;
+	}
+
+	/* Under pressure. */
+	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
+		sk->sk_prot->enter_memory_pressure();
+
+	/* A socket below its per-socket minimum is always served. */
+	if (kind) {
+		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
+			return 1;
+	} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
+		return 1;
+
+	/*
+	 * Allow it if there is no pressure, or if every allocated socket
+	 * using as many pages as this one would still fit under the hard
+	 * limit.
+	 */
+	if (!*sk->sk_prot->memory_pressure ||
+	    sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
+				sk_stream_pages(sk->sk_wmem_queued +
+						atomic_read(&sk->sk_rmem_alloc) +
+						sk->sk_forward_alloc))
+		return 1;
+
+suppress_allocation:
+
+	if (!kind) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so that we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
+	atomic_sub(amt, sk->sk_prot->memory_allocated);
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_mem_schedule);
+
+/*
+ * Purge the receive and error queues of @sk and reclaim its forward
+ * allocated memory.  The write queue is expected to be empty already,
+ * and sk_wmem_queued / sk_forward_alloc are expected to be 0 afterwards;
+ * each expectation is checked with BUG_TRAP().
+ */
+void sk_stream_kill_queues(struct sock *sk)
+{
+	/* First the read buffer. */
+	__skb_queue_purge(&sk->sk_receive_queue);
+
+	/* Next, the error queue. */
+	__skb_queue_purge(&sk->sk_error_queue);
+
+	/* Next, the write queue. */
+	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
+
+	/* Account for returned memory. */
+	sk_stream_mem_reclaim(sk);
+
+	BUG_TRAP(!sk->sk_wmem_queued);
+	BUG_TRAP(!sk->sk_forward_alloc);
+
+	/* It is _impossible_ for the backlog to contain anything
+	 * when we get here.  All user references to this socket
+	 * have gone away, only the net layer can touch it.
+	 */
+}
+
+EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
new file mode 100644
index 00000000000..c8be646cb19
--- /dev/null
+++ b/net/core/sysctl_net_core.c
@@ -0,0 +1,182 @@
+/* -*- linux-c -*-
+ * sysctl_net_core.c: sysctl interface to net core subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/core directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/config.h>
+#include <linux/module.h>
+
+#ifdef CONFIG_SYSCTL
+
+extern int netdev_max_backlog;
+extern int weight_p;
+extern int no_cong_thresh;
+extern int no_cong;
+extern int lo_cong;
+extern int mod_cong;
+extern int netdev_fastroute;
+extern int net_msg_cost;
+extern int net_msg_burst;
+
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;
+extern __u32 sysctl_wmem_default;
+extern __u32 sysctl_rmem_default;
+
+extern int sysctl_core_destroy_delay;
+extern int sysctl_optmem_max;
+extern int sysctl_somaxconn;
+
+#ifdef CONFIG_NET_DIVERT
+extern char sysctl_divert_version[];
+#endif /* CONFIG_NET_DIVERT */
+
+/*
+ * This strdup() is used for creating copies of network
+ * device names to be handed over to sysctl.
+ *
+ * Returns a kmalloc(GFP_KERNEL) copy of @s, or NULL if the allocation
+ * fails.
+ */
+
+char *net_sysctl_strdup(const char *s)
+{
+	char *copy = kmalloc(strlen(s) + 1, GFP_KERNEL);
+
+	if (copy == NULL)
+		return NULL;
+
+	strcpy(copy, s);
+	return copy;
+}
+
+/*
+ * sysctl entries of the net core subsystem.  Every entry except
+ * "somaxconn" is only built when CONFIG_NET is set; "divert_version"
+ * additionally requires CONFIG_NET_DIVERT and is read-only (0444).
+ * The table is terminated by an entry with ctl_name 0.
+ */
+ctl_table core_table[] = {
+#ifdef CONFIG_NET
+	{
+		.ctl_name	= NET_CORE_WMEM_MAX,
+		.procname	= "wmem_max",
+		.data		= &sysctl_wmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_RMEM_MAX,
+		.procname	= "rmem_max",
+		.data		= &sysctl_rmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_WMEM_DEFAULT,
+		.procname	= "wmem_default",
+		.data		= &sysctl_wmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_RMEM_DEFAULT,
+		.procname	= "rmem_default",
+		.data		= &sysctl_rmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_DEV_WEIGHT,
+		.procname	= "dev_weight",
+		.data		= &weight_p,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MAX_BACKLOG,
+		.procname	= "netdev_max_backlog",
+		.data		= &netdev_max_backlog,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_NO_CONG_THRESH,
+		.procname	= "no_cong_thresh",
+		.data		= &no_cong_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_NO_CONG,
+		.procname	= "no_cong",
+		.data		= &no_cong,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_LO_CONG,
+		.procname	= "lo_cong",
+		.data		= &lo_cong,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MOD_CONG,
+		.procname	= "mod_cong",
+		.data		= &mod_cong,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		/* Handled by the jiffies-aware handler and strategy. */
+		.ctl_name	= NET_CORE_MSG_COST,
+		.procname	= "message_cost",
+		.data		= &net_msg_cost,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_CORE_MSG_BURST,
+		.procname	= "message_burst",
+		.data		= &net_msg_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CORE_OPTMEM_MAX,
+		.procname	= "optmem_max",
+		.data		= &sysctl_optmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#ifdef CONFIG_NET_DIVERT
+	{
+		.ctl_name	= NET_CORE_DIVERT_VERSION,
+		.procname	= "divert_version",
+		.data		= (void *)sysctl_divert_version,
+		.maxlen		= 32,
+		.mode		= 0444,
+		.proc_handler	= &proc_dostring
+	},
+#endif /* CONFIG_NET_DIVERT */
+#endif /* CONFIG_NET */
+	{
+		.ctl_name	= NET_CORE_SOMAXCONN,
+		.procname	= "somaxconn",
+		.data		= &sysctl_somaxconn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+EXPORT_SYMBOL(net_sysctl_strdup);
+
+#endif
diff --git a/net/core/utils.c b/net/core/utils.c
new file mode 100644
index 00000000000..e11a8654f36
--- /dev/null
+++ b/net/core/utils.c
@@ -0,0 +1,155 @@
+/*
+ *	Generic address resolution entity
+ *
+ *	Authors:
+ *	net_random Alan Cox
+ *	net_ratelimit Andy Kleen
+ *
+ *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+
+/*
+  This is a maximally equidistributed combined Tausworthe generator
+  based on code from GNU Scientific Library 1.5 (30 Jun 2004)
+
+   x_n = (s1_n ^ s2_n ^ s3_n) 
+
+   s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
+   s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
+   s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+
+   The period of this generator is about 2^88.
+
+   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+   Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
+
+   This is available on the net from L'Ecuyer's home page,
+
+   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
+   ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps 
+
+   There is an erratum in the paper "Tables of Maximally
+   Equidistributed Combined LFSR Generators", Mathematics of
+   Computation, 68, 225 (1999), 261--269:
+   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
+
+        ... the k_j most significant bits of z_j must be non-
+        zero, for each j. (Note: this restriction also applies to the 
+        computer code given in [4], but was mistakenly not mentioned in
+        that paper.)
+   
+   This affects the seeding procedure by imposing the requirement
+   s1 > 1, s2 > 7, s3 > 15.
+
+*/
+/* State of one combined Tausworthe generator: its three component words. */
+struct nrnd_state {
+	u32 s1;		/* first component, seeding requires s1 > 1 */
+	u32 s2;		/* second component, seeding requires s2 > 7 */
+	u32 s3;		/* third component, seeding requires s3 > 15 */
+};
+
+/* One generator state per CPU. */
+static DEFINE_PER_CPU(struct nrnd_state, net_rand_state);
+
+/*
+ * Advance the generator in @state by one step and return the next
+ * output, which is the XOR of the three updated component words.
+ * Only the state passed in is read or written.
+ */
+static u32 __net_random(struct nrnd_state *state)
+{
+/* One Tausworthe component step. Every argument is parenthesised so the
+ * expansion does not depend on the precedence of what is passed in. */
+#define TAUSWORTHE(s,a,b,c,d) ((((s) & (c)) << (d)) ^ ((((s) << (a)) ^ (s)) >> (b)))
+
+	state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
+	state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
+	state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+
+/* The helper is private to this function, don't leak it to the file */
+#undef TAUSWORTHE
+
+	return (state->s1 ^ state->s2 ^ state->s3);
+}
+
+/*
+ * Seed the generator in @state from @s and discard its first six outputs.
+ *
+ * Only the low 32 bits of @s are used, because the state words are u32.
+ * A seed whose low 32 bits are zero is replaced by the default seed 1:
+ * on 64-bit builds a non-zero @s can still truncate to zero, which would
+ * give an all-zero state that the generator can never leave.
+ *
+ * Each component is then forced to satisfy the seeding requirement
+ * s1 > 1, s2 > 7, s3 > 15 quoted in the generator description above.
+ */
+static void __net_srandom(struct nrnd_state *state, unsigned long s)
+{
+	u32 seed = (u32) s;
+	int i;
+
+	if (seed == 0)
+		seed = 1;      /* default seed is 1 */
+
+#define LCG(n) (69069 * (n))
+/* Lift a value that is below the component's minimum into the valid range */
+#define NRND_SEED(x, min) (((x) < (min)) ? (x) + (min) : (x))
+	state->s1 = LCG(seed);
+	state->s1 = NRND_SEED(state->s1, 2U);
+	state->s2 = LCG(state->s1);
+	state->s2 = NRND_SEED(state->s2, 8U);
+	state->s3 = LCG(state->s2);
+	state->s3 = NRND_SEED(state->s3, 16U);
+#undef NRND_SEED
+#undef LCG
+
+	/* "warm it up" */
+	for (i = 0; i < 6; i++)
+		__net_random(state);
+}
+
+
+/*
+ * Return the next pseudo-random number from the current CPU's generator.
+ * The per-CPU state is held with get_cpu_var()/put_cpu_var() while the
+ * generator is stepped. The result is a u32 widened to unsigned long.
+ */
+unsigned long net_random(void)
+{
+	unsigned long r;
+	struct nrnd_state *state = &get_cpu_var(net_rand_state);
+
+	r = __net_random(state);
+	/* Release the same per-CPU variable that get_cpu_var() took */
+	put_cpu_var(net_rand_state);
+	return r;
+}
+
+
+/*
+ * Mix @entropy into the current CPU's generator: the generator is
+ * reseeded with its current s1 word XORed with @entropy.
+ */
+void net_srandom(unsigned long entropy)
+{
+	struct nrnd_state *state = &get_cpu_var(net_rand_state);
+
+	__net_srandom(state, state->s1 ^ entropy);
+	/* Release the same per-CPU variable that get_cpu_var() took */
+	put_cpu_var(net_rand_state);
+}
+
+/*
+ * Boot-time seeding: give each per-CPU generator slot (0 .. NR_CPUS-1)
+ * a seed made of its index plus the current jiffies value.
+ */
+void __init net_random_init(void)
+{
+	int cpu = 0;
+
+	while (cpu < NR_CPUS) {
+		__net_srandom(&per_cpu(net_rand_state, cpu), cpu + jiffies);
+		cpu++;
+	}
+}
+
+/*
+ * Reseed every per-CPU generator slot from get_random_bytes().
+ * Registered through late_initcall() below; always returns 0.
+ *
+ * One seed is drawn per iteration so that the stack cost is a single
+ * unsigned long rather than an on-stack array that grows with NR_CPUS.
+ */
+static int net_random_reseed(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct nrnd_state *state = &per_cpu(net_rand_state,i);
+		unsigned long seed;
+
+		get_random_bytes(&seed, sizeof(seed));
+		__net_srandom(state, seed);
+	}
+	return 0;
+}
+late_initcall(net_random_reseed);
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10;
+
+/*
+ * All net warning printk()s should be guarded by this function.
+ * Returns the verdict of __printk_ratelimit() for the net_msg_cost and
+ * net_msg_burst settings.
+ */
+int net_ratelimit(void)
+{
+	int	allowed = __printk_ratelimit(net_msg_cost, net_msg_burst);
+
+	return allowed;
+}
+
+EXPORT_SYMBOL(net_random);
+EXPORT_SYMBOL(net_ratelimit);
+EXPORT_SYMBOL(net_srandom);
diff --git a/net/core/wireless.c b/net/core/wireless.c
new file mode 100644
index 00000000000..750cc5daeb0
--- /dev/null
+++ b/net/core/wireless.c
@@ -0,0 +1,1459 @@
+/*
+ * This file implements the Wireless Extensions APIs.
+ *
+ * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
+ * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved.
+ *
+ * (As all part of the Linux kernel, this file is GPL)
+ */
+
+/************************** DOCUMENTATION **************************/
+/*
+ * API definition :
+ * --------------
+ * See <linux/wireless.h> for details of the APIs and the rest.
+ *
+ * History :
+ * -------
+ *
+ * v1 - 5.12.01 - Jean II
+ *	o Created this file.
+ *
+ * v2 - 13.12.01 - Jean II
+ *	o Move /proc/net/wireless stuff from net/core/dev.c to here
+ *	o Make Wireless Extension IOCTLs go through here
+ *	o Added iw_handler handling ;-)
+ *	o Added standard ioctl description
+ *	o Initial dumb commit strategy based on orinoco.c
+ *
+ * v3 - 19.12.01 - Jean II
+ *	o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call
+ *	o Add event dispatcher function
+ *	o Add event description
+ *	o Propagate events as rtnetlink IFLA_WIRELESS option
+ *	o Generate event on selected SET requests
+ *
+ * v4 - 18.04.02 - Jean II
+ *	o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1
+ *
+ * v5 - 21.06.02 - Jean II
+ *	o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup)
+ *	o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes
+ *	o Add IWEVCUSTOM for driver specific event/scanning token
+ *	o Turn on WE_STRICT_WRITE by default + kernel warning
+ *	o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num)
+ *	o Fix off-by-one in test (extra_size <= IFNAMSIZ)
+ *
+ * v6 - 9.01.03 - Jean II
+ *	o Add common spy support : iw_handler_set_spy(), wireless_spy_update()
+ *	o Add enhanced spy support : iw_handler_set_thrspy() and event.
+ *	o Add WIRELESS_EXT version display in /proc/net/wireless
+ *
+ * v6 - 18.06.04 - Jean II
+ *	o Change get_spydata() method for added safety
+ *	o Remove spy #ifdef, they are always on -> cleaner code
+ *	o Allow any size GET request if user specifies length > max
+ *		and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV
+ *	o Start migrating get_wireless_stats to struct iw_handler_def
+ *	o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus
+ * Based on patch from Pavel Roskin <proski@gnu.org> :
+ *	o Fix kernel data leak to user space in private handler handling
+ */
+
+/***************************** INCLUDES *****************************/
+
+#include <linux/config.h>		/* Not needed ??? */
+#include <linux/module.h>
+#include <linux/types.h>		/* off_t */
+#include <linux/string.h>		/* memset() */
+#include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
+#include <linux/proc_fs.h>
+#include <linux/rtnetlink.h>		/* rtnetlink stuff */
+#include <linux/seq_file.h>
+#include <linux/init.h>			/* for __init */
+#include <linux/if_arp.h>		/* ARPHRD_ETHER */
+
+#include <linux/wireless.h>		/* Pretty obvious */
+#include <net/iw_handler.h>		/* New driver API */
+
+#include <asm/uaccess.h>		/* copy_to_user() */
+
+/**************************** CONSTANTS ****************************/
+
+/* Debugging stuff */
+#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
+#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
+#undef WE_SPY_DEBUG		/* Debug enhanced spy support */
+
+/* Options */
+#define WE_EVENT_NETLINK	/* Propagate events using rtnetlink */
+#define WE_SET_EVENT		/* Generate an event on some set commands */
+
+/************************* GLOBAL VARIABLES *************************/
+/*
+ * You should not use global variables, because of re-entrancy.
+ * On our case, it's only const, so it's OK...
+ */
+/*
+ * Meta-data about all the standard Wireless Extension requests we
+ * know about.
+ */
+static const struct iw_ioctl_description standard_ioctl[] = {	/* slot = cmd - SIOCIWFIRST */
+	[SIOCSIWCOMMIT	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWNAME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_CHAR,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWNWID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+		.flags		= IW_DESCR_FLAG_EVENT,	/* a successful SET sends an event */
+	},
+	[SIOCGIWNWID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWFREQ	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_FREQ,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWFREQ	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_FREQ,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWMODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_UINT,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWMODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_UINT,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWSENS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWSENS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRANGE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWRANGE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,	/* payload lives in a user buffer */
+		.token_size	= 1,			/* buffer size = max_tokens * token_size */
+		.max_tokens	= sizeof(struct iw_range),
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWPRIV	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWPRIV	- SIOCIWFIRST] = { /* (handled directly by us) */
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCSIWSTATS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWSTATS	- SIOCIWFIRST] = { /* (handled directly by us) */
+		.header_type	= IW_HEADER_TYPE_NULL,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr),
+		.max_tokens	= IW_MAX_SPY,
+	},
+	[SIOCGIWSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr) +
+				  sizeof(struct iw_quality),
+		.max_tokens	= IW_MAX_SPY,
+	},
+	[SIOCSIWTHRSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_thrspy),
+		.min_tokens	= 1,			/* a SET with fewer tokens is -EINVAL */
+		.max_tokens	= 1,			/* a SET with more tokens is -E2BIG */
+	},
+	[SIOCGIWTHRSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_thrspy),
+		.min_tokens	= 1,
+		.max_tokens	= 1,
+	},
+	[SIOCSIWAP	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[SIOCGIWAP	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr) +
+				  sizeof(struct iw_quality),
+		.max_tokens	= IW_MAX_AP,
+		.flags		= IW_DESCR_FLAG_NOMAX,	/* a GET may ask for more than max_tokens */
+	},
+	[SIOCSIWSCAN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWSCAN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_SCAN_MAX_DATA,
+		.flags		= IW_DESCR_FLAG_NOMAX,
+	},
+	[SIOCSIWESSID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWESSID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWNICKN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+	},
+	[SIOCGIWNICKN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+	},
+	[SIOCSIWRATE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRATE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRTS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRTS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWFRAG	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWFRAG	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWTXPOW	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWTXPOW	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRETRY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRETRY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ENCODING_TOKEN_MAX,
+		.flags		= IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,	/* event is sent without its payload */
+	},
+	[SIOCGIWENCODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ENCODING_TOKEN_MAX,
+		.flags		= IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
+	},
+	[SIOCSIWPOWER	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWPOWER	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+};
+/* Number of slots in standard_ioctl[], used to range-check ioctl numbers */
+static const int standard_ioctl_num = (sizeof(standard_ioctl) /
+				       sizeof(standard_ioctl[0]));
+
+/*
+ * Meta-data about all the additional standard Wireless Extension events
+ * we know about. Each slot is designated by (event - IWEVFIRST).
+ */
+static const struct iw_ioctl_description standard_event[] = {
+	[IWEVTXDROP	- IWEVFIRST] = { .header_type = IW_HEADER_TYPE_ADDR },
+	[IWEVQUAL	- IWEVFIRST] = { .header_type = IW_HEADER_TYPE_QUAL },
+	[IWEVCUSTOM	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_CUSTOM_MAX,
+	},
+	[IWEVREGISTERED	- IWEVFIRST] = { .header_type = IW_HEADER_TYPE_ADDR },
+	[IWEVEXPIRED	- IWEVFIRST] = { .header_type = IW_HEADER_TYPE_ADDR },
+};
+/* Number of slots in standard_event[] */
+static const int standard_event_num = (sizeof(standard_event) /
+				       sizeof(standard_event[0]));
+
+/* Size (in bytes) of one element of each of the private data types */
+static const char iw_priv_type_size[] = {	/* index = (args & IW_PRIV_TYPE_MASK) >> 12 */
+	0,				/* IW_PRIV_TYPE_NONE */
+	1,				/* IW_PRIV_TYPE_BYTE */
+	1,				/* IW_PRIV_TYPE_CHAR */
+	0,				/* Not defined */
+	sizeof(__u32),			/* IW_PRIV_TYPE_INT */
+	sizeof(struct iw_freq),		/* IW_PRIV_TYPE_FLOAT */
+	sizeof(struct sockaddr),	/* IW_PRIV_TYPE_ADDR */
+	0,				/* Not defined */
+};
+
+/* Size (in bytes) of various events, one slot per header type named below */
+static const int event_type_size[] = {
+	IW_EV_LCP_LEN,			/* IW_HEADER_TYPE_NULL */
+	0,				/* no header type named for this slot - presumably unused, TODO confirm */
+	IW_EV_CHAR_LEN,			/* IW_HEADER_TYPE_CHAR */
+	0,				/* no header type named for this slot - presumably unused, TODO confirm */
+	IW_EV_UINT_LEN,			/* IW_HEADER_TYPE_UINT */
+	IW_EV_FREQ_LEN,			/* IW_HEADER_TYPE_FREQ */
+	IW_EV_ADDR_LEN,			/* IW_HEADER_TYPE_ADDR */
+	0,				/* no header type named for this slot - presumably unused, TODO confirm */
+	IW_EV_POINT_LEN,		/* Without variable payload */
+	IW_EV_PARAM_LEN,		/* IW_HEADER_TYPE_PARAM */
+	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
+};
+
+/************************ COMMON SUBROUTINES ************************/
+/*
+ * Stuff that may be used in various place or doesn't fit in one
+ * of the section below.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Look up the driver handler for Wireless Extension request @cmd on @dev.
+ * The standard handlers are tried first, then the private ones. NULL is
+ * returned when the device has no handlers or none matches @cmd.
+ * Called from various places, so make sure it remains efficient.
+ */
+static inline iw_handler get_handler(struct net_device *dev,
+				     unsigned int cmd)
+{
+	const struct iw_handler_def *	def = dev->wireless_handlers;
+	/* Don't "optimise" these into signed values, it will crash :
+	 * they *MUST* be unsigned so that a cmd below the base wraps to a
+	 * huge offset and fails the range test */
+	unsigned int	std_index = cmd - SIOCIWFIRST;
+	unsigned int	priv_index = cmd - SIOCIWFIRSTPRIV;
+
+	/* Check if we have some wireless handlers defined */
+	if(def == NULL)
+		return NULL;
+
+	/* Try as a standard command */
+	if(std_index < def->num_standard)
+		return def->standard[std_index];
+
+	/* Try as a private command */
+	if(priv_index < def->num_private)
+		return def->private[priv_index];
+
+	/* Not found */
+	return NULL;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Get statistics out of the driver.
+ * The hook in dev->wireless_handlers is used when present, otherwise
+ * the older dev->get_wireless_stats hook. NULL when the driver has
+ * neither.
+ */
+static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+{
+	/* New location */
+	if(dev->wireless_handlers != NULL) {
+		if(dev->wireless_handlers->get_wireless_stats != NULL)
+			return dev->wireless_handlers->get_wireless_stats(dev);
+	}
+
+	/* Old location, will be phased out in next WE */
+	if(dev->get_wireless_stats == NULL)
+		return (struct iw_statistics *) NULL;
+
+	return dev->get_wireless_stats(dev);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Call the commit handler in the driver
+ * (if exist and if conditions are right)
+ *
+ * Note : our current commit strategy is currently pretty dumb,
+ * but we will be able to improve on that...
+ * The goal is to try to aggregate as many changes as possible
+ * before doing the commit. Drivers that will define a commit handler
+ * are usually those that need a reset after changing parameters, so
+ * we want to minimise the number of reset.
+ * A cool idea is to use a timer : at each "set" command, we re-set the
+ * timer, when the timer eventually fires, we call the driver.
+ * Hopefully, more on that later.
+ *
+ * Also, I'm waiting to see how many people will complain about the
+ * netif_running(dev) test. I'm open on that one...
+ * Hopefully, the driver will remember to do a commit in "open()" ;-)
+ */
+static inline int call_commit_handler(struct net_device *	dev)
+{
+	iw_handler	commit;
+
+	/* Nothing is committed while the interface is not running */
+	if(!netif_running(dev))
+		return 0;		/* Command completed successfully */
+
+	/* A driver may register private handlers only. Don't index a
+	 * missing or empty standard table in that case. */
+	if((dev->wireless_handlers->standard == NULL) ||
+	   (dev->wireless_handlers->num_standard == 0))
+		return 0;
+
+	/* Slot 0 of the standard table holds the commit handler */
+	commit = dev->wireless_handlers->standard[0];
+	if(commit == NULL)
+		return 0;		/* Command completed successfully */
+
+	/* Call the commit handler on the driver */
+	return commit(dev, NULL, NULL, NULL);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Calculate size of private arguments : the element count found in
+ * the IW_PRIV_SIZE_MASK bits of @args times the element size of the
+ * type found in its IW_PRIV_TYPE_MASK bits.
+ */
+static inline int get_priv_size(__u16	args)
+{
+	int	elem_size = iw_priv_type_size[(args & IW_PRIV_TYPE_MASK) >> 12];
+
+	return (args & IW_PRIV_SIZE_MASK) * elem_size;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Re-calculate the size of private arguments from the element count
+ * actually found in wrqu->data.length, capped at the count that @args
+ * allows.
+ */
+static inline int adjust_priv_size(__u16		args,
+				   union iwreq_data *	wrqu)
+{
+	int	limit = args & IW_PRIV_SIZE_MASK;
+	int	count = wrqu->data.length;
+	int	elem_size = iw_priv_type_size[(args & IW_PRIV_TYPE_MASK) >> 12];
+
+	/* Make sure the driver doesn't goof up */
+	return ((count > limit) ? limit : count) * elem_size;
+}
+
+
+/******************** /proc/net/wireless SUPPORT ********************/
+/*
+ * The /proc/net/wireless file is a human readable user-space interface
+ * exporting various wireless specific statistics from the wireless devices.
+ * This is the most popular part of the Wireless Extensions ;-)
+ *
+ * This interface is a pure clone of /proc/net/dev (in net/core/dev.c).
+ * The content of the file is basically the content of "struct iw_statistics".
+ */
+
+#ifdef CONFIG_PROC_FS
+
+/* ---------------------------------------------------------------- */
+/*
+ * Print one entry (line) of /proc/net/wireless for @dev.
+ * Nothing is printed when the driver gives no statistics. The
+ * qual.updated flags are cleared once the line is out.
+ */
+static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
+						 struct net_device *dev)
+{
+	/* Get stats from the driver */
+	struct iw_statistics *stats = get_wireless_stats(dev);
+	char	qual_mark;
+	char	level_mark;
+	char	noise_mark;
+
+	if (!stats)
+		return;
+
+	/* A '.' follows each quality field whose "updated" bit is set */
+	qual_mark = (stats->qual.updated & IW_QUAL_QUAL_UPDATED) ? '.' : ' ';
+	level_mark = (stats->qual.updated & IW_QUAL_LEVEL_UPDATED) ? '.' : ' ';
+	noise_mark = (stats->qual.updated & IW_QUAL_NOISE_UPDATED) ? '.' : ' ';
+
+	seq_printf(seq, "%6s: %04x  %3d%c  %3d%c  %3d%c  %6d %6d %6d "
+			"%6d %6d   %6d\n",
+		   dev->name, stats->status,
+		   stats->qual.qual, qual_mark,
+		   ((__u8) stats->qual.level), level_mark,
+		   ((__u8) stats->qual.noise), noise_mark,
+		   stats->discard.nwid, stats->discard.code,
+		   stats->discard.fragment, stats->discard.retries,
+		   stats->discard.misc, stats->miss.beacon);
+
+	stats->qual.updated = 0;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Print info for /proc/net/wireless (print all entries).
+ * The start token produces the two header lines, which end with the
+ * WIRELESS_EXT version; any other @v is printed as a device entry.
+ * Always returns 0.
+ */
+static int wireless_seq_show(struct seq_file *seq, void *v)
+{
+	if (v != SEQ_START_TOKEN) {
+		wireless_seq_printf_stats(seq, v);
+		return 0;
+	}
+
+	seq_printf(seq, "Inter-| sta-|   Quality        |   Discarded "
+			"packets               | Missed | WE\n"
+			" face | tus | link level noise |  nwid  "
+			"crypt   frag  retry   misc | beacon | %d\n",
+		   WIRELESS_EXT);
+	return 0;
+}
+
+extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
+extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+extern void dev_seq_stop(struct seq_file *seq, void *v);
+
+static struct seq_operations wireless_seq_ops = {	/* seq_file ops behind /proc/net/wireless */
+	.start = dev_seq_start,		/* iteration uses the dev_seq_*() helpers declared extern above */
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = wireless_seq_show,	/* only the per-entry output is ours */
+};
+
+/* open() for /proc/net/wireless : attach wireless_seq_ops to the file */
+static int wireless_seq_open(struct inode *inode, struct file *file)
+{
+	int	err = seq_open(file, &wireless_seq_ops);
+
+	return err;
+}
+
+static struct file_operations wireless_seq_fops = {	/* file ops registered by wireless_proc_init() */
+	.owner	 = THIS_MODULE,
+	.open    = wireless_seq_open,	/* binds wireless_seq_ops */
+	.read    = seq_read,		/* the rest is the stock seq_file code */
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+/*
+ * Create the "wireless" proc net entry, readable by everybody.
+ * Returns 0 on success, -ENOMEM when the entry can't be created.
+ */
+int __init wireless_proc_init(void)
+{
+	return proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)
+		? 0 : -ENOMEM;
+}
+#endif	/* CONFIG_PROC_FS */
+
+/************************** IOCTL SUPPORT **************************/
+/*
+ * The original user space API to configure all those Wireless Extensions
+ * is through IOCTLs.
+ * In there, we check if we need to call the new driver API (iw_handler)
+ * or just call the driver ioctl handler.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ *	Allow programmatic access to /proc/net/wireless even if /proc
+ *	doesn't exist... Also more efficient...
+ *
+ *	Copies the driver's struct iw_statistics to the user buffer named
+ *	by the request. A non-zero data.flags also clears qual.updated.
+ *	Returns 0, -EFAULT when the copy fails, or -EOPNOTSUPP when the
+ *	driver has no statistics.
+ */
+static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr)
+{
+	struct iwreq *		wrq = (struct iwreq *) ifr;
+	/* Get stats from the driver */
+	struct iw_statistics *	stats = get_wireless_stats(dev);
+
+	if (stats == (struct iw_statistics *) NULL)
+		return -EOPNOTSUPP;
+
+	/* Copy statistics to the user buffer */
+	if(copy_to_user(wrq->u.data.pointer, stats, sizeof(*stats)))
+		return -EFAULT;
+
+	/* Check if we need to clear the update flag */
+	if(wrq->u.data.flags != 0)
+		stats->qual.updated = 0;
+
+	return 0;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Export the driver private handler definition
+ * They will be picked up by tools like iwpriv...
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the driver has nothing to
+ * export, -EFAULT on a NULL or faulting user pointer, and -E2BIG when
+ * the user buffer is too small. In the -E2BIG case data.length carries
+ * the number of entries needed.
+ */
+static inline int ioctl_export_private(struct net_device *	dev,
+				       struct ifreq *		ifr)
+{
+	struct iwreq *	iwr = (struct iwreq *) ifr;
+	int		too_small;
+
+	/* Check if the driver has something to export */
+	if((dev->wireless_handlers->num_private_args == 0) ||
+	   (dev->wireless_handlers->private_args == NULL))
+		return -EOPNOTSUPP;
+
+	/* Check NULL pointer */
+	if(iwr->u.data.pointer == NULL)
+		return -EFAULT;
+
+	/* Check if there is enough buffer up there */
+	too_small = (iwr->u.data.length <
+		     dev->wireless_handlers->num_private_args);
+
+	/* Set the number of available ioctls. When the buffer is too
+	 * small this is the hint user space needs, as it can't know in
+	 * advance how large the buffer has to be... */
+	iwr->u.data.length = dev->wireless_handlers->num_private_args;
+	if(too_small)
+		return -E2BIG;
+
+	/* Copy structure to the user buffer. */
+	if (copy_to_user(iwr->u.data.pointer,
+			 dev->wireless_handlers->private_args,
+			 sizeof(struct iw_priv_args) * iwr->u.data.length))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Wrapper to call a standard Wireless Extension handler.
+ * We do various checks and also take care of moving data between
+ * user space and kernel space.
+ *
+ * Returns -EOPNOTSUPP for a cmd outside standard_ioctl[], -EFAULT,
+ * -E2BIG, -EINVAL or -ENOMEM when the user buffer can't be used, and
+ * the result of the handler otherwise. A handler result of -EIWCOMMIT
+ * is replaced by the result of call_commit_handler().
+ */
+static inline int ioctl_standard_call(struct net_device *	dev,
+				      struct ifreq *		ifr,
+				      unsigned int		cmd,
+				      iw_handler		handler)
+{
+	struct iwreq *				iwr = (struct iwreq *) ifr;
+	const struct iw_ioctl_description *	descr;
+	struct iw_request_info			info;
+	int					ret = -EINVAL;
+
+	/* Get the description of the IOCTL */
+	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+		return -EOPNOTSUPP;
+	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
+
+#ifdef WE_IOCTL_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
+	       ifr->ifr_name, cmd);
+	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
+#endif	/* WE_IOCTL_DEBUG */
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not */
+	if(descr->header_type != IW_HEADER_TYPE_POINT) {
+
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), NULL);
+
+#ifdef WE_SET_EVENT
+		/* Generate an event to notify listeners of the change */
+		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		   ((ret == 0) || (ret == -EIWCOMMIT)))
+			wireless_send_event(dev, cmd, &(iwr->u), NULL);
+#endif	/* WE_SET_EVENT */
+	} else {
+		char *	extra;
+		int	extra_size;
+		int	user_length = 0;
+		int	err;
+
+		/* Calculate space needed by arguments. Always allocate
+		 * for max space. Easier, and won't last long... */
+		extra_size = descr->max_tokens * descr->token_size;
+
+		/* Check what user space is giving us */
+		if(IW_IS_SET(cmd)) {
+			/* Check NULL pointer */
+			if((iwr->u.data.pointer == NULL) &&
+			   (iwr->u.data.length != 0))
+				return -EFAULT;
+			/* Check if number of token fits within bounds */
+			if(iwr->u.data.length > descr->max_tokens)
+				return -E2BIG;
+			if(iwr->u.data.length < descr->min_tokens)
+				return -EINVAL;
+		} else {
+			/* Check NULL pointer */
+			if(iwr->u.data.pointer == NULL)
+				return -EFAULT;
+			/* Save user space buffer size for checking */
+			user_length = iwr->u.data.length;
+
+			/* Don't check if user_length > max to allow forward
+			 * compatibility. The test user_length < min is
+			 * implied by the test at the end. */
+
+			/* Support for very large requests */
+			if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+			   (user_length > descr->max_tokens)) {
+				/* Allow userspace to GET more than max so
+				 * we can support any size GET requests.
+				 * There is still a limit : -ENOMEM. */
+				extra_size = user_length * descr->token_size;
+				/* Note : user_length is originally a __u16,
+				 * and token_size is controlled by us,
+				 * so extra_size won't get negative and
+				 * won't overflow... */
+			}
+		}
+
+#ifdef WE_IOCTL_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
+		       dev->name, extra_size);
+#endif	/* WE_IOCTL_DEBUG */
+
+		/* Create the kernel buffer */
+		extra = kmalloc(extra_size, GFP_KERNEL);
+		if (extra == NULL) {
+			return -ENOMEM;
+		}
+		/* Zero it : whatever the handler leaves untouched may be
+		 * copied back to user space below, and must not carry
+		 * stale kernel heap content */
+		memset(extra, 0, extra_size);
+
+		/* If it is a SET, get all the extra data in here */
+		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+			err = copy_from_user(extra, iwr->u.data.pointer,
+					     iwr->u.data.length *
+					     descr->token_size);
+			if (err) {
+				kfree(extra);
+				return -EFAULT;
+			}
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
+			       dev->name,
+			       iwr->u.data.length * descr->token_size);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* A GET without NOMAX is bounded by max_tokens, not by
+		 * what user space asked for, and the buffer was sized
+		 * accordingly. Hand the handler that bound as the length :
+		 * a handler that forgets to set the length then makes us
+		 * copy back max_tokens zeroed tokens at most, instead of
+		 * a user-chosen amount reaching past the buffer. */
+		if(IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX))
+			iwr->u.data.length = descr->max_tokens;
+
+		/* Call the handler */
+		ret = handler(dev, &info, &(iwr->u), extra);
+
+		/* If we have something to return to the user */
+		if (!ret && IW_IS_GET(cmd)) {
+			/* Check if there is enough buffer up there */
+			if(user_length < iwr->u.data.length) {
+				kfree(extra);
+				return -E2BIG;
+			}
+
+			err = copy_to_user(iwr->u.data.pointer, extra,
+					   iwr->u.data.length *
+					   descr->token_size);
+			if (err)
+				ret = -EFAULT;
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
+			       dev->name,
+			       iwr->u.data.length * descr->token_size);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+#ifdef WE_SET_EVENT
+		/* Generate an event to notify listeners of the change */
+		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		   ((ret == 0) || (ret == -EIWCOMMIT))) {
+			if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+				/* If the event is restricted, don't
+				 * export the payload */
+				wireless_send_event(dev, cmd, &(iwr->u), NULL);
+			else
+				wireless_send_event(dev, cmd, &(iwr->u),
+						    extra);
+		}
+#endif	/* WE_SET_EVENT */
+
+		/* Cleanup - I told you it wasn't that long ;-) */
+		kfree(extra);
+	}
+
+	/* Call commit handler if needed and defined */
+	if(ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	/* Here, we will generate the appropriate event if needed */
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Wrapper to call a private Wireless Extension handler.
+ * We do various checks and also take care of moving data between
+ * user space and kernel space.
+ * It's not as nice and slimline as the standard wrapper. The cause
+ * is struct iw_priv_args, which was not really designed for the
+ * job we are doing here.
+ *
+ * IMPORTANT : This function prevents setting and getting data on the
+ * same IOCTL and enforces the SET/GET convention. Not doing it would be
+ * far too hairy...
+ * If you need to set and get data at the same time, please don't use
+ * a iw_handler but process it in your ioctl handler (i.e. use the
+ * old driver API).
+ *
+ * Returns -EFAULT, -E2BIG or -ENOMEM when the user buffer can't be
+ * used, and the result of the handler otherwise. A handler result of
+ * -EIWCOMMIT is replaced by the result of call_commit_handler().
+ */
+static inline int ioctl_private_call(struct net_device *	dev,
+				     struct ifreq *		ifr,
+				     unsigned int		cmd,
+				     iw_handler		handler)
+{
+	struct iwreq *			iwr = (struct iwreq *) ifr;
+	const struct iw_priv_args *	descr = NULL;
+	struct iw_request_info		info;
+	int				extra_size = 0;
+	int				i;
+	int				ret = -EINVAL;
+
+	/* Get the description of the IOCTL */
+	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+			descr = &(dev->wireless_handlers->private_args[i]);
+			break;
+		}
+
+#ifdef WE_IOCTL_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
+	       ifr->ifr_name, cmd);
+	if(descr) {
+		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
+		       dev->name, descr->name,
+		       descr->set_args, descr->get_args);
+	}
+#endif	/* WE_IOCTL_DEBUG */
+
+	/* Compute the size of the set/get arguments.
+	 * Without a description, extra_size stays 0 and the request is
+	 * handled inline in iwr below. */
+	if(descr != NULL) {
+		if(IW_IS_SET(cmd)) {
+			int	offset = 0;	/* For sub-ioctls */
+			/* Check for sub-ioctl handler */
+			if(descr->name[0] == '\0')
+				/* Reserve one int for sub-ioctl index */
+				offset = sizeof(__u32);
+
+			/* Size of set arguments */
+			extra_size = get_priv_size(descr->set_args);
+
+			/* Does it fits in iwr ? */
+			if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+			   ((extra_size + offset) <= IFNAMSIZ))
+				extra_size = 0;
+		} else {
+			/* Size of get arguments */
+			extra_size = get_priv_size(descr->get_args);
+
+			/* Does it fits in iwr ? */
+			if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+			   (extra_size <= IFNAMSIZ))
+				extra_size = 0;
+		}
+	}
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not. */
+	if(extra_size == 0) {
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+	} else {
+		char *	extra;
+		int	err;
+
+		/* extra_size != 0 implies descr != NULL from here on */
+
+		/* Check what user space is giving us */
+		if(IW_IS_SET(cmd)) {
+			/* Check NULL pointer */
+			if((iwr->u.data.pointer == NULL) &&
+			   (iwr->u.data.length != 0))
+				return -EFAULT;
+
+			/* Does it fits within bounds ? */
+			if(iwr->u.data.length > (descr->set_args &
+						 IW_PRIV_SIZE_MASK))
+				return -E2BIG;
+		} else {
+			/* Check NULL pointer */
+			if(iwr->u.data.pointer == NULL)
+				return -EFAULT;
+		}
+
+#ifdef WE_IOCTL_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
+		       dev->name, extra_size);
+#endif	/* WE_IOCTL_DEBUG */
+
+		/* Always allocate for max space. Easier, and won't last
+		 * long... */
+		extra = kmalloc(extra_size, GFP_KERNEL);
+		if (extra == NULL) {
+			return -ENOMEM;
+		}
+		/* Zero it : a GET copies the buffer back to user space,
+		 * and whatever the handler leaves untouched must not
+		 * carry stale kernel heap content */
+		memset(extra, 0, extra_size);
+
+		/* If it is a SET, get all the extra data in here */
+		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+			err = copy_from_user(extra, iwr->u.data.pointer,
+					     extra_size);
+			if (err) {
+				kfree(extra);
+				return -EFAULT;
+			}
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
+			       dev->name, iwr->u.data.length);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Call the handler */
+		ret = handler(dev, &info, &(iwr->u), extra);
+
+		/* If we have something to return to the user */
+		if (!ret && IW_IS_GET(cmd)) {
+
+			/* Adjust for the actual length if it's variable,
+			 * avoid leaking kernel bits outside. */
+			if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) {
+				extra_size = adjust_priv_size(descr->get_args,
+							      &(iwr->u));
+			}
+
+			err = copy_to_user(iwr->u.data.pointer, extra,
+					   extra_size);
+			if (err)
+				ret = -EFAULT;
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
+			       dev->name, iwr->u.data.length);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Cleanup - I told you it wasn't that long ;-) */
+		kfree(extra);
+	}
+
+
+	/* Call commit handler if needed and defined */
+	if(ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Main IOCTL dispatcher. Called from the main networking code
+ * (dev_ioctl() in net/core/dev.c).
+ * Check the type of IOCTL and call the appropriate wrapper...
+ */
+int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+{
+	struct net_device *netdev = __dev_get_by_name(ifr->ifr_name);
+	iw_handler	we_handler;
+
+	/* dev_ioctl() has already checked the permissions, dealt with the
+	 * copy_to/from_user() of ifr, and filtered 'cmd' with
+	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) before calling us. */
+
+	/* The named device must exist */
+	if (netdev == NULL)
+		return -ENODEV;
+
+	/* Special case : get Wireless Stats */
+	if (cmd == SIOCGIWSTATS)
+		return dev_iwstats(netdev, ifr);
+
+	/* Special case : when wireless handlers are defined, we export the
+	 * definition of the private handlers to user space ourselves.
+	 * Without them (old API) the request is handled like any other
+	 * ioctl below. */
+	if (cmd == SIOCGIWPRIV && netdev->wireless_handlers != NULL)
+		return ioctl_export_private(netdev, ifr);
+
+	/* Generic IOCTL : basic check first */
+	if (!netif_device_present(netdev))
+		return -ENODEV;
+
+	/* New driver API : try to find the handler. Standard and private
+	 * commands do not go through the same wrapper. */
+	we_handler = get_handler(netdev, cmd);
+	if (we_handler != NULL) {
+		if (cmd >= SIOCIWFIRSTPRIV)
+			return ioctl_private_call(netdev, ifr, cmd,
+						  we_handler);
+		return ioctl_standard_call(netdev, ifr, cmd, we_handler);
+	}
+
+	/* Old driver API : call the driver ioctl handler, if there is one */
+	if (netdev->do_ioctl == NULL)
+		return -EOPNOTSUPP;
+	return netdev->do_ioctl(netdev, ifr, cmd);
+}
+
+/************************* EVENT PROCESSING *************************/
+/*
+ * Process events generated by the wireless layer or the driver.
+ * Most often, the event will be propagated through rtnetlink
+ */
+
+#ifdef WE_EVENT_NETLINK
+/* "rtnl" is defined in net/core/rtnetlink.c, but we need it here.
+ * It is declared in <linux/rtnetlink.h> */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Fill a rtnetlink message with our event data.
+ * Note that we propagate only the specified event and don't dump the
+ * current wireless config. Dumping the wireless config is far too
+ * expensive (for each parameter, the driver needs to query the hardware).
+ */
+static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
+					struct net_device *	dev,
+					int			type,
+					char *			event,
+					int			event_len)
+{
+	/* Remember where our message starts, so that a partial message
+	 * can be trimmed away again on failure */
+	unsigned char	 *msg_start = skb->tail;
+	struct nlmsghdr  *hdr;
+	struct ifinfomsg *ifm;
+
+	/* Netlink header followed by the interface info payload.
+	 * NOTE(review): nlmsg_failure/rtattr_failure below have no explicit
+	 * goto in this function; presumably NLMSG_PUT()/RTA_PUT() jump to
+	 * them when the skb has no room left - confirm in the headers. */
+	hdr = NLMSG_PUT(skb, 0, 0, type, sizeof(*ifm));
+	ifm = NLMSG_DATA(hdr);
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_flags = dev->flags;
+	ifm->ifi_family = AF_UNSPEC;
+	/* Wireless changes don't affect the interface flags */
+	ifm->ifi_change = 0;
+
+	/* The wireless event travels as a single IFLA_WIRELESS attribute */
+	RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
+
+	/* Final message length : everything appended since msg_start */
+	hdr->nlmsg_len = skb->tail - msg_start;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	/* Drop whatever was appended and report the failure */
+	skb_trim(skb, msg_start - skb->data);
+	return -1;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Create the event message and broadcast it on the standard rtnetlink socket.
+ * This is a pure clone of rtmsg_ifinfo() in net/core/rtnetlink.c
+ * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
+ * within a RTM_NEWLINK event.
+ */
+static inline void rtmsg_iwinfo(struct net_device *	dev,
+				char *			event,
+				int			event_len)
+{
+	int		buf_size = NLMSG_GOODSIZE;
+	struct sk_buff *msg = alloc_skb(buf_size, GFP_ATOMIC);
+
+	/* No memory : the event is silently lost */
+	if (msg == NULL)
+		return;
+
+	/* Build a RTM_NEWLINK message carrying the event and broadcast it
+	 * to the RTMGRP_LINK group */
+	if (rtnetlink_fill_iwinfo(msg, dev, RTM_NEWLINK,
+				  event, event_len) >= 0) {
+		NETLINK_CB(msg).dst_groups = RTMGRP_LINK;
+		netlink_broadcast(rtnl, msg, 0, RTMGRP_LINK, GFP_ATOMIC);
+		return;
+	}
+
+	/* The message could not be built : release the buffer */
+	kfree_skb(msg);
+}
+#endif	/* WE_EVENT_NETLINK */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Main event dispatcher. Called from other parts and drivers.
+ * Send the event on the appropriate channels.
+ * May be called from interrupt context.
+ */
+void wireless_send_event(struct net_device *	dev,
+			 unsigned int		cmd,
+			 union iwreq_data *	wrqu,
+			 char *			extra)
+{
+	/*
+	 * Build a struct iw_event in a temporary buffer and, when
+	 * WE_EVENT_NETLINK is defined, pass it to rtmsg_iwinfo().
+	 *
+	 * dev   : device the event is reported for
+	 * cmd   : event code; values up to SIOCIWLAST are looked up in
+	 *         standard_ioctl[], higher values in standard_event[]
+	 * wrqu  : event header data, copied into event->u
+	 * extra : optional payload appended after the header; may be NULL
+	 *
+	 * Nothing is returned : unknown events and events whose token count
+	 * is out of bounds are logged and dropped, and a failed allocation
+	 * drops the event without any message.
+	 */
+	const struct iw_ioctl_description *	descr = NULL;
+	int extra_len = 0;
+	struct iw_event  *event;		/* Mallocated whole event */
+	int event_len;				/* Its size */
+	int hdr_len;				/* Size of the event header */
+	/* Don't "optimise" the following variable, it will crash */
+	/* Being unsigned, a cmd below the base of its range wraps around to
+	 * a huge index and is rejected by the "< ..._num" checks below. */
+	unsigned	cmd_index;		/* *MUST* be unsigned */
+
+	/* Get the description of the IOCTL */
+	if(cmd <= SIOCIWLAST) {
+		cmd_index = cmd - SIOCIWFIRST;
+		if(cmd_index < standard_ioctl_num)
+			descr = &(standard_ioctl[cmd_index]);
+	} else {
+		cmd_index = cmd - IWEVFIRST;
+		if(cmd_index < standard_event_num)
+			descr = &(standard_event[cmd_index]);
+	}
+	/* Don't accept unknown events */
+	if(descr == NULL) {
+		/* Note : we don't return an error to the driver, because
+		 * the driver would not know what to do about it. It can't
+		 * return an error to the user, because the event is not
+		 * initiated by a user request.
+		 * The best the driver could do is to log an error message.
+		 * We will do it ourselves instead...
+		 */
+	  	printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
+		       dev->name, cmd);
+		return;
+	}
+#ifdef WE_EVENT_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
+	       dev->name, cmd);
+	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
+#endif	/* WE_EVENT_DEBUG */
+
+	/* Check extra parameters and set extra_len */
+	/* Only IW_HEADER_TYPE_POINT events carry a payload; for every other
+	 * header type extra_len stays 0. */
+	if(descr->header_type == IW_HEADER_TYPE_POINT) {
+		/* Check if number of token fits within bounds */
+		if(wrqu->data.length > descr->max_tokens) {
+		  	printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
+			return;
+		}
+		if(wrqu->data.length < descr->min_tokens) {
+		  	printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
+			return;
+		}
+		/* Calculate extra_len - extra is NULL for restricted events */
+		if(extra != NULL)
+			extra_len = wrqu->data.length * descr->token_size;
+#ifdef WE_EVENT_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
+#endif	/* WE_EVENT_DEBUG */
+	}
+
+	/* Total length of the event */
+	/* The header size depends only on the header type of the event */
+	hdr_len = event_type_size[descr->header_type];
+	event_len = hdr_len + extra_len;
+
+#ifdef WE_EVENT_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len);
+#endif	/* WE_EVENT_DEBUG */
+
+	/* Create temporary buffer to hold the event */
+	/* GFP_ATOMIC : the header comment of this function says it may be
+	 * called from interrupt context. */
+	event = kmalloc(event_len, GFP_ATOMIC);
+	if(event == NULL)
+		return;
+
+	/* Fill event */
+	event->len = event_len;
+	event->cmd = cmd;
+	/* Copy the part of the header that follows the first IW_EV_LCP_LEN
+	 * bytes, taken from the start of wrqu. */
+	memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN);
+	/* The payload, if any, goes right after the header */
+	if(extra != NULL)
+		memcpy(((char *) event) + hdr_len, extra, extra_len);
+
+#ifdef WE_EVENT_NETLINK
+	/* rtnetlink event channel */
+	rtmsg_iwinfo(dev, (char *) event, event_len);
+#endif	/* WE_EVENT_NETLINK */
+
+	/* Cleanup */
+	kfree(event);
+
+	return;		/* Always success, I guess ;-) */
+}
+
+/********************** ENHANCED IWSPY SUPPORT **********************/
+/*
+ * In the old days, the driver was handling spy support all by itself.
+ * Now, the driver can delegate this task to Wireless Extensions.
+ * It needs to use those standard spy iw_handler in struct iw_handler_def,
+ * push data to us via wireless_spy_update() and include struct iw_spy_data
+ * in its private part (and advertise it in iw_handler_def->spy_offset).
+ * One of the main advantages of centralising spy support here is that
+ * it becomes much easier to improve and extend it without having to touch
+ * the drivers. One example is the addition of the Spy-Threshold events.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Return the pointer to the spy data in the driver.
+ * Because this is called on the Rx path via wireless_spy_update(),
+ * we want it to be efficient...
+ */
+static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+{
+	struct iw_spy_data *	spy;
+
+	if (dev->wireless_data != NULL) {
+		/* New way : the driver gives us the pointer directly */
+		spy = dev->wireless_data->spy_data;
+	} else {
+		/* Old way : the spy data lives at a fixed offset inside the
+		 * private area of the device. Doesn't work for multi-headed
+		 * drivers. It will be removed in the next version of WE. */
+		spy = dev->priv + dev->wireless_handlers->spy_offset;
+	}
+
+	return spy;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : set Spy List
+ */
+int iw_handler_set_spy(struct net_device *	dev,
+		       struct iw_request_info *	info,
+		       union iwreq_data *	wrqu,
+		       char *			extra)
+{
+	/*
+	 * Replace the list of spied addresses of the device.
+	 *
+	 * dev   : device whose spy data is updated
+	 * info  : not used by this handler
+	 * wrqu  : wrqu->data.length is the number of addresses supplied
+	 * extra : read as an array of struct sockaddr; the first ETH_ALEN
+	 *         bytes of each sa_data are the address to spy on
+	 *
+	 * Returns 0 on success, -EOPNOTSUPP when no spy data is available.
+	 *
+	 * NOTE(review): wrqu->data.length is not checked here against the
+	 * capacity of spy_address[]; presumably the caller bounds it -
+	 * confirm.
+	 */
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct sockaddr *	address = (struct sockaddr *) extra;
+
+	if(!dev->wireless_data)
+		/* Help user know that driver needs updating */
+		printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n",
+		       dev->name);
+	/* Make sure driver is not buggy or using the old API */
+	if(!spydata)
+		return -EOPNOTSUPP;
+
+	/* Disable spy collection while we copy the addresses.
+	 * While we copy addresses, any call to wireless_spy_update()
+	 * will NOP. This is OK, as anyway the addresses are changing. */
+	spydata->spy_number = 0;
+
+	/* We want to operate without locking, because wireless_spy_update()
+	 * most likely will happen in the interrupt handler, and therefore
+	 * have its own locking constraints and needs performance.
+	 * The rtnl_lock() make sure we don't race with the other iw_handlers.
+	 * This make sure wireless_spy_update() "see" that the spy list
+	 * is temporarily disabled. */
+	wmb();
+
+	/* Are there any addresses to copy? */
+	if(wrqu->data.length > 0) {
+		int i;
+
+		/* Copy addresses */
+		for(i = 0; i < wrqu->data.length; i++)
+			memcpy(spydata->spy_address[i], address[i].sa_data,
+			       ETH_ALEN);
+		/* Reset stats */
+		/* All IW_MAX_SPY entries are cleared, not only those in use */
+		memset(spydata->spy_stat, 0,
+		       sizeof(struct iw_quality) * IW_MAX_SPY);
+
+#ifdef WE_SPY_DEBUG
+		printk(KERN_DEBUG "iw_handler_set_spy() :  offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length);
+		for (i = 0; i < wrqu->data.length; i++)
+			printk(KERN_DEBUG
+			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
+			       spydata->spy_address[i][0],
+			       spydata->spy_address[i][1],
+			       spydata->spy_address[i][2],
+			       spydata->spy_address[i][3],
+			       spydata->spy_address[i][4],
+			       spydata->spy_address[i][5]);
+#endif	/* WE_SPY_DEBUG */
+	}
+
+	/* Make sure above is updated before re-enabling */
+	wmb();
+
+	/* Enable addresses */
+	/* Writing the new count last is what re-enables the list : the
+	 * loop in wireless_spy_update() is bounded by spy_number. */
+	spydata->spy_number = wrqu->data.length;
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : get Spy List
+ */
+int iw_handler_get_spy(struct net_device *	dev,
+		       struct iw_request_info *	info,
+		       union iwreq_data *	wrqu,
+		       char *			extra)
+{
+	struct sockaddr *	addr_out = (struct sockaddr *) extra;
+	struct iw_spy_data *	spy = get_spydata(dev);
+	int			idx;
+
+	/* No spy data : buggy driver, or driver using the old API */
+	if (spy == NULL)
+		return -EOPNOTSUPP;
+
+	/* Tell the caller how many entries follow */
+	wrqu->data.length = spy->spy_number;
+
+	/* First part of the buffer : one sockaddr per spied address */
+	for (idx = 0; idx < spy->spy_number; idx++) {
+		addr_out[idx].sa_family = AF_UNIX;
+		memcpy(addr_out[idx].sa_data, spy->spy_address[idx],
+		       ETH_ALEN);
+	}
+
+	/* Second part : the stats, stored just after the last sockaddr */
+	if (spy->spy_number > 0)
+		memcpy(&addr_out[spy->spy_number], spy->spy_stat,
+		       spy->spy_number * sizeof(struct iw_quality));
+
+	/* The stats have been read : clear the updated flags */
+	idx = 0;
+	while (idx < spy->spy_number) {
+		spy->spy_stat[idx].updated = 0;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : set spy threshold
+ */
+int iw_handler_set_thrspy(struct net_device *	dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *	wrqu,
+			  char *		extra)
+{
+	struct iw_thrspy *	thr_in = (struct iw_thrspy *) extra;
+	struct iw_spy_data *	spy = get_spydata(dev);
+
+	/* No spy data : buggy driver, or driver using the old API */
+	if (spy == NULL)
+		return -EOPNOTSUPP;
+
+	/* A single copy of two struct iw_quality, starting at the low
+	 * threshold (presumably the high threshold sits right behind it in
+	 * both structures - confirm against the struct definitions) */
+	memcpy(&spy->spy_thr_low, &thr_in->low,
+	       sizeof(struct iw_quality) * 2);
+
+	/* New thresholds : forget which addresses were under them */
+	memset(spy->spy_thr_under, 0, sizeof(spy->spy_thr_under));
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "iw_handler_set_thrspy() :  low %d ; high %d\n", spy->spy_thr_low.level, spy->spy_thr_high.level);
+#endif	/* WE_SPY_DEBUG */
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : get spy threshold
+ */
+int iw_handler_get_thrspy(struct net_device *	dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *	wrqu,
+			  char *		extra)
+{
+	struct iw_thrspy *	thr_out = (struct iw_thrspy *) extra;
+	struct iw_spy_data *	spy = get_spydata(dev);
+
+	/* No spy data : buggy driver, or driver using the old API */
+	if (spy == NULL)
+		return -EOPNOTSUPP;
+
+	/* A single copy of two struct iw_quality, starting at the low
+	 * threshold (presumably the high threshold sits right behind it in
+	 * both structures - confirm against the struct definitions) */
+	memcpy(&thr_out->low, &spy->spy_thr_low,
+	       sizeof(struct iw_quality) * 2);
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Prepare and send a Spy Threshold event
+ */
+static void iw_send_thrspy_event(struct net_device *	dev,
+				 struct iw_spy_data *	spydata,
+				 unsigned char *	address,
+				 struct iw_quality *	wstats)
+{
+	/*
+	 * Build a struct iw_thrspy describing a threshold crossing and
+	 * send it as a SIOCGIWTHRSPY event.
+	 *
+	 * dev     : device the event is reported for
+	 * spydata : spy data of the device, source of the thresholds
+	 * address : address that crossed a threshold (ETH_ALEN bytes read)
+	 * wstats  : the quality measurement that triggered the event
+	 */
+	union iwreq_data	wrqu;
+	struct iw_thrspy	threshold;
+
+	/* Start from all-zero objects. wireless_send_event() copies the
+	 * start of wrqu and the payload into the event buffer it sends
+	 * out, while the code below never writes wrqu.data.pointer nor
+	 * the tail of threshold.addr.sa_data : without this, uninitialised
+	 * stack bytes would end up in the event. */
+	memset(&wrqu, 0, sizeof(wrqu));
+	memset(&threshold, 0, sizeof(threshold));
+
+	/* Init : the payload is a single token */
+	wrqu.data.length = 1;
+	wrqu.data.flags = 0;
+	/* Copy address */
+	memcpy(threshold.addr.sa_data, address, ETH_ALEN);
+	threshold.addr.sa_family = ARPHRD_ETHER;
+	/* Copy stats */
+	memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+	/* Copy also thresholds : one copy of two struct iw_quality starting
+	 * at the low threshold (presumably low and high are adjacent in
+	 * both structures - confirm against the struct definitions) */
+	memcpy(&(threshold.low), &(spydata->spy_thr_low),
+	       2 * sizeof(struct iw_quality));
+
+#ifdef WE_SPY_DEBUG
+	/* The format used to end with ", up = %d" without a matching
+	 * argument. The address is printed from the unsigned char source
+	 * so that bytes above 0x7F are not sign-extended. */
+	printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d\n",
+	       address[0],
+	       address[1],
+	       address[2],
+	       address[3],
+	       address[4],
+	       address[5], threshold.qual.level);
+#endif	/* WE_SPY_DEBUG */
+
+	/* Send event to user space */
+	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Call for the driver to update the spy data.
+ * For now, the spy data is a simple array. As the size of the array is
+ * small, this is good enough. If we wanted to support larger number of
+ * spy addresses, we should use something more efficient...
+ */
+void wireless_spy_update(struct net_device *	dev,
+			 unsigned char *	address,
+			 struct iw_quality *	wstats)
+{
+	struct iw_spy_data *	spy = get_spydata(dev);
+	int			hit = -1;	/* Last matching slot */
+	int			slot;
+	int			went_over;
+	int			went_under;
+
+	/* No spy data : buggy driver, or driver using the old API */
+	if (spy == NULL)
+		return;
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "wireless_spy_update() :  offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spy, address[0], address[1], address[2], address[3], address[4], address[5]);
+#endif	/* WE_SPY_DEBUG */
+
+	/* Refresh the stats of every slot spying on this address */
+	for (slot = 0; slot < spy->spy_number; slot++) {
+		if (memcmp(address, spy->spy_address[slot], ETH_ALEN) != 0)
+			continue;
+		memcpy(&spy->spy_stat[slot], wstats,
+		       sizeof(struct iw_quality));
+		hit = slot;
+	}
+
+	/* Address not in the list : nothing more to do */
+	if (hit < 0)
+		return;
+
+	/* Simple hysteresis to avoid event storms : an address flagged as
+	 * "under" only generates an event when it rises above the high
+	 * threshold, and an address not flagged only when it drops below
+	 * the low threshold. */
+	went_over = spy->spy_thr_under[hit] &&
+		    (wstats->level > spy->spy_thr_high.level);
+	went_under = !spy->spy_thr_under[hit] &&
+		     (wstats->level < spy->spy_thr_low.level);
+	if (!went_over && !went_under)
+		return;
+
+	/* Threshold crossed : remember the new side and tell user space */
+	spy->spy_thr_under[hit] = went_under ? 1 : 0;
+	iw_send_thrspy_event(dev, spy, address, wstats);
+}
+
+/* Symbols exported from this file : the four standard spy handlers, the
+ * event dispatcher and the spy update entry point. */
+EXPORT_SYMBOL(iw_handler_get_spy);
+EXPORT_SYMBOL(iw_handler_get_thrspy);
+EXPORT_SYMBOL(iw_handler_set_spy);
+EXPORT_SYMBOL(iw_handler_set_thrspy);
+EXPORT_SYMBOL(wireless_send_event);
+EXPORT_SYMBOL(wireless_spy_update);
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core