Diffstat (limited to 'net/ipv4')
32 files changed, 333 insertions, 263 deletions
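A note on the recurring locking change below: the hunks in inet_diag.c, inet_hashtables.c, inet_timewait_sock.c, tcp.c and tcp_ipv4.c all stop taking the rwlock embedded in each established-hash bucket (&head->lock) and instead look the lock up through inet_ehash_lockp(), backed by a separate array sized at boot via inet_ehash_locks_alloc() (see the tcp.c hunk). A minimal sketch of what such a helper looks like, assuming a power-of-two lock array and an ehash_locks_mask field; the authoritative definition lives in include/net/inet_hashtables.h and may differ in detail:

	/* Map an established-hash bucket to its (shared) lock.
	 * Assumption: ehash_locks has a power-of-two number of
	 * entries and ehash_locks_mask == nentries - 1, so many
	 * buckets intentionally share one rwlock. */
	static inline rwlock_t *inet_ehash_lockp(struct inet_hashinfo *hashinfo,
						 unsigned int hash)
	{
		return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
	}

Sharing one lock among many buckets keeps the lock table small on machines with large established-hash tables, at the price of occasional contention between unrelated buckets.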
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 60123905dbb..732d8f088b1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -59,6 +59,13 @@ struct fib_table *ip_fib_main_table; #define FIB_TABLE_HASHSZ 1 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +static void __init fib4_rules_init(void) +{ + ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); + ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); +} #else #define FIB_TABLE_HASHSZ 256 @@ -905,14 +912,8 @@ void __init ip_fib_init(void) for (i = 0; i < FIB_TABLE_HASHSZ; i++) INIT_HLIST_HEAD(&fib_table_hash[i]); -#ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); -#else + fib4_rules_init(); -#endif register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f16839c6a72..a0ada3a8d8d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -49,33 +49,6 @@ struct fib4_rule #endif }; -static struct fib4_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFF, - .table = RT_TABLE_DEFAULT, - .action = FR_ACT_TO_TBL, - }, -}; - -static struct fib4_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .table = RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, -}; - -static struct fib4_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .table = RT_TABLE_LOCAL, - .action = FR_ACT_TO_TBL, - .flags = FIB_RULE_PERMANENT, - }, -}; - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -319,11 +292,27 @@ static struct fib_rules_ops fib4_rules_ops = { .owner = THIS_MODULE, }; -void __init fib4_rules_init(void) +static int __init fib_default_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib4_rules_ops.rules_list); - list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list); - list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list); + int err; + + err = fib_default_rule_add(&fib4_rules_ops, 0, + RT_TABLE_LOCAL, FIB_RULE_PERMANENT); + if (err < 0) + return err; + err = fib_default_rule_add(&fib4_rules_ops, 0x7FFE, + RT_TABLE_MAIN, 0); + if (err < 0) + return err; + err = fib_default_rule_add(&fib4_rules_ops, 0x7FFF, + RT_TABLE_DEFAULT, 0); + if (err < 0) + return err; + return 0; +} +void __init fib4_rules_init(void) +{ + BUG_ON(fib_default_rules_init()); fib_rules_register(&fib4_rules_ops); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dc429b6b0ba..b0170732b5e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -747,13 +747,14 @@ skip_listen_ht: for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; + rwlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; struct hlist_node *node; if (i > s_i) s_num = 0; - read_lock_bh(&head->lock); + read_lock_bh(lock); num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); @@ -769,7 +770,7 @@ skip_listen_ht: r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - read_unlock_bh(&head->lock); + read_unlock_bh(lock); goto done; } 
next_normal: @@ -791,14 +792,14 @@ next_normal: r->id.idiag_dport) goto next_dying; if (inet_twsk_diag_dump(tw, skb, cb) < 0) { - read_unlock_bh(&head->lock); + read_unlock_bh(lock); goto done; } next_dying: ++num; } } - read_unlock_bh(&head->lock); + read_unlock_bh(lock); } done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 16eecc7046a..67704da04fc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); - write_lock(&head->lock); + write_lock(lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &head->twchain) { @@ -239,7 +240,7 @@ unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); + write_unlock(lock); if (twp) { *twp = tw; @@ -255,7 +256,7 @@ unique: return 0; not_unique: - write_unlock(&head->lock); + write_unlock(lock); return -EADDRNOTAVAIL; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4e189e28f30..a60b99e0ebd 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(&ehead->lock); + write_lock(lock); if (hlist_unhashed(&tw->tw_node)) { - write_unlock(&ehead->lock); + write_unlock(lock); return; } __hlist_del(&tw->tw_node); sk_node_init(&tw->tw_node); - write_unlock(&ehead->lock); + write_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; @@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(&ehead->lock); + write_lock(lock); /* Step 2: Remove SK from established hash. */ if (__sk_del_node_init(sk)) @@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_node(tw, &ehead->twchain); atomic_inc(&tw->tw_refcnt); - write_unlock(&ehead->lock); + write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 771031dfbd0..af995198f64 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -61,7 +61,7 @@ * 4. Global variable peer_total is modified under the pool lock. * 5. 
struct inet_peer fields modification: * avl_left, avl_right, avl_parent, avl_height: pool lock - * unused_next, unused_prevp: unused node list lock + * unused: unused node list lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * dtime: unused node list lock @@ -94,8 +94,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min int inet_peer_gc_mintime __read_mostly = 10 * HZ; int inet_peer_gc_maxtime __read_mostly = 120 * HZ; -static struct inet_peer *inet_peer_unused_head; -static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; +static LIST_HEAD(unused_peers); static DEFINE_SPINLOCK(inet_peer_unused_lock); static void peer_check_expire(unsigned long dummy); @@ -138,15 +137,7 @@ void __init inet_initpeers(void) static void unlink_from_unused(struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); - if (p->unused_prevp != NULL) { - /* On unused list. */ - *p->unused_prevp = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; - else - inet_peer_unused_tailp = p->unused_prevp; - p->unused_prevp = NULL; /* mark it as removed */ - } + list_del_init(&p->unused); spin_unlock_bh(&inet_peer_unused_lock); } @@ -337,24 +328,24 @@ static void unlink_from_pool(struct inet_peer *p) /* May be called with local BH enabled. */ static int cleanup_once(unsigned long ttl) { - struct inet_peer *p; + struct inet_peer *p = NULL; /* Remove the first entry from the list of unused nodes. */ spin_lock_bh(&inet_peer_unused_lock); - p = inet_peer_unused_head; - if (p != NULL) { - __u32 delta = (__u32)jiffies - p->dtime; + if (!list_empty(&unused_peers)) { + __u32 delta; + + p = list_first_entry(&unused_peers, struct inet_peer, unused); + delta = (__u32)jiffies - p->dtime; + if (delta < ttl) { /* Do not prune fresh entries. */ spin_unlock_bh(&inet_peer_unused_lock); return -1; } - inet_peer_unused_head = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; - else - inet_peer_unused_tailp = p->unused_prevp; - p->unused_prevp = NULL; /* mark as not on the list */ + + list_del_init(&p->unused); + /* Grab an extra reference to prevent node disappearing * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); @@ -412,7 +403,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) /* Link the node. */ link_to_pool(n); - n->unused_prevp = NULL; /* not on the list */ + INIT_LIST_HEAD(&n->unused); peer_total++; write_unlock_bh(&peer_pool_lock); @@ -467,10 +458,7 @@ void inet_putpeer(struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); if (atomic_dec_and_test(&p->refcnt)) { - p->unused_prevp = inet_peer_unused_tailp; - p->unused_next = NULL; - *inet_peer_unused_tailp = p; - inet_peer_unused_tailp = &p->unused_next; + list_add_tail(&p->unused, &unused_peers); p->dtime = (__u32)jiffies; } spin_unlock_bh(&inet_peer_unused_lock); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5f7dc2de30..fd99fbd685e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1183,6 +1183,17 @@ error: return err; } +static void ip_cork_release(struct inet_sock *inet) +{ + inet->cork.flags &= ~IPCORK_OPT; + kfree(inet->cork.opt); + inet->cork.opt = NULL; + if (inet->cork.rt) { + ip_rt_put(inet->cork.rt); + inet->cork.rt = NULL; + } +} + /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. 
@@ -1276,13 +1287,7 @@ int ip_push_pending_frames(struct sock *sk) } out: - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + ip_cork_release(inet); return err; error: @@ -1295,19 +1300,12 @@ error: */ void ip_flush_pending_frames(struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + ip_cork_release(inet_sk(sk)); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f51f20e487c..82817e55436 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -437,10 +437,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ -#ifdef CONFIG_IP_MROUTE - if (optname >= MRT_BASE && optname <= (MRT_BASE + 10)) + if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk,optname,optval,optlen); -#endif err = 0; lock_sock(sk); @@ -909,11 +907,9 @@ int ip_setsockopt(struct sock *sk, int level, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > (MRT_BASE + 10)) -#endif - ) { + optname != IP_IPSEC_POLICY && + optname != IP_XFRM_POLICY && + !ip_mroute_opt(optname)) { lock_sock(sk); err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); release_sock(sk); @@ -935,11 +931,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > (MRT_BASE + 10)) -#endif - ) { + optname != IP_IPSEC_POLICY && + optname != IP_XFRM_POLICY && + !ip_mroute_opt(optname)) { lock_sock(sk); err = compat_nf_setsockopt(sk, PF_INET, optname, optval, optlen); @@ -967,11 +961,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IP) return -EOPNOTSUPP; -#ifdef CONFIG_IP_MROUTE - if (optname >= MRT_BASE && optname <= MRT_BASE+10) { + if (ip_mroute_opt(optname)) return ip_mroute_getsockopt(sk,optname,optval,optlen); - } -#endif if (get_user(len,optlen)) return -EFAULT; @@ -1171,11 +1162,8 @@ int ip_getsockopt(struct sock *sk, int level, err = do_ip_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > MRT_BASE+10) -#endif - ) { + if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && + !ip_mroute_opt(optname)) { int len; if (get_user(len,optlen)) @@ -1200,11 +1188,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int err = do_ip_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > MRT_BASE+10) -#endif - ) { + if (err == -ENOPROTOOPT && 
optname != IP_PKTOPTIONS && + !ip_mroute_opt(optname)) { int len; if (get_user(len, optlen)) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ca1b5fdb8d3..2c44a94c213 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <asm/semaphore.h> #include <linux/crypto.h> +#include <linux/err.h> #include <linux/pfkeyv2.h> #include <linux/percpu.h> #include <linux/smp.h> @@ -344,7 +345,7 @@ static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) for_each_possible_cpu(cpu) { struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, CRYPTO_ALG_ASYNC); - if (!tfm) + if (IS_ERR(tfm)) goto error; *per_cpu_ptr(tfms, cpu) = tfm; } diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 4b702f708d3..0a9f3c37e18 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -426,6 +426,24 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) /* + * Check if there is a destination for the connection, if so + * bind the connection to the destination. + */ +struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +{ + struct ip_vs_dest *dest; + + if ((cp) && (!cp->dest)) { + dest = ip_vs_find_dest(cp->daddr, cp->dport, + cp->vaddr, cp->vport, cp->protocol); + ip_vs_bind_dest(cp, dest); + return dest; + } else + return NULL; +} + + +/* + * Unbind a connection entry with its VS destination * Called by the ip_vs_conn_expire function. */ diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index c6ed7654e83..20c884a5772 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -979,15 +979,23 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, ret = NF_ACCEPT; } - /* increase its packet counter and check if it is needed - to be synchronized */ + /* Increase its packet counter and check if it is needed + * to be synchronized + * + * Sync connection if it is about to close to + encourage the standby servers to update the connections' timeout + */ atomic_inc(&cp->in_pkts); if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && - (cp->protocol != IPPROTO_TCP || - cp->state == IP_VS_TCP_S_ESTABLISHED) && - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) + (((cp->protocol != IPPROTO_TCP || + cp->state == IP_VS_TCP_S_ESTABLISHED) && + (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] + == sysctl_ip_vs_sync_threshold[0])) || + ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && + ((cp->state == IP_VS_TCP_S_FIN_WAIT) || + (cp->state == IP_VS_TCP_S_CLOSE))))) ip_vs_sync_conn(cp); + cp->old_state = cp->state; ip_vs_conn_put(cp); return ret; diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 7345fc252a2..b64cf45a9ea 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -579,6 +579,31 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) return NULL; } +/* + * Find destination by {daddr,dport,vaddr,protocol} + * Created to be used in ip_vs_process_message() in + * the backup synchronization daemon. It finds the + * destination to be bound to the received connection + * on the backup. + * + * ip_vs_lookup_real_service() looked promising, but + seems not to work as expected.
+ */ +struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, + __be32 vaddr, __be16 vport, __u16 protocol) +{ + struct ip_vs_dest *dest; + struct ip_vs_service *svc; + + svc = ip_vs_service_get(0, protocol, vaddr, vport); + if (!svc) + return NULL; + dest = ip_vs_lookup_dest(svc, daddr, dport); + if (dest) + atomic_inc(&dest->refcnt); + ip_vs_service_put(svc); + return dest; +} /* * Lookup dest by {svc,addr,port} in the destination trash. diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 0d4d9721cbd..bd930efc18d 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -284,6 +284,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) struct ip_vs_sync_conn_options *opt; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_dest *dest; char *p; int i; @@ -317,20 +318,34 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) s->caddr, s->cport, s->vaddr, s->vport); if (!cp) { + /* + * Find the appropriate destination for the connection. + * If it is not found the connection will remain unbound + * but still handled. + */ + dest = ip_vs_find_dest(s->daddr, s->dport, + s->vaddr, s->vport, + s->protocol); cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - flags, NULL); + flags, dest); + if (dest) + atomic_dec(&dest->refcnt); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; } cp->state = ntohs(s->state); } else if (!cp->dest) { - /* it is an entry created by the synchronization */ - cp->state = ntohs(s->state); - cp->flags = flags | IP_VS_CONN_F_HASHED; + dest = ip_vs_try_bind_dest(cp); + if (!dest) { + /* it is an unbound entry created by + * synchronization */ + cp->flags = flags | IP_VS_CONN_F_HASHED; + } else + atomic_dec(&dest->refcnt); } /* Note that we don't touch its state and flags if it is a normal entry. 
*/ @@ -342,6 +357,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p += SIMPLE_CONN_SIZE; atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + cp->state = ntohs(s->state); pp = ip_vs_proto_get(s->protocol); cp->timeout = pp->timeout_table[cp->state]; ip_vs_conn_put(cp); diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 409d273f6f8..7456833d6ad 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,27 +41,27 @@ obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches +obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o +obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o -obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o -obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o # targets -obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o +obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o +obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o +obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o -obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o -obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o -obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o +obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 10a2ce09fd8..14d64a383db 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -22,6 +22,7 @@ #include <linux/spinlock.h> #include <linux/sysctl.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/security.h> #include <linux/mutex.h> #include <net/net_namespace.h> @@ -607,15 +608,11 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; -#ifdef CONFIG_PROC_FS -static int -ipq_get_info(char *buffer, char **start, off_t offset, int length) +static int ip_queue_show(struct seq_file *m, void *v) { - int len; - read_lock_bh(&queue_lock); - len = sprintf(buffer, + seq_printf(m, "Peer PID : %d\n" "Copy mode : %hu\n" "Copy range : %u\n" @@ -632,16 +629,21 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) queue_user_dropped); read_unlock_bh(&queue_lock); + return 0; +} - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - else if (len < 0) - len = 0; - return len; +static int ip_queue_open(struct inode *inode, struct file *file) +{ + return single_open(file, ip_queue_show, NULL); } -#endif /* CONFIG_PROC_FS */ + +static const struct file_operations ip_queue_proc_fops = { + .open = ip_queue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; static struct nf_queue_handler nfqh = { .name = "ip_queue", @@ -661,10 +663,11 @@ static 
int __init ip_queue_init(void) goto cleanup_netlink_notifier; } - proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); - if (proc) + proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); + if (proc) { proc->owner = THIS_MODULE; - else { + proc->proc_fops = &ip_queue_proc_fops; + } else { printk(KERN_ERR "ip_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 35a5aa69cd9..c31b8766825 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -69,7 +69,7 @@ static void __exit nf_nat_amanda_fini(void) static int __init nf_nat_amanda_init(void) { - BUG_ON(rcu_dereference(nf_nat_amanda_hook)); + BUG_ON(nf_nat_amanda_hook != NULL); rcu_assign_pointer(nf_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 56e93f692e8..70e7997ea28 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -681,7 +681,7 @@ static int clean_nat(struct nf_conn *i, void *data) if (!nat) return 0; - memset(nat, 0, sizeof(nat)); + memset(nat, 0, sizeof(*nat)); i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index e1a16d3ea4c..a1d5d58a58b 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -147,7 +147,7 @@ static void __exit nf_nat_ftp_fini(void) static int __init nf_nat_ftp_init(void) { - BUG_ON(rcu_dereference(nf_nat_ftp_hook)); + BUG_ON(nf_nat_ftp_hook != NULL); rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a868c8c4132..93e18ef114f 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -544,15 +544,15 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int __init init(void) { - BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); - BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); - BUG_ON(rcu_dereference(nat_t120_hook) != NULL); - BUG_ON(rcu_dereference(nat_h245_hook) != NULL); - BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); - BUG_ON(rcu_dereference(nat_q931_hook) != NULL); + BUG_ON(set_h245_addr_hook != NULL); + BUG_ON(set_h225_addr_hook != NULL); + BUG_ON(set_sig_addr_hook != NULL); + BUG_ON(set_ras_addr_hook != NULL); + BUG_ON(nat_rtp_rtcp_hook != NULL); + BUG_ON(nat_t120_hook != NULL); + BUG_ON(nat_h245_hook != NULL); + BUG_ON(nat_callforwarding_hook != NULL); + BUG_ON(nat_q931_hook != NULL); rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 766e2c16c6b..fe6f9cef6c8 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -74,7 +74,7 @@ static void __exit nf_nat_irc_fini(void) static int __init nf_nat_irc_init(void) { - BUG_ON(rcu_dereference(nf_nat_irc_hook)); + BUG_ON(nf_nat_irc_hook != NULL); rcu_assign_pointer(nf_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c 
index e1385a09907..6817e7995f3 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -281,16 +281,16 @@ static int __init nf_nat_helper_pptp_init(void) { nf_nat_need_gre(); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound)); + BUG_ON(nf_nat_pptp_hook_outbound != NULL); rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound)); + BUG_ON(nf_nat_pptp_hook_inbound != NULL); rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre)); + BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn)); + BUG_ON(nf_nat_pptp_hook_expectfn != NULL); rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ce9edbcc01e..3ca98971a1e 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -293,8 +293,8 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { - BUG_ON(rcu_dereference(nf_nat_sip_hook)); - BUG_ON(rcu_dereference(nf_nat_sdp_hook)); + BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sdp_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); return 0; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 0ecec701cb4..1360a94766d 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -43,7 +43,7 @@ static void __exit nf_nat_tftp_fini(void) static int __init nf_nat_tftp_init(void) { - BUG_ON(rcu_dereference(nf_nat_tftp_hook)); + BUG_ON(nf_nat_tftp_hook != NULL); rcu_assign_pointer(nf_nat_tftp_hook, help); return 0; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ffdccc0972e..ce34b281803 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,17 +46,6 @@ #include <net/sock.h> #include <net/raw.h> -static int fold_prot_inuse(struct proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - /* * Report socket allocation statistics [mea@utu.fi] */ @@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", - fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), + sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); - seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); - seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); + seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); + seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(), ip_frag_mem()); return 0; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3916faca3af..66b42f547bf 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -760,6 +760,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +DEFINE_PROTO_INUSE(raw) + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -781,6 +783,7 @@ struct proto raw_prot = { .compat_setsockopt = 
compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, #endif + REF_PROTO_INUSE(raw) }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21b12de9e65..1bff9ed349f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -578,6 +578,9 @@ static void rt_check_expire(struct work_struct *work) i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; + if (need_resched()) + cond_resched(); + if (*rthp == NULL) continue; spin_lock_bh(rt_hash_lock_addr(i)); @@ -851,9 +854,7 @@ restart: */ rcu_assign_pointer(rt_hash_table[hash].chain, rth); - rth->u.dst.__use++; - dst_hold(&rth->u.dst); - rth->u.dst.lastuse = now; + dst_use(&rth->u.dst, now); spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); @@ -1813,11 +1814,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_destination; err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); - if (err == -ENOBUFS) - goto e_nobufs; - if (err == -EINVAL) - goto e_inval; - done: in_dev_put(in_dev); if (free_res) @@ -1935,9 +1931,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif == 0 && rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos) { - rth->u.dst.lastuse = jiffies; - dst_hold(&rth->u.dst); - rth->u.dst.__use++; + dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); skb->dst = (struct dst_entry*)rth; @@ -2331,9 +2325,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { - rth->u.dst.lastuse = jiffies; - dst_hold(&rth->u.dst); - rth->u.dst.__use++; + dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); *rp = rth; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c64072bb504..8e65182f7af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2456,11 +2456,11 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - rwlock_init(&tcp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); } - + if (inet_ehash_locks_alloc(&tcp_hashinfo)) + panic("TCP: failed to alloc ehash_locks"); tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ca9590f4f52..0f0c1c9829a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1269,6 +1269,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) return 0; + if (!tp->packets_out) + goto out; + /* SACK fastpath: * if the only SACK change is the increase of the end_seq of * the first block then only apply that SACK block @@ -1400,11 +1403,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* DSACK info lost if out-of-mem, try SACK still */ if (in_sack <= 0) in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); - if (in_sack < 0) + if (unlikely(in_sack < 0)) break; - fack_count += tcp_skb_pcount(skb); - sacked = TCP_SKB_CB(skb)->sacked; /* Account D-SACK for retransmitted packet. 
*/ @@ -1419,19 +1420,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if ((dup_sack && in_sack) && (sacked&TCPCB_SACKED_ACKED)) reord = min(fack_count, reord); - } else { - /* If it was in a hole, we detected reordering. */ - if (fack_count < prior_fackets && - !(sacked&TCPCB_SACKED_ACKED)) - reord = min(fack_count, reord); } /* Nothing to do; acked frame is about to be dropped. */ + fack_count += tcp_skb_pcount(skb); continue; } - if (!in_sack) + if (!in_sack) { + fack_count += tcp_skb_pcount(skb); continue; + } if (!(sacked&TCPCB_SACKED_ACKED)) { if (sacked & TCPCB_SACKED_RETRANS) { @@ -1448,12 +1447,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->retransmit_skb_hint = NULL; } } else { - /* New sack for not retransmitted frame, - * which was in hole. It is reordering. - */ - if (!(sacked & TCPCB_RETRANS) && - fack_count < prior_fackets) - reord = min(fack_count, reord); + if (!(sacked & TCPCB_RETRANS)) { + /* New sack for not retransmitted frame, + * which was in hole. It is reordering. + */ + if (fack_count < prior_fackets) + reord = min(fack_count, reord); + + /* SACK enhanced F-RTO (RFC4138; Appendix B) */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) + flag |= FLAG_ONLY_ORIG_SACKED; + } if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; @@ -1462,24 +1466,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* clear lost hint */ tp->retransmit_skb_hint = NULL; } - /* SACK enhanced F-RTO detection. - * Set flag if and only if non-rexmitted - * segments below frto_highmark are - * SACKed (RFC4138; Appendix B). - * Clearing correct due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) { - flag &= ~FLAG_ONLY_ORIG_SACKED; - } else { - if (!(sacked & TCPCB_RETRANS)) - flag |= FLAG_ONLY_ORIG_SACKED; - } } TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; flag |= FLAG_DATA_SACKED; tp->sacked_out += tcp_skb_pcount(skb); + fack_count += tcp_skb_pcount(skb); if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; @@ -1490,6 +1483,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); + + fack_count += tcp_skb_pcount(skb); } /* D-SACK. We can detect redundant retransmission @@ -1504,6 +1499,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->retransmit_skb_hint = NULL; } } + + /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct + * due to in-order walk + */ + if (after(end_seq, tp->frto_highmark)) + flag &= ~FLAG_ONLY_ORIG_SACKED; } if (tp->retrans_out && @@ -1515,7 +1516,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) - tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); + tcp_update_reordering(sk, tp->fackets_out - reord, 0); + +out: #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); @@ -1671,6 +1674,9 @@ void tcp_enter_frto(struct sock *sk) } tcp_verify_left_out(tp); + /* Too bad if TCP was application limited */ + tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); + /* Earlier loss recovery underway (see RFC4138; Appendix B). * The last condition is necessary at least in tp->frto_counter case. 
*/ @@ -1703,6 +1709,8 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; + + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; /* * Count the retransmission made on RTO correctly (only when * waiting for the first ACK and did not get it)... @@ -1716,7 +1724,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) } else { if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; } /* Don't lost mark skbs that were fwd transmitted after RTO */ @@ -2630,7 +2638,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) +static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, + int prior_fackets) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2639,6 +2648,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) int fully_acked = 1; int flag = 0; int prior_packets = tp->packets_out; + u32 cnt = 0; + u32 reord = tp->packets_out; s32 seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); @@ -2679,10 +2690,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) if ((flag & FLAG_DATA_ACKED) || (packets_acked > 1)) flag |= FLAG_NONHEAD_RETRANS_ACKED; - } else if (seq_rtt < 0) { - seq_rtt = now - scb->when; - if (fully_acked) - last_ackt = skb->tstamp; + } else { + if (seq_rtt < 0) { + seq_rtt = now - scb->when; + if (fully_acked) + last_ackt = skb->tstamp; + } + if (!(sacked & TCPCB_SACKED_ACKED)) + reord = min(cnt, reord); } if (sacked & TCPCB_SACKED_ACKED) @@ -2693,12 +2708,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) if ((sacked & TCPCB_URG) && tp->urg_mode && !before(end_seq, tp->snd_up)) tp->urg_mode = 0; - } else if (seq_rtt < 0) { - seq_rtt = now - scb->when; - if (fully_acked) - last_ackt = skb->tstamp; + } else { + if (seq_rtt < 0) { + seq_rtt = now - scb->when; + if (fully_acked) + last_ackt = skb->tstamp; + } + reord = min(cnt, reord); } tp->packets_out -= packets_acked; + cnt += packets_acked; /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -2730,13 +2749,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) tcp_ack_update_rtt(sk, flag, seq_rtt); tcp_rearm_rto(sk); + if (tcp_is_reno(tp)) { + tcp_remove_reno_sacks(sk, pkts_acked); + } else { + /* Non-retransmitted hole got filled? 
That's reordering */ + if (reord < prior_fackets) + tcp_update_reordering(sk, tp->fackets_out - reord, 0); + } + tp->fackets_out -= min(pkts_acked, tp->fackets_out); /* hint's skb might be NULL but we don't need to care */ tp->fastpath_cnt_hint -= min_t(u32, pkts_acked, tp->fastpath_cnt_hint); - if (tcp_is_reno(tp)) - tcp_remove_reno_sacks(sk, pkts_acked); - if (ca_ops->pkts_acked) { s32 rtt_us = -1; @@ -3019,6 +3043,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; u32 prior_in_flight; + u32 prior_fackets; s32 seq_rtt; int prior_packets; int frto_cwnd = 0; @@ -3043,6 +3068,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); } + prior_fackets = tp->fackets_out; + if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3084,13 +3111,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) prior_in_flight = tcp_packets_in_flight(tp); /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, &seq_rtt); + flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets); + if (tp->frto_counter) + frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ if (before(tp->frto_highmark, tp->snd_una)) tp->frto_highmark = 0; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d438dfb0c8f..e566f3c6767 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq) struct sock *sk; struct hlist_node *node; struct inet_timewait_sock *tw; + rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); - read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; @@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2094,11 +2095,11 @@ get_tw: cur = tw; goto out; } - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; @@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } @@ -2417,6 +2418,8 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +DEFINE_PROTO_INUSE(tcp) + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, @@ -2451,6 +2454,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif + REF_PROTO_INUSE(tcp) }; void __init tcp_v4_init(struct net_proto_family *ops) diff --git a/net/ipv4/tunnel4.c 
b/net/ipv4/tunnel4.c index a794a8ca8b4..978b3fd61e6 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -17,6 +17,11 @@ static struct xfrm_tunnel *tunnel4_handlers; static struct xfrm_tunnel *tunnel64_handlers; static DEFINE_MUTEX(tunnel4_mutex); +static inline struct xfrm_tunnel **fam_handlers(unsigned short family) +{ + return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; +} + int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) { struct xfrm_tunnel **pprev; @@ -25,8 +30,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) mutex_lock(&tunnel4_mutex); - for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; - *pprev; pprev = &(*pprev)->next) { + for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { if ((*pprev)->priority > priority) break; if ((*pprev)->priority == priority) @@ -53,8 +57,7 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) mutex_lock(&tunnel4_mutex); - for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; - *pprev; pprev = &(*pprev)->next) { + for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { if (*pprev == handler) { *pprev = handler->next; ret = 0; @@ -118,6 +121,17 @@ static void tunnel4_err(struct sk_buff *skb, u32 info) break; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void tunnel64_err(struct sk_buff *skb, u32 info) +{ + struct xfrm_tunnel *handler; + + for (handler = tunnel64_handlers; handler; handler = handler->next) + if (!handler->err_handler(skb, info)) + break; +} +#endif + static struct net_protocol tunnel4_protocol = { .handler = tunnel4_rcv, .err_handler = tunnel4_err, @@ -127,7 +141,7 @@ static struct net_protocol tunnel4_protocol = { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static struct net_protocol tunnel64_protocol = { .handler = tunnel64_rcv, - .err_handler = tunnel4_err, + .err_handler = tunnel64_err, .no_policy = 1, }; #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4bc25b46f33..03c400ca14c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1430,6 +1430,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } +DEFINE_PROTO_INUSE(udp) + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ -1452,6 +1454,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + REF_PROTO_INUSE(udp) }; /* ------------------------------------------------------------------------ */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 94977205abb..f5baeb3e8b8 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -44,6 +44,8 @@ static struct net_protocol udplite_protocol = { .no_policy = 1, }; +DEFINE_PROTO_INUSE(udplite) + struct proto udplite_prot = { .name = "UDP-Lite", .owner = THIS_MODULE, @@ -67,6 +69,7 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + REF_PROTO_INUSE(udplite) }; static struct inet_protosw udplite4_protosw = {
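Two small helpers called repeatedly above are defined outside net/ipv4 and so never appear in this listing (it is limited to net/ipv4). Sketches reconstructed from the open-coded sequences they replace; the authoritative definitions would live in include/net/dst.h and include/linux/mroute.h:

	/* dst_use(): the dst_hold()/__use++/lastuse pattern removed
	 * three times from route.c, folded into one call. */
	static inline void dst_use(struct dst_entry *dst, unsigned long time)
	{
		dst_hold(dst);
		dst->__use++;
		dst->lastuse = time;
	}

	/* ip_mroute_opt(): the MRT_BASE range test that ip_sockglue.c
	 * previously open-coded behind #ifdef CONFIG_IP_MROUTE. */
	#ifdef CONFIG_IP_MROUTE
	static inline int ip_mroute_opt(int opt)
	{
		return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10);
	}
	#else
	static inline int ip_mroute_opt(int opt)
	{
		return 0; /* lets the compiler drop the mroute branches */
	}
	#endif

With the !CONFIG_IP_MROUTE stub returning a constant 0, the multicast-routing branches are compiled away, which is why the call sites above could lose their #ifdef blocks.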