author    | Dave Kleikamp <shaggy@austin.ibm.com> | 2005-11-11 14:09:06 -0600
committer | Dave Kleikamp <shaggy@austin.ibm.com> | 2005-11-11 14:09:06 -0600
commit    | ca869912366f60cb5e0bdd09f65e80ee6816e73c (patch)
tree      | a72913a29495ca078987c09fc0008f47e11b900b /net
parent    | dd8a306ac0c918268bd2ae89da2dea627f6e352d (diff)
parent    | 388f7ef720a982f49925e7b4e96f216f208f8c03 (diff)
Merge with /home/shaggy/git/linus-clean/
Diffstat (limited to 'net')
-rw-r--r-- | net/core/datagram.c | 21
-rw-r--r-- | net/core/dev.c | 12
-rw-r--r-- | net/core/netpoll.c | 18
-rw-r--r-- | net/ipv4/icmp.c | 6
-rw-r--r-- | net/ipv4/igmp.c | 19
-rw-r--r-- | net/ipv4/ip_gre.c | 15
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 11
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 8
-rw-r--r-- | net/ipv4/tcp.c | 3
-rw-r--r-- | net/ipv4/tcp_bic.c | 12
-rw-r--r-- | net/ipv4/tcp_cong.c | 40
-rw-r--r-- | net/ipv4/tcp_highspeed.c | 11
-rw-r--r-- | net/ipv4/tcp_htcp.c | 13
-rw-r--r-- | net/ipv4/tcp_hybla.c | 6
-rw-r--r-- | net/ipv4/tcp_input.c | 288
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 28
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 7
-rw-r--r-- | net/ipv4/tcp_output.c | 61
-rw-r--r-- | net/ipv4/tcp_scalable.c | 14
-rw-r--r-- | net/ipv4/tcp_timer.c | 4
-rw-r--r-- | net/ipv4/tcp_vegas.c | 42
-rw-r--r-- | net/ipv4/udp.c | 7
-rw-r--r-- | net/ipv6/icmp.c | 21
-rw-r--r-- | net/ipv6/raw.c | 42
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 20
-rw-r--r-- | net/ipv6/udp.c | 25
-rw-r--r-- | net/rxrpc/transport.c | 15
-rw-r--r-- | net/sunrpc/socklib.c | 5
-rw-r--r-- | net/sunrpc/svcsock.c | 9
29 files changed, 459 insertions(+), 324 deletions(-)
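Much of this merge converts open-coded receive-checksum verification to the new __skb_checksum_complete()/skb_checksum_complete() helpers and adds netdev_rx_csum_fault() to report hardware checksum failures. As a reading aid, here is a minimal userspace sketch of the ones-complement fold-and-verify step those helpers perform over a packet. It is plain C for illustration only; fold_csum, ones_complement_sum and packet_csum_ok are made-up names standing in for csum_fold(), skb_checksum() and __skb_checksum_complete(), not kernel APIs.

```c
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit ones-complement accumulator down to 16 bits, as csum_fold() does. */
static uint16_t fold_csum(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* add the carries back in */
	sum = (sum & 0xffff) + (sum >> 16);	/* at most one carry remains */
	return (uint16_t)~sum;
}

/* Ones-complement sum over a buffer, the work skb_checksum() does over an skb. */
static uint32_t ones_complement_sum(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)data[0] << 8) | data[1];
		data += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)data[0] << 8;
	return sum;
}

/*
 * Rough equivalent of __skb_checksum_complete(): a packet whose embedded
 * checksum is valid folds to zero.  Returns 0 for "checksum ok", non-zero
 * otherwise, which is how the callers added in this merge test the result.
 */
static int packet_csum_ok(const uint8_t *pkt, size_t len)
{
	return fold_csum(ones_complement_sum(pkt, len));
}

int main(void)
{
	/* Two data words plus the checksum word that makes the buffer fold to zero. */
	uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c, 0xba, 0xe3 };

	printf("checksum %s\n", packet_csum_ok(pkt, sizeof(pkt)) ? "bad" : "ok");
	return 0;
}
```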
diff --git a/net/core/datagram.c b/net/core/datagram.c index d219435d086..1bcfef51ac5 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -350,6 +350,20 @@ fault: return -EFAULT; } +unsigned int __skb_checksum_complete(struct sk_buff *skb) +{ + unsigned int sum; + + sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_HW)) + netdev_rx_csum_fault(skb->dev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + return sum; +} +EXPORT_SYMBOL(__skb_checksum_complete); + /** * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. * @skb: skbuff @@ -363,7 +377,7 @@ fault: * -EFAULT - fault during copy. Beware, in this case iovec * can be modified! */ -int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, +int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov) { unsigned int csum; @@ -376,8 +390,7 @@ int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, iov++; if (iov->iov_len < chunk) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen, - skb->csum))) + if (__skb_checksum_complete(skb)) goto csum_error; if (skb_copy_datagram_iovec(skb, hlen, iov, chunk)) goto fault; @@ -388,6 +401,8 @@ int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, goto fault; if ((unsigned short)csum_fold(csum)) goto csum_error; + if (unlikely(skb->ip_summed == CHECKSUM_HW)) + netdev_rx_csum_fault(skb->dev); iov->iov_len -= chunk; iov->iov_base += chunk; } diff --git a/net/core/dev.c b/net/core/dev.c index 8d154159527..0b48e294aaf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1108,6 +1108,18 @@ out: return ret; } +/* Take action when hardware reception checksum errors are detected. */ +#ifdef CONFIG_BUG +void netdev_rx_csum_fault(struct net_device *dev) +{ + if (net_ratelimit()) { + printk(KERN_ERR "%s: hw csum failure.\n", dev->name); + dump_stack(); + } +} +EXPORT_SYMBOL(netdev_rx_csum_fault); +#endif + #ifdef CONFIG_HIGHMEM /* Actually, we should eliminate this check as soon as we know, that: * 1. IOMMU is present and allows to map all the memory. 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 802fe11efad..49424a42a2c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -101,16 +101,20 @@ void netpoll_queue(struct sk_buff *skb) static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, unsigned short ulen, u32 saddr, u32 daddr) { - if (uh->check == 0) + unsigned int psum; + + if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) return 0; - if (skb->ip_summed == CHECKSUM_HW) - return csum_tcpudp_magic( - saddr, daddr, ulen, IPPROTO_UDP, skb->csum); + psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); + + if (skb->ip_summed == CHECKSUM_HW && + !(u16)csum_fold(csum_add(psum, skb->csum))) + return 0; - skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); + skb->csum = psum; - return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + return __skb_checksum_complete(skb); } /* @@ -489,7 +493,7 @@ int __netpoll_rx(struct sk_buff *skb) if (ulen != len) goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0) + if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) goto out; if (np->local_ip && np->local_ip != ntohl(iph->daddr)) goto out; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 175e093ec56..e3eceecd049 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -934,11 +934,11 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; - LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n"); + /* fall through */ case CHECKSUM_NONE: - if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) + skb->csum = 0; + if (__skb_checksum_complete(skb)) goto error; - default:; } if (!pskb_pull(skb, sizeof(struct icmphdr))) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c6247fc8406..c04607b4921 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -872,11 +872,18 @@ int igmp_rcv(struct sk_buff *skb) return 0; } - if (!pskb_may_pull(skb, sizeof(struct igmphdr)) || - (u16)csum_fold(skb_checksum(skb, 0, len, 0))) { - in_dev_put(in_dev); - kfree_skb(skb); - return 0; + if (!pskb_may_pull(skb, sizeof(struct igmphdr))) + goto drop; + + switch (skb->ip_summed) { + case CHECKSUM_HW: + if (!(u16)csum_fold(skb->csum)) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = 0; + if (__skb_checksum_complete(skb)) + goto drop; } ih = skb->h.igmph; @@ -906,6 +913,8 @@ int igmp_rcv(struct sk_buff *skb) default: NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type); } + +drop: in_dev_put(in_dev); kfree_skb(skb); return 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 896ce3f8f53..4e9c74b54b1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -577,15 +577,16 @@ static int ipgre_rcv(struct sk_buff *skb) goto drop_nolock; if (flags&GRE_CSUM) { - if (skb->ip_summed == CHECKSUM_HW) { + switch (skb->ip_summed) { + case CHECKSUM_HW: csum = (u16)csum_fold(skb->csum); - if (csum) - skb->ip_summed = CHECKSUM_NONE; - } - if (skb->ip_summed == CHECKSUM_NONE) { - skb->csum = skb_checksum(skb, 0, skb->len, 0); + if (!csum) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); skb->ip_summed = CHECKSUM_HW; - csum = (u16)csum_fold(skb->csum); } offset += 4; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 5198f3a1e2c..e4d6b268e8c 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -13,6 +13,7 @@ #include <linux/in.h> #include 
<linux/icmp.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <net/ip.h> #include <net/checksum.h> #include <linux/netfilter.h> @@ -230,19 +231,15 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_icmp: bad HW ICMP checksum "); - return -NF_ACCEPT; + /* fall through */ case CHECKSUM_NONE: - if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { + skb->csum = 0; + if (__skb_checksum_complete(skb)) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad ICMP checksum "); return -NF_ACCEPT; } - default: - break; } checksum_skipped: diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 65268562351..01444a02b48 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -645,6 +645,14 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_tcp_congestion_control, .strategy = &sysctl_tcp_congestion_control, }, + { + .ctl_name = NET_TCP_ABC, + .procname = "tcp_abc", + .data = &sysctl_tcp_abc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 72b7c22e1ea..9ac7a4f46bd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags) } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { - /* The last check adjusts for discrepance of Linux wrt. RFC + /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ tcp_send_active_reset(sk, gfp_any()); @@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; + tp->bytes_acked = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ae35e060904..1d0cd86621b 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, bictcp_low_utilization(sk, data_acked); - if (in_flight < tp->snd_cwnd) + if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* In "safe" area, increase. */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } else { + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { bictcp_update(ca, tp->snd_cwnd); - /* In dangerous area, increase slowly. + /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ if (tp->snd_cwnd_cnt >= ca->cnt) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index bbf2d6624e8..c7cc62c8dc1 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, { struct tcp_sock *tp = tcp_sk(sk); - if (in_flight < tp->snd_cwnd) + if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* In "safe" area, increase. */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } else { - /* In dangerous area, increase slowly. 
- * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else - tp->snd_cwnd_cnt++; - } + /* In "safe" area, increase. */ + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + + /* In dangerous area, increase slowly. */ + else if (sysctl_tcp_abc) { + /* RFC3465: Apppriate Byte Count + * increase once for each full cwnd acked + */ + if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { + tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } else { + /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt++; + } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 6acc04bde08..82b3c189bd7 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk) } static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, - u32 in_flight, int good) + u32 in_flight, u32 pkts_acked) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); - if (in_flight < tp->snd_cwnd) + if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } else { + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { /* Update AIMD parameters */ if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index e47b37984e9..3284cfb993e 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - if (in_flight < tp->snd_cwnd) + if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* In "safe" area, increase. */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } else { + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { + measure_rtt(sk); /* keep track of number of round-trip times since last backoff event */ @@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, htcp_alpha_update(ca); } - /* In dangerous area, increase slowly. + /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd */ if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) { diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 77add63623d..40dbb387751 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, ca->minrtt = tp->srtt; } + if (!tcp_is_cwnd_limited(sk, in_flight)) + return; + if (!ca->hybla_en) return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); - if (in_flight < tp->snd_cwnd) - return; - if (ca->rho == 0) hybla_recalc_param(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e98b57578d..40a26b7157b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -42,7 +42,7 @@ * Andi Kleen : Moved open_request checking here * and process RSTs for open_requests. * Andi Kleen : Better prune_queue, and other fixes. 
- * Andrey Savochkin: Fix RTT measurements in the presnce of + * Andrey Savochkin: Fix RTT measurements in the presence of * timestamps. * Andrey Savochkin: Check sequence numbers correctly when * removing SACKs due to in sequence incoming @@ -89,6 +89,7 @@ int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; int sysctl_tcp_moderate_rcvbuf = 1; +int sysctl_tcp_abc = 1; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk) * of receiver window. Check #2. * * The scheme does not work when sender sends good segments opening - * window and then starts to feed us spagetti. But it should work + * window and then starts to feed us spaghetti. But it should work * in common situations. Otherwise, we have to rely on queue collapsing. */ @@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, { /* Optimize this! */ int truesize = tcp_win_from_space(skb->truesize)/2; - int window = tcp_full_space(sk)/2; + int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); /* Try to select rcvbuf so that 4 mss-sized segments - * will fit to window and correspoding skbs will fit to our rcvbuf. + * will fit to window and corresponding skbs will fit to our rcvbuf. * (was 3; 4 is minimum to allow fast retransmit to work.) */ while (tcp_win_from_space(rcvmem) < tp->advmss) @@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); } -/* 4. Try to fixup all. It is made iimediately after connection enters +/* 4. Try to fixup all. It is made immediately after connection enters * established state. */ static void tcp_init_buffer_space(struct sock *sk) @@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk) static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) { struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - unsigned int app_win = tp->rcv_nxt - tp->copied_seq; - int ofo_win = 0; icsk->icsk_ack.quick = 0; - skb_queue_walk(&tp->out_of_order_queue, skb) { - ofo_win += skb->len; - } - - /* If overcommit is due to out of order segments, - * do not clamp window. Try to expand rcvbuf instead. - */ - if (ofo_win) { - if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && - !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && - !tcp_memory_pressure && - atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) - sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - sysctl_tcp_rmem[2]); + if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && + !tcp_memory_pressure && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), + sysctl_tcp_rmem[2]); } - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { - app_win += ofo_win; - if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) - app_win >>= 1; - if (app_win > icsk->icsk_ack.rcv_mss) - app_win -= icsk->icsk_ack.rcv_mss; - app_win = max(app_win, 2U*tp->advmss); - + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); - } } /* Receiver "autotuning" code. 
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) * are stalled on filesystem I/O. * * Also, since we are only going for a minimum in the - * non-timestamp case, we do not smoothe things out - * else with timestamps disabled convergance takes too + * non-timestamp case, we do not smoother things out + * else with timestamps disabled convergence takes too * long. */ if (!win_dep) { @@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) } else if (m < new_sample) new_sample = m << 3; } else { - /* No previous mesaure. */ + /* No previous measure. */ new_sample = m << 3; } @@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ if (icsk->icsk_ack.ato > icsk->icsk_rto) icsk->icsk_ack.ato = icsk->icsk_rto; } else if (m > icsk->icsk_rto) { - /* Too long gap. Apparently sender falled to + /* Too long gap. Apparently sender failed to * restart window, so that we send ACKs quickly. */ tcp_incr_quickack(sk); @@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) +static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) { struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); long m = mrtt; /* RTT */ /* The following amusing code comes from Jacobson's @@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) * * Funny. This algorithm seems to be very broken. * These formulae increase RTO, when it should be decreased, increase - * too slowly, when it should be incresed fastly, decrease too fastly + * too slowly, when it should be increased fastly, decrease too fastly * etc. I guess in BSD RTO takes ONE value, so that it is absolutely * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) @@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); tp->rtt_seq = tp->snd_nxt; } - - if (icsk->icsk_ca_ops->rtt_sample) - icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk) * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ * to do with delayed acks, because at cwnd>2 true delack timeout * is invisible. Actually, Linux-2.4 also generates erratic - * ACKs in some curcumstances. + * ACKs in some circumstances. */ inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, * all the algo is pure shit and should be replaced - * with correct one. It is exaclty, which we pretend to do. + * with correct one. It is exactly, which we pretend to do. */ } @@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk) * to make it more realistic. * * A bit of theory. RTT is time passed after "normal" sized packet - * is sent until it is ACKed. In normal curcumstances sending small + * is sent until it is ACKed. In normal circumstances sending small * packets force peer to delay ACKs and calculation is correct too. * The algorithm is adaptive and, provided we follow specs, it * NEVER underestimate RTT. BUT! 
If peer tries to make some clever @@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int prior_fackets; u32 lost_retrans = 0; int flag = 0; + int dup_sack = 0; int i; if (!tp->sacked_out) tp->fackets_out = 0; prior_fackets = tp->fackets_out; - for (i=0; i<num_sacks; i++, sp++) { - struct sk_buff *skb; - __u32 start_seq = ntohl(sp->start_seq); - __u32 end_seq = ntohl(sp->end_seq); - int fack_count = 0; - int dup_sack = 0; + /* SACK fastpath: + * if the only SACK change is the increase of the end_seq of + * the first block then only apply that SACK block + * and use retrans queue hinting otherwise slowpath */ + flag = 1; + for (i = 0; i< num_sacks; i++) { + __u32 start_seq = ntohl(sp[i].start_seq); + __u32 end_seq = ntohl(sp[i].end_seq); + + if (i == 0){ + if (tp->recv_sack_cache[i].start_seq != start_seq) + flag = 0; + } else { + if ((tp->recv_sack_cache[i].start_seq != start_seq) || + (tp->recv_sack_cache[i].end_seq != end_seq)) + flag = 0; + } + tp->recv_sack_cache[i].start_seq = start_seq; + tp->recv_sack_cache[i].end_seq = end_seq; /* Check for D-SACK. */ if (i == 0) { @@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (before(ack, prior_snd_una - tp->max_window)) return 0; } + } + + if (flag) + num_sacks = 1; + else { + int j; + tp->fastpath_skb_hint = NULL; + + /* order SACK blocks to allow in order walk of the retrans queue */ + for (i = num_sacks-1; i > 0; i--) { + for (j = 0; j < i; j++){ + if (after(ntohl(sp[j].start_seq), + ntohl(sp[j+1].start_seq))){ + sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); + sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); + sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); + sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); + } + + } + } + } + + /* clear flag as used for different purpose in following code */ + flag = 0; + + for (i=0; i<num_sacks; i++, sp++) { + struct sk_buff *skb; + __u32 start_seq = ntohl(sp->start_seq); + __u32 end_seq = ntohl(sp->end_seq); + int fack_count; + + /* Use SACK fastpath hint if valid */ + if (tp->fastpath_skb_hint) { + skb = tp->fastpath_skb_hint; + fack_count = tp->fastpath_cnt_hint; + } else { + skb = sk->sk_write_queue.next; + fack_count = 0; + } /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) flag |= FLAG_DATA_LOST; - sk_stream_for_retrans_queue(skb, sk) { + sk_stream_for_retrans_queue_from(skb, sk) { int in_sack, pcount; u8 sacked; + tp->fastpath_skb_hint = skb; + tp->fastpath_cnt_hint = fack_count; + /* The retransmission queue is always in order, so * we can short-circuit the walk early. 
*/ @@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= tcp_skb_pcount(skb); tp->retrans_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; } } else { /* New sack for not retransmitted frame, @@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; tp->lost_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; } } @@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); + tp->retransmit_skb_hint = NULL; } } } @@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; + if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); + + clear_all_retrans_hints(tp); } void tcp_clear_retrans(struct tcp_sock *tp) @@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; + tp->bytes_acked = 0; tcp_clear_retrans(tp); /* Push undo marker, if it was plain RTO and nothing @@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); + + clear_all_retrans_hints(tp); } static int tcp_check_sack_reneging(struct sock *sk) @@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, int packets, u32 high_seq) { struct sk_buff *skb; - int cnt = packets; + int cnt; - BUG_TRAP(cnt <= tp->packets_out); + BUG_TRAP(packets <= tp->packets_out); + if (tp->lost_skb_hint) { + skb = tp->lost_skb_hint; + cnt = tp->lost_cnt_hint; + } else { + skb = sk->sk_write_queue.next; + cnt = 0; + } - sk_stream_for_retrans_queue(skb, sk) { - cnt -= tcp_skb_pcount(skb); - if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + sk_stream_for_retrans_queue_from(skb, sk) { + /* TODO: do this better */ + /* this is not the most efficient way to do this... */ + tp->lost_skb_hint = skb; + tp->lost_cnt_hint = cnt; + cnt += tcp_skb_pcount(skb); + if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + + /* clear xmit_retransmit_queue hints + * if this is beyond hint */ + if(tp->retransmit_skb_hint != NULL && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { + + tp->retransmit_skb_hint = NULL; + } } } tcp_sync_left_out(tp); @@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) if (tcp_head_timedout(sk, tp)) { struct sk_buff *skb; - sk_stream_for_retrans_queue(skb, sk) { - if (tcp_skb_timedout(sk, skb) && - !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { + skb = tp->scoreboard_skb_hint ? 
tp->scoreboard_skb_hint + : sk->sk_write_queue.next; + + sk_stream_for_retrans_queue_from(skb, sk) { + if (!tcp_skb_timedout(sk, skb)) + break; + + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + + /* clear xmit_retrans hint */ + if (tp->retransmit_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + + tp->retransmit_skb_hint = NULL; } } + + tp->scoreboard_skb_hint = skb; + tcp_sync_left_out(tp); } } @@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) } tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; + + /* There is something screwy going on with the retrans hints after + an undo */ + clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) sk_stream_for_retrans_queue(skb, sk) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } + + clear_all_retrans_hints(tp); + DBGUNDO(sk, tp, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; @@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, TCP_ECN_queue_cwr(tp); } + tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); } @@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } /* Read draft-ietf-tcplw-high-performance before mucking - * with this code. (Superceeds RFC1323) + * with this code. (Supersedes RFC1323) */ -static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) +static void tcp_ack_saw_tstamp(struct sock *sk, int flag) { /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment @@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> * * Changed: reset backoff as soon as we see the first valid sample. - * If we do not, we get strongly overstimated rto. With timestamps + * If we do not, we get strongly overestimated rto. With timestamps * samples are accepted even from very old segments: f.e., when rtt=1 * increases to 8, we retransmit 5 times and after 8 seconds delayed * answer arrives rto becomes 120 seconds! If at least one of segments @@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) */ struct tcp_sock *tp = tcp_sk(sk); const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; - tcp_rtt_estimator(sk, seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); } -static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) +static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) { /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine @@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(sk, seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); } static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, - const s32 seq_rtt, u32 *usrtt) + const s32 seq_rtt) { const struct tcp_sock *tp = tcp_sk(sk); /* Note that peer MAY send zero echo. In this case it is ignored. 
(rfc1323) */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) - tcp_ack_saw_tstamp(sk, usrtt, flag); + tcp_ack_saw_tstamp(sk, flag); else if (seq_rtt >= 0) - tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); + tcp_ack_no_tstamp(sk, seq_rtt, flag); } static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, @@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } +static inline u32 tcp_usrtt(const struct sk_buff *skb) +{ + struct timeval tv, now; + + do_gettimeofday(&now); + skb_get_timestamp(skb, &tv); + return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec); +} /* Remove acknowledged frames from the retransmission queue. */ -static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt) +static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) { struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; __u32 now = tcp_time_stamp; int acked = 0; __s32 seq_rtt = -1; - struct timeval usnow; u32 pkts_acked = 0; - - if (seq_usrtt) - do_gettimeofday(&usnow); + void (*rtt_sample)(struct sock *sk, u32 usrtt) + = icsk->icsk_ca_ops->rtt_sample; while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { @@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt tp->retrans_out -= tcp_skb_pcount(skb); acked |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; - } else if (seq_rtt < 0) + } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - if (seq_usrtt) { - struct timeval tv; - - skb_get_timestamp(skb, &tv); - *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 - + (usnow.tv_usec - tv.tv_usec); + if (rtt_sample) + (*rtt_sample)(sk, tcp_usrtt(skb)); } - if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); if (sacked & TCPCB_LOST) @@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt !before(scb->end_seq, tp->snd_up)) tp->urg_mode = 0; } - } else if (seq_rtt < 0) + } else if (seq_rtt < 0) { seq_rtt = now - scb->when; + if (rtt_sample) + (*rtt_sample)(sk, tcp_usrtt(skb)); + } tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); + clear_all_retrans_hints(tp); } if (acked&FLAG_ACKED) { - const struct inet_connection_sock *icsk = inet_csk(sk); - tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); + tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk, tp); if (icsk->icsk_ca_ops->pkts_acked) @@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) } /* F-RTO affects on two new ACKs following RTO. - * At latest on third ACK the TCP behavor is back to normal. + * At latest on third ACK the TCP behavior is back to normal. */ tp->frto_counter = (tp->frto_counter + 1) % 3; } @@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 ack = TCP_SKB_CB(skb)->ack_seq; u32 prior_in_flight; s32 seq_rtt; - s32 seq_usrtt = 0; int prior_packets; /* If the ack is newer than sent or older than previous acks @@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) goto old_ack; + if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) + tp->bytes_acked += ack - prior_snd_una; + if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. 
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) prior_in_flight = tcp_packets_in_flight(tp); /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, &seq_rtt, - icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); + flag |= tcp_clean_rtx_queue(sk, &seq_rtt); if (tp->frto_counter) tcp_process_frto(sk, prior_snd_una); if (tcp_ack_is_dubious(sk, flag)) { - /* Advanve CWND, if state allows this. */ + /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); @@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, { struct sk_buff *skb; - /* First, check that queue is collapsable and find + /* First, check that queue is collapsible and find * the point where collapsing can be useful. */ for (skb = head; skb != tail; ) { /* No new bits? It is possible on ofo queue. */ @@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk) /* * This routine is only called when we have urgent data - * signalled. Its the 'slow' part of tcp_urg. It could be + * signaled. Its the 'slow' part of tcp_urg. It could be * moved inline now as tcp_urg is only called from one * place. We handle URGent data wrong. We have to - as * BSD still doesn't use the correction from RFC961. @@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) * urgent. To do this requires some care. We cannot just ignore * tp->copied_seq since we would read the last urgent byte again * as data, nor can we alter copied_seq until this data arrives - * or we break the sematics of SIOCATMARK (and thus sockatmark()) + * or we break the semantics of SIOCATMARK (and thus sockatmark()) * * NOTE. Double Dutch. Rendering to plain English: author of comment * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); @@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rx_opt.saw_tstamp = 0; /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_predition is to be made + * if header_prediction is to be made * 'S' will always be tp->tcp_header_len >> 2 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to * turn it off (when there are holes in the receive @@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, NULL, 0); + tcp_ack_saw_tstamp(sk, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -4372,6 +4471,7 @@ discard: EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_reordering); +EXPORT_SYMBOL(sysctl_tcp_abc); EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 634dabb558f..4d5021e1929 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -39,7 +39,7 @@ * request_sock handling and moved * most of it into the af independent code. * Added tail drop and some other bugfixes. - * Added new listen sematics. + * Added new listen semantics. * Mike McLagan : Routing by source * Juan Jose Ciarlante: ip_dynaddr bits * Andi Kleen: various fixes. 
@@ -1110,24 +1110,18 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static int tcp_v4_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_HW) { - skb->ip_summed = CHECKSUM_UNNECESSARY; if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, - skb->nh.iph->daddr, skb->csum)) + skb->nh.iph->daddr, skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; return 0; - - LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n"); - skb->ip_summed = CHECKSUM_NONE; + } } + + skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len, IPPROTO_TCP, 0); + if (skb->len <= 76) { - if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, - skb->nh.iph->daddr, - skb_checksum(skb, 0, skb->len, 0))) - return -1; - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { - skb->csum = ~tcp_v4_check(skb->h.th, skb->len, - skb->nh.iph->saddr, - skb->nh.iph->daddr, 0); + return __skb_checksum_complete(skb); } return 0; } @@ -1216,10 +1210,10 @@ int tcp_v4_rcv(struct sk_buff *skb) /* An explanation is required here, I think. * Packet length and doff are validated by header prediction, - * provided case of th->doff==0 is elimineted. + * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ if ((skb->ip_summed != CHECKSUM_UNNECESSARY && - tcp_v4_checksum_init(skb) < 0)) + tcp_v4_checksum_init(skb))) goto bad_packet; th = skb->h.th; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b1a63b2c6b4..1b66a2ac432 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -158,7 +158,7 @@ kill_with_rst: /* I am shamed, but failed to make it more elegant. * Yes, it is direct reference to IP, which is impossible * to generalize to IPv6. Taking into account that IPv6 - * do not undertsnad recycling in any case, it not + * do not understand recycling in any case, it not * a big problem in practice. --ANK */ if (tw->tw_family == AF_INET && tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && @@ -194,7 +194,7 @@ kill_with_rst: /* In window segment, it may be only reset or bare ack. */ if (th->rst) { - /* This is TIME_WAIT assasination, in two flavors. + /* This is TIME_WAIT assassination, in two flavors. * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ @@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, */ newtp->snd_cwnd = 2; newtp->snd_cwnd_cnt = 0; + newtp->bytes_acked = 0; newtp->frto_counter = 0; newtp->frto_highmark = 0; @@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* RFC793 page 36: "If the connection is in any non-synchronized state ... * and the incoming segment acknowledges something not yet - * sent (the segment carries an unaccaptable ACK) ... + * sent (the segment carries an unacceptable ACK) ... * a reset is sent." * * Invalid ACK: reset will be sent by listening socket diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b907456a79f..029c70dfb58 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss u16 flags; BUG_ON(len > skb->len); + + clear_all_retrans_hints(tp); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) for TCP options, but includes only bare TCP header. tp->rx_opt.mss_clamp is mss negotiated at connection setup. 
- It is minumum of user_mss and mss received with SYN. + It is minimum of user_mss and mss received with SYN. It also does not include TCP options. tp->pmtu_cookie is last pmtu, seen by this function. @@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - /* MSS for the peer's data. Previous verions used mss_clamp + /* MSS for the peer's data. Previous versions used mss_clamp * here. I don't know if the value based on our guesses * of peer's MSS is better for the performance. It's more correct * but may be worse for the performance because of rcv_mss @@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); - /* Ok. We will be able to collapse the packet. */ + /* changing transmit queue under us so clear hints */ + clear_all_retrans_hints(tp); + + /* Ok. We will be able to collapse the packet. */ __skb_unlink(next_skb, &sk->sk_write_queue); memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); @@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk) } } + clear_all_retrans_hints(tp); + if (!lost) return; @@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int err; /* Do not sent more than we queued. 1/4 is reserved for possible - * copying overhead: frgagmentation, tunneling, mangling etc. + * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) @@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int packet_cnt = tp->lost_out; + int packet_cnt; + + if (tp->retransmit_skb_hint) { + skb = tp->retransmit_skb_hint; + packet_cnt = tp->retransmit_cnt_hint; + }else{ + skb = sk->sk_write_queue.next; + packet_cnt = 0; + } /* First pass: retransmit lost packets. */ - if (packet_cnt) { - sk_stream_for_retrans_queue(skb, sk) { + if (tp->lost_out) { + sk_stream_for_retrans_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + /* we could do better than to assign each time */ + tp->retransmit_skb_hint = skb; + tp->retransmit_cnt_hint = packet_cnt; + /* Assume this retransmit will generate * only one packet for congestion window * calculation purposes. 
This works because @@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return; - if (sacked&TCPCB_LOST) { + if (sacked & TCPCB_LOST) { if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { - if (tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb)) { + tp->retransmit_skb_hint = NULL; return; + } if (icsk->icsk_ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else @@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) TCP_RTO_MAX); } - packet_cnt -= tcp_skb_pcount(skb); - if (packet_cnt <= 0) + packet_cnt += tcp_skb_pcount(skb); + if (packet_cnt >= tp->lost_out) break; } } @@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tcp_may_send_now(sk, tp)) return; - packet_cnt = 0; + if (tp->forward_skb_hint) { + skb = tp->forward_skb_hint; + packet_cnt = tp->forward_cnt_hint; + } else{ + skb = sk->sk_write_queue.next; + packet_cnt = 0; + } + + sk_stream_for_retrans_queue_from(skb, sk) { + tp->forward_cnt_hint = packet_cnt; + tp->forward_skb_hint = skb; - sk_stream_for_retrans_queue(skb, sk) { /* Similar to the retransmit loop above we * can pretend that the retransmitted SKB * we send out here will be composed of one @@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) continue; /* Ok, retransmit it. */ - if (tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb)) { + tp->forward_skb_hint = NULL; break; + } if (skb == skb_peek(&sk->sk_write_queue)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect); EXPORT_SYMBOL(tcp_make_synack); EXPORT_SYMBOL(tcp_simple_retransmit); EXPORT_SYMBOL(tcp_sync_mss); +EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor); diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 327770bf552..26d7486ee50 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); - if (in_flight < tp->snd_cwnd) + + if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - tp->snd_cwnd++; - } else { + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { tp->snd_cwnd_cnt++; if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ - tp->snd_cwnd++; + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; tp->snd_cwnd_cnt = 0; } } - tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); - tp->snd_cwnd_stamp = tcp_time_stamp; } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 415ee47ac1c..e1880959614 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk) * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. * - * Criterium is still not confirmed experimentally and may change. + * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. @@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk) hole detection. :-( It is place to make it. It is not made. I do not want - to make it. It is disguisting. It does not work in any + to make it. It is disgusting. It does not work in any case. 
Let me to cite the same draft, which requires for us to implement this: diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 93c5f92070f..b7d296a8ac6 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - if (tp->snd_cwnd > tp->snd_ssthresh) - tp->snd_cwnd++; + tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); } else { u32 rtt, target_cwnd, diff; @@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, */ diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; - if (tp->snd_cwnd < tp->snd_ssthresh) { + if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ if (diff > gamma) { /* Going too fast. Time to slow down @@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, V_PARAM_SHIFT)+1); } + tcp_slow_start(tp); } else { /* Congestion avoidance. */ u32 next_snd_cwnd; @@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, else if (next_snd_cwnd < tp->snd_cwnd) tp->snd_cwnd--; } - } - /* Wipe the slate clean for the next RTT. */ - vegas->cntRTT = 0; - vegas->minRTT = 0x7fffffff; + if (tp->snd_cwnd < 2) + tp->snd_cwnd = 2; + else if (tp->snd_cwnd > tp->snd_cwnd_clamp) + tp->snd_cwnd = tp->snd_cwnd_clamp; + } } - /* The following code is executed for every ack we receive, - * except for conditions checked in should_advance_cwnd() - * before the call to tcp_cong_avoid(). Mainly this means that - * we only execute this code if the ack actually acked some - * data. - */ - - /* If we are in slow start, increase our cwnd in response to this ACK. - * (If we are not in slow start then we are in congestion avoidance, - * and adjust our congestion window only once per RTT. See the code - * above.) - */ - if (tp->snd_cwnd <= tp->snd_ssthresh) - tp->snd_cwnd++; - - /* to keep cwnd from growing without bound */ - tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); - - /* Make sure that we are never so timid as to reduce our cwnd below - * 2 MSS. - * - * Going below 2 MSS would risk huge delayed ACKs from our receiver. - */ - tp->snd_cwnd = max(tp->snd_cwnd, 2U); + /* Wipe the slate clean for the next RTT. */ + vegas->cntRTT = 0; + vegas->minRTT = 0x7fffffff; } /* Extract info for Tcp socket info provided via netlink. 
*/ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e0bd1013cb0..2422a5f7195 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -761,7 +761,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) static __inline__ int __udp_checksum_complete(struct sk_buff *skb) { - return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + return __skb_checksum_complete(skb); } static __inline__ int udp_checksum_complete(struct sk_buff *skb) @@ -1100,11 +1100,8 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else if (skb->ip_summed == CHECKSUM_HW) { - skb->ip_summed = CHECKSUM_UNNECESSARY; if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) - return 0; - LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n"); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 23e540365a1..1bdf0fb8bf8 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -585,17 +585,16 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) daddr = &skb->nh.ipv6h->daddr; /* Perform checksum. */ - if (skb->ip_summed == CHECKSUM_HW) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, - skb->csum)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); - skb->ip_summed = CHECKSUM_NONE; - } - } - if (skb->ip_summed == CHECKSUM_NONE) { - if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, - skb_checksum(skb, 0, skb->len, 0))) { + switch (skb->ip_summed) { + case CHECKSUM_HW: + if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, + skb->csum)) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_ICMPV6, 0); + if (__skb_checksum_complete(skb)) { LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", NIP6(*saddr), NIP6(*daddr)); goto discard_it; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 651c79b41ee..8e9628f1c4c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -298,13 +298,10 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb, static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) { if ((raw6_sk(sk)->checksum || sk->sk_filter) && - skb->ip_summed != CHECKSUM_UNNECESSARY) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { - /* FIXME: increment a raw6 drops counter here */ - kfree_skb(skb); - return 0; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_checksum_complete(skb)) { + /* FIXME: increment a raw6 drops counter here */ + kfree_skb(skb); + return 0; } /* Charge it to the socket. 
*/ @@ -337,32 +334,25 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) { - if (skb->ip_summed == CHECKSUM_HW) { - skb_postpull_rcsum(skb, skb->nh.raw, - skb->h.raw - skb->nh.raw); + if (skb->ip_summed == CHECKSUM_HW) { + skb_postpull_rcsum(skb, skb->nh.raw, + skb->h.raw - skb->nh.raw); + if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + skb->len, inet->num, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - skb->len, inet->num, skb->csum)) { - LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n"); - skb->ip_summed = CHECKSUM_NONE; - } - } - if (skb->ip_summed == CHECKSUM_NONE) - skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - skb->len, inet->num, 0); } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + skb->len, inet->num, 0); if (inet->hdrincl) { - if (skb->ip_summed != CHECKSUM_UNNECESSARY && - (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { + if (skb_checksum_complete(skb)) { /* FIXME: increment a raw6 drops counter here */ kfree_skb(skb); return 0; } - skb->ip_summed = CHECKSUM_UNNECESSARY; } rawv6_rcv_skb(sk, skb); @@ -407,7 +397,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (skb->ip_summed==CHECKSUM_UNNECESSARY) { err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); } else if (msg->msg_flags&MSG_TRUNC) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) + if (__skb_checksum_complete(skb)) goto csum_copy_err; err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); } else { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d746d3b27ef..62c0e5bd931 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1401,20 +1401,18 @@ out: static int tcp_v6_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_HW) { - skb->ip_summed = CHECKSUM_UNNECESSARY; if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr,skb->csum)) + &skb->nh.ipv6h->daddr,skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; return 0; - LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); + } } + + skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, 0); + if (skb->len <= 76) { - if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0))) - return -1; - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { - skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr,0); + return __skb_checksum_complete(skb); } return 0; } @@ -1575,7 +1573,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) goto discard_it; if ((skb->ip_summed != CHECKSUM_UNNECESSARY && - tcp_v6_checksum_init(skb) < 0)) + tcp_v6_checksum_init(skb))) goto bad_packet; th = skb->h.th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bf9519341fd..e671153b47b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -248,7 +248,7 @@ try_again: err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied); } else if (msg->msg_flags&MSG_TRUNC) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) + if (__skb_checksum_complete(skb)) goto csum_copy_err; err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied); 
@@ -363,13 +363,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) return -1; } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return 0; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb_checksum_complete(skb)) { + UDP6_INC_STATS_BH(UDP_MIB_INERRORS); + kfree_skb(skb); + return 0; } if (sock_queue_rcv_skb(sk,skb)<0) { @@ -491,13 +488,10 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) uh = skb->h.uh; } - if (skb->ip_summed==CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_HW && + !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { - LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n"); - skb->ip_summed = CHECKSUM_NONE; - } - } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); @@ -521,8 +515,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - if (skb->ip_summed != CHECKSUM_UNNECESSARY && - (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) + if (skb_checksum_complete(skb)) goto discard; UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index 122c086ee2d..dbe6105e83a 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c @@ -23,6 +23,7 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/icmp.h> +#include <linux/skbuff.h> #include <net/sock.h> #include <net/ip.h> #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) @@ -475,15 +476,11 @@ void rxrpc_trans_receive_packet(struct rxrpc_transport *trans) /* we'll probably need to checksum it (didn't call * sock_recvmsg) */ - if (pkt->ip_summed != CHECKSUM_UNNECESSARY) { - if ((unsigned short) - csum_fold(skb_checksum(pkt, 0, pkt->len, - pkt->csum))) { - kfree_skb(pkt); - rxrpc_krxiod_queue_transport(trans); - _leave(" CSUM failed"); - return; - } + if (skb_checksum_complete(pkt)) { + kfree_skb(pkt); + rxrpc_krxiod_queue_transport(trans); + _leave(" CSUM failed"); + return; } addr = pkt->nh.iph->saddr; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 8f97e90f36c..eb330d4f66d 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -6,6 +6,9 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/compiler.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> #include <linux/types.h> #include <linux/pagemap.h> #include <linux/udp.h> @@ -165,6 +168,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if ((unsigned short)csum_fold(desc.csum)) return -1; + if (unlikely(skb->ip_summed == CHECKSUM_HW)) + netdev_rx_csum_fault(skb->dev); return 0; no_checksum: if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f16e7cdd615..e50e7cf4373 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -623,12 +623,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); rqstp->rq_arg.head[0].iov_len = len; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) { - if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { - skb_free_datagram(svsk->sk_sk, skb); - return 
0; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb_checksum_complete(skb)) { + skb_free_datagram(svsk->sk_sk, skb); + return 0; } rqstp->rq_skbuff = skb; } |
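The TCP side of this merge also adds RFC 3465 Appropriate Byte Counting behind the new tcp_abc sysctl: tcp_ack() accumulates newly acknowledged bytes in tp->bytes_acked, and tcp_reno_cong_avoid() opens the congestion window by one segment only after a full window's worth of data has been acknowledged. The sketch below folds both halves into a single userspace function to show the counting; struct conn and abc_cong_avoid() are illustrative names under that assumption, not kernel structures.

```c
#include <stdint.h>
#include <stdio.h>

/* Simplified connection state; only the fields the ABC logic touches. */
struct conn {
	uint32_t snd_cwnd;		/* congestion window, in segments */
	uint32_t snd_cwnd_clamp;	/* upper bound on cwnd */
	uint32_t mss;			/* segment size in bytes */
	uint32_t bytes_acked;		/* new in this merge: bytes acked since last increase */
};

/* Congestion-avoidance increase in the style added to tcp_reno_cong_avoid(). */
static void abc_cong_avoid(struct conn *c, uint32_t acked_bytes)
{
	c->bytes_acked += acked_bytes;	/* in the kernel this happens in tcp_ack() */
	if (c->bytes_acked >= c->snd_cwnd * c->mss) {
		c->bytes_acked -= c->snd_cwnd * c->mss;
		if (c->snd_cwnd < c->snd_cwnd_clamp)
			c->snd_cwnd++;
	}
}

int main(void)
{
	struct conn c = { .snd_cwnd = 10, .snd_cwnd_clamp = 100, .mss = 1460, .bytes_acked = 0 };

	/* Ten full-sized ACKed segments equal one cwnd of data, so cwnd grows by one. */
	for (int i = 0; i < 10; i++)
		abc_cong_avoid(&c, c.mss);
	printf("cwnd after one window of ACKs: %u\n", (unsigned)c.snd_cwnd);	/* prints 11 */
	return 0;
}
```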