aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c674
1 files changed, 317 insertions, 357 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 324b4207254..ed750f9ceb0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -61,27 +61,24 @@ int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-static inline void tcp_packets_out_inc(struct sock *sk,
- const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- int orig = tp->packets_out;
+ unsigned int prior_packets = tp->packets_out;
+
+ tcp_advance_send_head(sk, skb);
+ tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+
+ /* Don't override Nagle indefinately with F-RTO */
+ if (tp->frto_counter == 2)
+ tp->frto_counter = 3;
tp->packets_out += tcp_skb_pcount(skb);
- if (!orig)
+ if (!prior_packets)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}
-static void update_send_head(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- tcp_advance_send_head(sk, skb);
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tcp_packets_out_inc(sk, skb);
-}
-
/* SND.NXT, if window was not shrunk.
* If window has been shrunk, what should we make? It is not clear at all.
* Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
@@ -92,10 +89,10 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
+ if (!before(tcp_wnd_end(tp), tp->snd_nxt))
return tp->snd_nxt;
else
- return tp->snd_una+tp->snd_wnd;
+ return tcp_wnd_end(tp);
}
/* Calculate mss to advertise in SYN segment.
@@ -224,14 +221,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
* following RFC2414. Senders, not following this RFC,
* will be satisfied with 2.
*/
- if (mss > (1<<*rcv_wscale)) {
+ if (mss > (1 << *rcv_wscale)) {
int init_cwnd = 4;
- if (mss > 1460*3)
+ if (mss > 1460 * 3)
init_cwnd = 2;
else if (mss > 1460)
init_cwnd = 3;
- if (*rcv_wnd > init_cwnd*mss)
- *rcv_wnd = init_cwnd*mss;
+ if (*rcv_wnd > init_cwnd * mss)
+ *rcv_wnd = init_cwnd * mss;
}
/* Set the clamp no higher than max representable value */
@@ -281,11 +278,10 @@ static u16 tcp_select_window(struct sock *sk)
return new_win;
}
-static inline void TCP_ECN_send_synack(struct tcp_sock *tp,
- struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
{
TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
- if (!(tp->ecn_flags&TCP_ECN_OK))
+ if (!(tp->ecn_flags & TCP_ECN_OK))
TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
}
@@ -295,7 +291,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
tp->ecn_flags = 0;
if (sysctl_tcp_ecn) {
- TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
+ TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
tp->ecn_flags = TCP_ECN_OK;
}
}
@@ -317,7 +313,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
if (skb->len != tcp_header_len &&
!before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
INET_ECN_xmit(sk);
- if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
+ if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
tcp_hdr(skb)->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
@@ -331,6 +327,26 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
}
}
+/* Constructs common control bits of non-data skb. If SYN/FIN is present,
+ * auto increment end seqno.
+ */
+static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
+{
+ skb->csum = 0;
+
+ TCP_SKB_CB(skb)->flags = flags;
+ TCP_SKB_CB(skb)->sacked = 0;
+
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
+
+ TCP_SKB_CB(skb)->seq = seq;
+ if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+ seq++;
+ TCP_SKB_CB(skb)->end_seq = seq;
+}
+
static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
__u32 tstamp, __u8 **md5_hash)
{
@@ -434,7 +450,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
- *md5_hash = (__u8 *) ptr;
+ *md5_hash = (__u8 *)ptr;
}
#endif
}
@@ -450,7 +466,8 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ gfp_t gfp_mask)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet;
@@ -554,8 +571,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->urg_ptr = 0;
if (unlikely(tp->urg_mode &&
- between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
- th->urg_ptr = htons(tp->snd_up-tcb->seq);
+ between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
+ th->urg_ptr = htons(tp->snd_up - tcb->seq);
th->urg = 1;
}
@@ -619,7 +636,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
#undef SYSCTL_FLAG_SACK
}
-
/* This routine just queue's the buffer
*
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -633,10 +649,12 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
tp->write_seq = TCP_SKB_CB(skb)->end_seq;
skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk_charge_skb(sk, skb);
+ sk->sk_wmem_queued += skb->truesize;
+ sk_mem_charge(sk, skb->truesize);
}
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
+ unsigned int mss_now)
{
if (skb->len <= mss_now || !sk_can_gso(sk)) {
/* Avoid the costly divide in the normal
@@ -653,23 +671,18 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
}
/* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not. Another important thing is to
- * tweak SACK fastpath hint too as it would overwrite all changes unless
- * hint is also changed.
+ * skb is counted to fackets_out or not.
*/
-static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
+static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
int decr)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!tp->sacked_out || tcp_is_reno(tp))
return;
- if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq))
+ if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
tp->fackets_out -= decr;
-
- /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
- if (tp->fastpath_skb_hint != NULL &&
- after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
- tp->fastpath_cnt_hint -= decr;
}
/* Function to create two new TCP segments. Shrinks the given segment
@@ -677,7 +690,8 @@ static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+ unsigned int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -702,7 +716,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
- sk_charge_skb(sk, buff);
+ sk->sk_wmem_queued += buff->truesize;
+ sk_mem_charge(sk, buff->truesize);
nlen = skb->len - len - nsize;
buff->truesize += nlen;
skb->truesize -= nlen;
@@ -712,20 +727,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
- if (tcp_is_sack(tp) && tp->sacked_out &&
- (TCP_SKB_CB(skb)->seq == tp->highest_sack))
- tp->highest_sack = TCP_SKB_CB(buff)->seq;
-
/* PSH and FIN should only be set in the second packet. */
flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+ TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
TCP_SKB_CB(buff)->flags = flags;
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
/* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
+ buff->csum = csum_partial_copy_nocheck(skb->data + len,
+ skb_put(buff, nsize),
nsize, 0);
skb_trim(skb, len);
@@ -772,7 +783,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
tcp_verify_left_out(tp);
}
- tcp_adjust_fackets_out(tp, skb, diff);
+ tcp_adjust_fackets_out(sk, skb, diff);
}
/* Link BUFF into the send queue. */
@@ -792,7 +803,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
eat = len;
k = 0;
- for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
@@ -815,8 +826,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
- if (skb_cloned(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
/* If len == headlen, we avoid __skb_pull to preserve alignment. */
@@ -830,7 +840,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
skb->truesize -= len;
sk->sk_wmem_queued -= len;
- sk->sk_forward_alloc += len;
+ sk_mem_uncharge(sk, len);
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
/* Any change of skb->len requires recalculation of tso
@@ -898,6 +908,15 @@ void tcp_mtup_init(struct sock *sk)
icsk->icsk_mtup.probe_size = 0;
}
+/* Bound MSS / TSO packet size with the half of the window */
+static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+{
+ if (tp->max_window && pktsize > (tp->max_window >> 1))
+ return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+ else
+ return pktsize;
+}
+
/* This function synchronize snd mss to current pmtu/exthdr set.
tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -920,7 +939,6 @@ void tcp_mtup_init(struct sock *sk)
NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
are READ ONLY outside this function. --ANK (980731)
*/
-
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -931,10 +949,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
icsk->icsk_mtup.search_high = pmtu;
mss_now = tcp_mtu_to_mss(sk, pmtu);
-
- /* Bound mss with half of window */
- if (tp->max_window && mss_now > (tp->max_window>>1))
- mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
+ mss_now = tcp_bound_to_half_wnd(tp, mss_now);
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
@@ -988,11 +1003,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
inet_csk(sk)->icsk_ext_hdr_len -
tp->tcp_header_len);
- if (tp->max_window &&
- (xmit_size_goal > (tp->max_window >> 1)))
- xmit_size_goal = max((tp->max_window >> 1),
- 68U - tp->tcp_header_len);
-
+ xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
xmit_size_goal -= (xmit_size_goal % mss_now);
}
tp->xmit_size_goal = xmit_size_goal;
@@ -1001,13 +1012,11 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
}
/* Congestion window validation. (RFC2861) */
-
static void tcp_cwnd_validate(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- __u32 packets_out = tp->packets_out;
- if (packets_out >= tp->snd_cwnd) {
+ if (tp->packets_out >= tp->snd_cwnd) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1022,19 +1031,35 @@ static void tcp_cwnd_validate(struct sock *sk)
}
}
-static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+/* Returns the portion of skb which can be sent right away without
+ * introducing MSS oddities to segment boundaries. In rare cases where
+ * mss_now != mss_cache, we will request caller to create a small skb
+ * per input skb which could be mostly avoided here (if desired).
+ */
+static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+ unsigned int mss_now, unsigned int cwnd)
{
- u32 window, cwnd_len;
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 needed, window, cwnd_len;
- window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+ window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
cwnd_len = mss_now * cwnd;
- return min(window, cwnd_len);
+
+ if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+ return cwnd_len;
+
+ if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+ return cwnd_len;
+
+ needed = min(skb->len, window);
+ return needed - needed % mss_now;
}
/* Can at least one segment of SKB be sent right now, according to the
* congestion window rules? If so, return how many segments are allowed.
*/
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
+ struct sk_buff *skb)
{
u32 in_flight, cwnd;
@@ -1054,13 +1079,12 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
/* This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
-static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
+ unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
- if (!tso_segs ||
- (tso_segs > 1 &&
- tcp_skb_mss(skb) != mss_now)) {
+ if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
tcp_set_skb_tso_segs(sk, skb, mss_now);
tso_segs = tcp_skb_pcount(skb);
}
@@ -1080,16 +1104,13 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
* 4. Or TCP_CORK is not set, and all sent packets are ACKed.
* With Minshall's modification: all sent small packets are ACKed.
*/
-
static inline int tcp_nagle_check(const struct tcp_sock *tp,
const struct sk_buff *skb,
unsigned mss_now, int nonagle)
{
return (skb->len < mss_now &&
- ((nonagle&TCP_NAGLE_CORK) ||
- (!nonagle &&
- tp->packets_out &&
- tcp_minshall_check(tp))));
+ ((nonagle & TCP_NAGLE_CORK) ||
+ (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
}
/* Return non-zero if the Nagle test allows this packet to be
@@ -1121,14 +1142,15 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
}
/* Does at least the first segment of SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
+ unsigned int cur_mss)
{
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (skb->len > cur_mss)
end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
- return !after(end_seq, tp->snd_una + tp->snd_wnd);
+ return !after(end_seq, tcp_wnd_end(tp));
}
/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
@@ -1147,8 +1169,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
return 0;
cwnd_quota = tcp_cwnd_test(tp, skb);
- if (cwnd_quota &&
- !tcp_snd_wnd_test(tp, skb, cur_mss))
+ if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
cwnd_quota = 0;
return cwnd_quota;
@@ -1162,8 +1183,7 @@ int tcp_may_send_now(struct sock *sk)
return (skb &&
tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
(tcp_skb_is_last(sk, skb) ?
- TCP_NAGLE_PUSH :
- tp->nonagle)));
+ tp->nonagle : TCP_NAGLE_PUSH)));
}
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -1173,7 +1193,8 @@ int tcp_may_send_now(struct sock *sk)
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+ unsigned int mss_now)
{
struct sk_buff *buff;
int nlen = skb->len - len;
@@ -1183,11 +1204,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
if (skb->len != skb->data_len)
return tcp_fragment(sk, skb, len, mss_now);
- buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+ buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
if (unlikely(buff == NULL))
return -ENOMEM;
- sk_charge_skb(sk, buff);
+ sk->sk_wmem_queued += buff->truesize;
+ sk_mem_charge(sk, buff->truesize);
buff->truesize += nlen;
skb->truesize -= nlen;
@@ -1198,7 +1220,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* PSH and FIN should only be set in the second packet. */
flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+ TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
TCP_SKB_CB(buff)->flags = flags;
/* This packet was never sent out yet, so no SACK bits. */
@@ -1236,15 +1258,15 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
goto send_now;
/* Defer for less than two clock ticks. */
- if (!tp->tso_deferred && ((jiffies<<1)>>1) - (tp->tso_deferred>>1) > 1)
+ if (tp->tso_deferred &&
+ ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
- BUG_ON(tcp_skb_pcount(skb) <= 1 ||
- (tp->snd_cwnd <= in_flight));
+ BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
- send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+ send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* From in_flight test above, we know that cwnd > in_flight. */
cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
@@ -1275,7 +1297,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
}
/* Ok, it looks like it is advisable to defer. */
- tp->tso_deferred = 1 | (jiffies<<1);
+ tp->tso_deferred = 1 | (jiffies << 1);
return 1;
@@ -1287,7 +1309,8 @@ send_now:
/* Create a new MTU probe if we are ready.
* Returns 0 if we should wait to probe (no cwnd available),
* 1 if a probe was sent,
- * -1 otherwise */
+ * -1 otherwise
+ */
static int tcp_mtu_probe(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1295,7 +1318,7 @@ static int tcp_mtu_probe(struct sock *sk)
struct sk_buff *skb, *nskb, *next;
int len;
int probe_size;
- unsigned int pif;
+ int size_needed;
int copy;
int mss_now;
@@ -1312,34 +1335,25 @@ static int tcp_mtu_probe(struct sock *sk)
/* Very simple search strategy: just double the MSS. */
mss_now = tcp_current_mss(sk, 0);
- probe_size = 2*tp->mss_cache;
+ probe_size = 2 * tp->mss_cache;
+ size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
/* TODO: set timer for probe_converge_event */
return -1;
}
/* Have enough data in the send queue to probe? */
- len = 0;
- if ((skb = tcp_send_head(sk)) == NULL)
- return -1;
- while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
- skb = tcp_write_queue_next(sk, skb);
- if (len < probe_size)
+ if (tp->write_seq - tp->snd_nxt < size_needed)
return -1;
- /* Receive window check. */
- if (after(TCP_SKB_CB(skb)->seq + probe_size, tp->snd_una + tp->snd_wnd)) {
- if (tp->snd_wnd < probe_size)
- return -1;
- else
- return 0;
- }
+ if (tp->snd_wnd < size_needed)
+ return -1;
+ if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
+ return 0;
- /* Do we need to wait to drain cwnd? */
- pif = tcp_packets_in_flight(tp);
- if (pif + 2 > tp->snd_cwnd) {
- /* With no packets in flight, don't stall. */
- if (pif == 0)
+ /* Do we need to wait to drain cwnd? With none in flight, don't stall */
+ if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+ if (!tcp_packets_in_flight(tp))
return -1;
else
return 0;
@@ -1348,11 +1362,10 @@ static int tcp_mtu_probe(struct sock *sk)
/* We're allowed to probe. Build it now. */
if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
return -1;
- sk_charge_skb(sk, nskb);
+ sk->sk_wmem_queued += nskb->truesize;
+ sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk);
- tcp_insert_write_queue_before(nskb, skb, sk);
- tcp_advance_send_head(sk, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1361,30 +1374,32 @@ static int tcp_mtu_probe(struct sock *sk)
nskb->csum = 0;
nskb->ip_summed = skb->ip_summed;
- len = 0;
- while (len < probe_size) {
- next = tcp_write_queue_next(sk, skb);
+ tcp_insert_write_queue_before(nskb, skb, sk);
+ len = 0;
+ tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
if (nskb->ip_summed)
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
else
nskb->csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy), copy, nskb->csum);
+ skb_put(nskb, copy),
+ copy, nskb->csum);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
* Throw it away. */
TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
tcp_unlink_write_queue(skb, sk);
- sk_stream_free_skb(sk, skb);
+ sk_wmem_free_skb(sk, skb);
} else {
TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data, skb->len, 0);
+ skb->csum = csum_partial(skb->data,
+ skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1393,7 +1408,9 @@ static int tcp_mtu_probe(struct sock *sk)
}
len += copy;
- skb = next;
+
+ if (len >= probe_size)
+ break;
}
tcp_init_tso_segs(sk, nskb, nskb->len);
@@ -1402,9 +1419,9 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->when = tcp_time_stamp;
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
/* Decrement cwnd here because we are sending
- * effectively two packets. */
+ * effectively two packets. */
tp->snd_cwnd--;
- update_send_head(sk, nskb);
+ tcp_event_new_data_sent(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1416,7 +1433,6 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
-
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
@@ -1472,17 +1488,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
}
limit = mss_now;
- if (tso_segs > 1) {
- limit = tcp_window_allows(tp, skb,
- mss_now, cwnd_quota);
-
- if (skb->len < limit) {
- unsigned int trim = skb->len % mss_now;
-
- if (trim)
- limit = skb->len - trim;
- }
- }
+ if (tso_segs > 1)
+ limit = tcp_mss_split_point(sk, skb, mss_now,
+ cwnd_quota);
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1496,7 +1504,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
/* Advance the send_head. This one is sent out.
* This call will increment packets_out.
*/
- update_send_head(sk, skb);
+ tcp_event_new_data_sent(sk, skb);
tcp_minshall_update(tp, mss_now, skb);
sent_pkts++;
@@ -1529,7 +1537,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
*/
void tcp_push_one(struct sock *sk, unsigned int mss_now)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = tcp_send_head(sk);
unsigned int tso_segs, cwnd_quota;
@@ -1544,17 +1551,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
BUG_ON(!tso_segs);
limit = mss_now;
- if (tso_segs > 1) {
- limit = tcp_window_allows(tp, skb,
- mss_now, cwnd_quota);
-
- if (skb->len < limit) {
- unsigned int trim = skb->len % mss_now;
-
- if (trim)
- limit = skb->len - trim;
- }
- }
+ if (tso_segs > 1)
+ limit = tcp_mss_split_point(sk, skb, mss_now,
+ cwnd_quota);
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1564,7 +1563,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
- update_send_head(sk, skb);
+ tcp_event_new_data_sent(sk, skb);
tcp_cwnd_validate(sk);
return;
}
@@ -1641,11 +1640,12 @@ u32 __tcp_select_window(struct sock *sk)
if (mss > full_space)
mss = full_space;
- if (free_space < full_space/2) {
+ if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
if (tcp_memory_pressure)
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh,
+ 4U * tp->advmss);
if (free_space < mss)
return 0;
@@ -1678,9 +1678,9 @@ u32 __tcp_select_window(struct sock *sk)
* is too small.
*/
if (window <= free_space - mss || window > free_space)
- window = (free_space/mss)*mss;
+ window = (free_space / mss) * mss;
else if (mss == full_space &&
- free_space > window + full_space/2)
+ free_space > window + (full_space >> 1))
window = free_space;
}
@@ -1688,86 +1688,82 @@ u32 __tcp_select_window(struct sock *sk)
}
/* Attempt to collapse two adjacent SKB's during retransmission. */
-static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
+static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
+ int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+ int skb_size, next_skb_size;
+ u16 flags;
/* The first test we must make is that neither of these two
* SKB's are still referenced by someone else.
*/
- if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
- int skb_size = skb->len, next_skb_size = next_skb->len;
- u16 flags = TCP_SKB_CB(skb)->flags;
+ if (skb_cloned(skb) || skb_cloned(next_skb))
+ return;
- /* Also punt if next skb has been SACK'd. */
- if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
- return;
+ skb_size = skb->len;
+ next_skb_size = next_skb->len;
+ flags = TCP_SKB_CB(skb)->flags;
- /* Next skb is out of window. */
- if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
- return;
+ /* Also punt if next skb has been SACK'd. */
+ if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+ return;
- /* Punt if not enough space exists in the first SKB for
- * the data in the second, or the total combined payload
- * would exceed the MSS.
- */
- if ((next_skb_size > skb_tailroom(skb)) ||
- ((skb_size + next_skb_size) > mss_now))
- return;
+ /* Next skb is out of window. */
+ if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
+ return;
- BUG_ON(tcp_skb_pcount(skb) != 1 ||
- tcp_skb_pcount(next_skb) != 1);
+ /* Punt if not enough space exists in the first SKB for
+ * the data in the second, or the total combined payload
+ * would exceed the MSS.
+ */
+ if ((next_skb_size > skb_tailroom(skb)) ||
+ ((skb_size + next_skb_size) > mss_now))
+ return;
- if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out &&
- (TCP_SKB_CB(next_skb)->seq == tp->highest_sack)))
- return;
+ BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- /* Ok. We will be able to collapse the packet. */
- tcp_unlink_write_queue(next_skb, sk);
+ tcp_highest_sack_combine(sk, next_skb, skb);
- skb_copy_from_linear_data(next_skb,
- skb_put(skb, next_skb_size),
- next_skb_size);
+ /* Ok. We will be able to collapse the packet. */
+ tcp_unlink_write_queue(next_skb, sk);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
+ next_skb_size);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
+ if (next_skb->ip_summed == CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_PARTIAL;
- /* Update sequence range on original skb. */
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
- /* Merge over control information. */
- flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
- TCP_SKB_CB(skb)->flags = flags;
+ /* Update sequence range on original skb. */
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
- /* All done, get rid of second SKB and account for it so
- * packet counting does not break.
- */
- TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
- if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
- tp->retrans_out -= tcp_skb_pcount(next_skb);
- if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
- tp->lost_out -= tcp_skb_pcount(next_skb);
- /* Reno case is special. Sigh... */
- if (tcp_is_reno(tp) && tp->sacked_out)
- tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
-
- tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb));
- tp->packets_out -= tcp_skb_pcount(next_skb);
-
- /* changed transmit queue under us so clear hints */
- tcp_clear_retrans_hints_partial(tp);
- /* manually tune sacktag skb hint */
- if (tp->fastpath_skb_hint == next_skb) {
- tp->fastpath_skb_hint = skb;
- tp->fastpath_cnt_hint -= tcp_skb_pcount(skb);
- }
+ /* Merge over control information. */
+ flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
+ TCP_SKB_CB(skb)->flags = flags;
- sk_stream_free_skb(sk, next_skb);
- }
+ /* All done, get rid of second SKB and account for it so
+ * packet counting does not break.
+ */
+ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
+ if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
+ tp->retrans_out -= tcp_skb_pcount(next_skb);
+ if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
+ tp->lost_out -= tcp_skb_pcount(next_skb);
+ /* Reno case is special. Sigh... */
+ if (tcp_is_reno(tp) && tp->sacked_out)
+ tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
+
+ tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
+ tp->packets_out -= tcp_skb_pcount(next_skb);
+
+ /* changed transmit queue under us so clear hints */
+ tcp_clear_retrans_hints_partial(tp);
+
+ sk_wmem_free_skb(sk, next_skb);
}
/* Do a simple retransmit without using the backoff mechanisms in
@@ -1786,12 +1782,12 @@ void tcp_simple_retransmit(struct sock *sk)
if (skb == tcp_send_head(sk))
break;
if (skb->len > mss &&
- !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
- if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+ !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
}
- if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
+ if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
lost = 1;
@@ -1856,7 +1852,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* case, when window is shrunk to zero. In this case
* our retransmit serves as a zero window probe.
*/
- if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
+ if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
&& TCP_SKB_CB(skb)->seq != tp->snd_una)
return -EAGAIN;
@@ -1870,8 +1866,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
(skb->len < (cur_mss >> 1)) &&
(tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
(!tcp_skb_is_last(sk, skb)) &&
- (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
- (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+ (skb_shinfo(skb)->nr_frags == 0 &&
+ skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+ (tcp_skb_pcount(skb) == 1 &&
+ tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
(sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -1886,12 +1884,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
if (!pskb_trim(skb, 0)) {
- TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
+ /* Reuse, even though it does some unnecessary work */
+ tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
+ TCP_SKB_CB(skb)->flags);
skb->ip_summed = CHECKSUM_NONE;
- skb->csum = 0;
}
}
@@ -1909,7 +1905,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
tp->total_retrans++;
#if FASTRETRANS_DEBUG > 0
- if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
if (net_ratelimit())
printk(KERN_DEBUG "retrans_out leaked.\n");
}
@@ -1951,7 +1947,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (tp->retransmit_skb_hint) {
skb = tp->retransmit_skb_hint;
packet_cnt = tp->retransmit_cnt_hint;
- }else{
+ } else {
skb = tcp_write_queue_head(sk);
packet_cnt = 0;
}
@@ -1978,7 +1974,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
return;
if (sacked & TCPCB_LOST) {
- if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+ if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
if (tcp_retransmit_skb(sk, skb)) {
tp->retransmit_skb_hint = NULL;
return;
@@ -2036,7 +2032,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
break;
tp->forward_skb_hint = skb;
- if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack))
+ if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
break;
if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
@@ -2060,7 +2056,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
}
}
-
/* Send a fin. The caller locks the socket for us. This cannot be
* allowed to fail queueing a FIN frame under any circumstances.
*/
@@ -2091,16 +2086,9 @@ void tcp_send_fin(struct sock *sk)
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
- TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
-
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
- TCP_SKB_CB(skb)->seq = tp->write_seq;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+ tcp_init_nondata_skb(skb, tp->write_seq,
+ TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
tcp_queue_skb(sk, skb);
}
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2124,16 +2112,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
- TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
-
+ tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+ TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
/* Send it off. */
- TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
@@ -2146,14 +2127,14 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
*/
int tcp_send_synack(struct sock *sk)
{
- struct sk_buff* skb;
+ struct sk_buff *skb;
skb = tcp_write_queue_head(sk);
- if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
+ if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
return -EFAULT;
}
- if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
+ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
if (skb_cloned(skb)) {
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
@@ -2161,8 +2142,9 @@ int tcp_send_synack(struct sock *sk)
tcp_unlink_write_queue(skb, sk);
skb_header_release(nskb);
__tcp_add_write_queue_head(sk, nskb);
- sk_stream_free_skb(sk, skb);
- sk_charge_skb(sk, nskb);
+ sk_wmem_free_skb(sk, skb);
+ sk->sk_wmem_queued += nskb->truesize;
+ sk_mem_charge(sk, nskb->truesize);
skb = nskb;
}
@@ -2176,8 +2158,8 @@ int tcp_send_synack(struct sock *sk)
/*
* Prepare a SYN-ACK.
*/
-struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
- struct request_sock *req)
+struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+ struct request_sock *req)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2220,12 +2202,11 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_ECN_make_synack(req, th);
th->source = inet_sk(sk)->sport;
th->dest = ireq->rmt_port;
- TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
- TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
+ /* Setting of flags are superfluous here for callers (and ECE is
+ * not even correctly set)
+ */
+ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
+ TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2257,7 +2238,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
NULL)
);
- skb->csum = 0;
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS(TCP_MIB_OUTSEGS);
@@ -2349,23 +2329,17 @@ int tcp_connect(struct sock *sk)
/* Reserve space for headers. */
skb_reserve(buff, MAX_TCP_HEADER);
- TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
- TCP_ECN_send_syn(sk, buff);
- TCP_SKB_CB(buff)->sacked = 0;
- skb_shinfo(buff)->gso_segs = 1;
- skb_shinfo(buff)->gso_size = 0;
- skb_shinfo(buff)->gso_type = 0;
- buff->csum = 0;
tp->snd_nxt = tp->write_seq;
- TCP_SKB_CB(buff)->seq = tp->write_seq++;
- TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+ tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+ TCP_ECN_send_syn(sk, buff);
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
skb_header_release(buff);
__tcp_add_write_queue_tail(sk, buff);
- sk_charge_skb(sk, buff);
+ sk->sk_wmem_queued += buff->truesize;
+ sk_mem_charge(sk, buff->truesize);
tp->packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2394,9 +2368,10 @@ void tcp_send_delayed_ack(struct sock *sk)
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
- int max_ato = HZ/2;
+ int max_ato = HZ / 2;
- if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
+ if (icsk->icsk_ack.pingpong ||
+ (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
max_ato = TCP_DELACK_MAX;
/* Slow path, intersegment interval is "high". */
@@ -2406,7 +2381,7 @@ void tcp_send_delayed_ack(struct sock *sk)
* directly.
*/
if (tp->srtt) {
- int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
+ int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
if (rtt < max_ato)
max_ato = rtt;
@@ -2440,37 +2415,32 @@ void tcp_send_delayed_ack(struct sock *sk)
/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
{
- /* If we have been reset, we may not send again. */
- if (sk->sk_state != TCP_CLOSE) {
- struct sk_buff *buff;
+ struct sk_buff *buff;
- /* We are not putting this on the write queue, so
- * tcp_transmit_skb() will set the ownership to this
- * sock.
- */
- buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
- if (buff == NULL) {
- inet_csk_schedule_ack(sk);
- inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- TCP_DELACK_MAX, TCP_RTO_MAX);
- return;
- }
+ /* If we have been reset, we may not send again. */
+ if (sk->sk_state == TCP_CLOSE)
+ return;
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(buff, MAX_TCP_HEADER);
- buff->csum = 0;
- TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(buff)->sacked = 0;
- skb_shinfo(buff)->gso_segs = 1;
- skb_shinfo(buff)->gso_size = 0;
- skb_shinfo(buff)->gso_type = 0;
-
- /* Send it off, this clears delayed acks for us. */
- TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
- TCP_SKB_CB(buff)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
+ /* We are not putting this on the write queue, so
+ * tcp_transmit_skb() will set the ownership to this
+ * sock.
+ */
+ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+ if (buff == NULL) {
+ inet_csk_schedule_ack(sk);
+ inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ TCP_DELACK_MAX, TCP_RTO_MAX);
+ return;
}
+
+ /* Reserve space for headers and prepare control bits. */
+ skb_reserve(buff, MAX_TCP_HEADER);
+ tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
+
+ /* Send it off, this clears delayed acks for us. */
+ TCP_SKB_CB(buff)->when = tcp_time_stamp;
+ tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}
/* This routine sends a packet with an out of date sequence
@@ -2496,66 +2466,57 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
/* Reserve space for headers and set control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(skb)->sacked = urgent;
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
-
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
* send it.
*/
- TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
TCP_SKB_CB(skb)->when = tcp_time_stamp;
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
int tcp_write_wakeup(struct sock *sk)
{
- if (sk->sk_state != TCP_CLOSE) {
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
-
- if ((skb = tcp_send_head(sk)) != NULL &&
- before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
- int err;
- unsigned int mss = tcp_current_mss(sk, 0);
- unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
-
- if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
- tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
-
- /* We are probing the opening of a window
- * but the window size is != 0
- * must have been a result SWS avoidance ( sender )
- */
- if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
- skb->len > mss) {
- seg_size = min(seg_size, mss);
- TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss))
- return -1;
- } else if (!tcp_skb_pcount(skb))
- tcp_set_skb_tso_segs(sk, skb, mss);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+
+ if (sk->sk_state == TCP_CLOSE)
+ return -1;
+
+ if ((skb = tcp_send_head(sk)) != NULL &&
+ before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
+ int err;
+ unsigned int mss = tcp_current_mss(sk, 0);
+ unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
+
+ if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
+ tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
+ /* We are probing the opening of a window
+ * but the window size is != 0
+ * must have been a result SWS avoidance ( sender )
+ */
+ if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
+ skb->len > mss) {
+ seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
- if (!err) {
- update_send_head(sk, skb);
- }
- return err;
- } else {
- if (tp->urg_mode &&
- between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
- tcp_xmit_probe_skb(sk, TCPCB_URG);
- return tcp_xmit_probe_skb(sk, 0);
- }
+ if (tcp_fragment(sk, skb, seg_size, mss))
+ return -1;
+ } else if (!tcp_skb_pcount(skb))
+ tcp_set_skb_tso_segs(sk, skb, mss);
+
+ TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ if (!err)
+ tcp_event_new_data_sent(sk, skb);
+ return err;
+ } else {
+ if (tp->urg_mode &&
+ between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
+ tcp_xmit_probe_skb(sk, 1);
+ return tcp_xmit_probe_skb(sk, 0);
}
- return -1;
}
/* A window probe timeout has occurred. If window is not closed send
@@ -2603,5 +2564,4 @@ EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(tcp_sync_mss);
-EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
EXPORT_SYMBOL(tcp_mtup_init);