From 55820ee2f8c767a2833b21bd365e5753f50bd8ce Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 Jul 2005 14:08:10 -0700 Subject: [NET]: Fix signedness issues in net/core/filter.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the code to load packet data into a register: k = fentry->k; if (k < 0) { ... } else { u32 _tmp, *p; p = skb_header_pointer(skb, k, 4, &_tmp); if (p != NULL) { A = ntohl(*p); continue; } } skb_header_pointer checks if the requested data is within the linear area: int hlen = skb_headlen(skb); if (offset + len <= hlen) return skb->data + offset; When offset is within [INT_MAX-len+1..INT_MAX] the addition will result in a negative number which is <= hlen. I couldn't trigger a crash on my AMD64 with 2GB of memory, but a coworker tried on his x86 machine and it crashed immediately. This patch fixes the check in skb_header_pointer to handle large positive offsets similar to skb_copy_bits. Invalid data can still be accessed using negative offsets (also similar to skb_copy_bits), anyone using negative offsets needs to verify them himself. Thanks to Thomas Vögtle for verifying the problem by crashing his machine and providing me with an Oops. Signed-off-by: Patrick McHardy Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 416a2e4024b..fbcb1865197 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1211,7 +1211,7 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, { int hlen = skb_headlen(skb); - if (offset + len <= hlen) + if (hlen - offset >= len) return skb->data + offset; if (skb_copy_bits(skb, offset, buffer, len) < 0) -- cgit v1.2.3 From e176fe8954a5239c24afe79b1001ba3c29511963 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jul 2005 14:12:44 -0700 Subject: [NET]: Remove unused security member in sk_buff Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +--- include/linux/tc_ematch/tc_em_meta.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fbcb1865197..1e6290f4f81 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -183,7 +183,6 @@ struct skb_shared_info { * @priority: Packet queueing priority * @users: User count - see {datagram,tcp}.c * @protocol: Packet protocol from driver - * @security: Security level of packet * @truesize: Buffer size * @head: Head of buffer * @data: Data head pointer @@ -255,8 +254,7 @@ struct sk_buff { pkt_type, ip_summed; __u32 priority; - unsigned short protocol, - security; + unsigned short protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index a6b2cc530af..bcb762d9312 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -45,7 +45,7 @@ enum TCF_META_ID_REALDEV, TCF_META_ID_PRIORITY, TCF_META_ID_PROTOCOL, - TCF_META_ID_SECURITY, + TCF_META_ID_SECURITY, /* obsolete */ TCF_META_ID_PKTTYPE, TCF_META_ID_PKTLEN, 
TCF_META_ID_DATALEN, -- cgit v1.2.3 From 1cbb3380ef683f742876f48e3739b3df4ea9e168 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jul 2005 14:13:41 -0700 Subject: [NET]: Reduce size of sk_buff by 4 bytes Reduce local_df to a bit field and ip_summed to a 2 bits field thus saving 13 bits. Move bit fields, packet type, and protocol into the spare area between the priority and the destructor. Saves 4 bytes on both, 32bit and 64bit architectures. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1e6290f4f81..14b95041349 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -248,17 +248,18 @@ struct sk_buff { data_len, mac_len, csum; - unsigned char local_df, - cloned:1, - nohdr:1, - pkt_type, - ip_summed; __u32 priority; - unsigned short protocol; + __u8 local_df:1, + cloned:1, + ip_summed:2, + nohdr:1; + /* 3 bits spare */ + __u8 pkt_type; + __u16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; + unsigned long nfmark; __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; -- cgit v1.2.3 From e41a33e6ec20a0a6ac762629149e36cab5d4213f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jul 2005 14:14:30 -0700 Subject: [PKT_SCHED]: Move sch_generic.c prototypes to correct header file Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/net/pkt_sched.h | 10 ---------- include/net/sch_generic.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fcb05a387db..2f494a20d51 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -207,8 +207,6 @@ psched_tod_diff(int delta_sec, int bound) #endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ -extern struct Qdisc noop_qdisc; -extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; @@ -216,14 +214,6 @@ extern int register_qdisc(struct Qdisc_ops *qops); extern int unregister_qdisc(struct Qdisc_ops *qops); extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); -extern void dev_init_scheduler(struct net_device *dev); -extern void dev_shutdown(struct net_device *dev); -extern void dev_activate(struct net_device *dev); -extern void dev_deactivate(struct net_device *dev); -extern void qdisc_reset(struct Qdisc *qdisc); -extern void qdisc_destroy(struct Qdisc *qdisc); -extern struct Qdisc * qdisc_create_dflt(struct net_device *dev, - struct Qdisc_ops *ops); extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); extern void qdisc_put_rtab(struct qdisc_rate_table *tab); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7b97405e2db..c76d34e62ff 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -164,6 +164,18 @@ extern void qdisc_unlock_tree(struct net_device *dev); #define tcf_tree_lock(tp) qdisc_lock_tree((tp)->q->dev) #define tcf_tree_unlock(tp) qdisc_unlock_tree((tp)->q->dev) +extern struct Qdisc noop_qdisc; +extern struct Qdisc_ops noop_qdisc_ops; + +extern void dev_init_scheduler(struct net_device *dev); +extern void dev_shutdown(struct net_device *dev); +extern void dev_activate(struct 
net_device *dev); +extern void dev_deactivate(struct net_device *dev); +extern void qdisc_reset(struct Qdisc *qdisc); +extern void qdisc_destroy(struct Qdisc *qdisc); +extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, + struct Qdisc_ops *ops); + static inline void tcf_destroy(struct tcf_proto *tp) { -- cgit v1.2.3 From 3d54b82fdf0ca79608f61448fb8ab92676487645 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jul 2005 14:15:09 -0700 Subject: [PKT_SCHED]: Cleanup qdisc creation and alignment macros Adds qdisc_alloc() to share code between qdisc_create() and qdisc_create_dflt(). Hides the qdisc alignment behind macros and makes use of them. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 7 +++---- include/net/sch_generic.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2f494a20d51..6492e7363d8 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -13,13 +13,12 @@ struct qdisc_walker extern rwlock_t qdisc_tree_lock; -#define QDISC_ALIGN 32 -#define QDISC_ALIGN_CONST (QDISC_ALIGN - 1) +#define QDISC_ALIGNTO 32 +#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1)) static inline void *qdisc_priv(struct Qdisc *q) { - return (char *)q + ((sizeof(struct Qdisc) + QDISC_ALIGN_CONST) - & ~QDISC_ALIGN_CONST); + return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc)); } /* diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c76d34e62ff..7b6ec998671 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -173,6 +173,7 @@ extern void dev_activate(struct net_device *dev); extern void dev_deactivate(struct net_device *dev); extern void qdisc_reset(struct Qdisc *qdisc); extern void qdisc_destroy(struct Qdisc *qdisc); +extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops); extern struct Qdisc *qdisc_create_dflt(struct net_device 
*dev, struct Qdisc_ops *ops); -- cgit v1.2.3 From bc971dee6ece1fd0d431948924becd9c50e7b778 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Jul 2005 15:03:46 -0700 Subject: [SHAPER]: Switch to spinlocks. Dave, you were right and the sleeping locks in shaper were broken. Markus Kanet noticed this and also tested the patch below that switches locking to spinlocks. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/if_shaper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h index 004e6f09a6e..68c896a36a3 100644 --- a/include/linux/if_shaper.h +++ b/include/linux/if_shaper.h @@ -23,7 +23,7 @@ struct shaper __u32 shapeclock; unsigned long recovery; /* Time we can next clock a packet out on an empty queue */ - struct semaphore sem; + spinlock_t lock; struct net_device_stats stats; struct net_device *dev; int (*hard_start_xmit) (struct sk_buff *skb, -- cgit v1.2.3 From b8259d9ad1d0f8d0c5ea0e37bb15080b0bd395b5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 5 Jul 2005 15:12:04 -0700 Subject: [NET]: Remove __ARGS from include/net/slhc_vj.h I suspect "#define __ARGS(x) ()" was deprecated before I was born. Signed-off-by: Alexey Dobriyan Signed-off-by: Domen Puncer Signed-off-by: David S. 
Miller --- include/net/slhc_vj.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h index 0b2c2784f33..8716d5942b6 100644 --- a/include/net/slhc_vj.h +++ b/include/net/slhc_vj.h @@ -170,19 +170,14 @@ struct slcompress { }; #define NULLSLCOMPR (struct slcompress *)0 -#define __ARGS(x) x - /* In slhc.c: */ -struct slcompress *slhc_init __ARGS((int rslots, int tslots)); -void slhc_free __ARGS((struct slcompress *comp)); - -int slhc_compress __ARGS((struct slcompress *comp, unsigned char *icp, - int isize, unsigned char *ocp, unsigned char **cpp, - int compress_cid)); -int slhc_uncompress __ARGS((struct slcompress *comp, unsigned char *icp, - int isize)); -int slhc_remember __ARGS((struct slcompress *comp, unsigned char *icp, - int isize)); -int slhc_toss __ARGS((struct slcompress *comp)); +struct slcompress *slhc_init(int rslots, int tslots); +void slhc_free(struct slcompress *comp); + +int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, + unsigned char *ocp, unsigned char **cpp, int compress_cid); +int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize); +int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize); +int slhc_toss(struct slcompress *comp); #endif /* _SLHC_H */ -- cgit v1.2.3 From c65f7f00c587828e3d50737805a78f74804972de Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:17:25 -0700 Subject: [TCP]: Simplify SKB data portion allocation with NETIF_F_SG. The ideal and most optimal layout for an SKB when doing scatter-gather is to put all the headers at skb->data, and all the user data in the page array. This makes SKB splitting and combining extremely simple, especially before a packet goes onto the wire the first time. So, when sk_stream_alloc_pskb() is given a zero size, make sure there is no skb_tailroom(). 
This is achieved by applying SKB_DATA_ALIGN() to the header length used here. Next, make select_size() in TCP output segmentation use a length of zero when NETIF_F_SG is true on the outgoing interface. Signed-off-by: David S. Miller --- include/net/sock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index e593af5b1ec..7b76f891ae2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1134,13 +1134,16 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int size, int mem, int gfp) { - struct sk_buff *skb = alloc_skb(size + sk->sk_prot->max_header, gfp); + struct sk_buff *skb; + int hdr_len; + hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); + skb = alloc_skb(size + hdr_len, gfp); if (skb) { skb->truesize += mem; if (sk->sk_forward_alloc >= (int)skb->truesize || sk_stream_mem_schedule(sk, skb->truesize, 0)) { - skb_reserve(skb, sk->sk_prot->max_header); + skb_reserve(skb, hdr_len); return skb; } __kfree_skb(skb); -- cgit v1.2.3 From fc6415bcb0f58f03adb910e56d7e1df6368794e0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:17:45 -0700 Subject: [TCP]: Fix quick-ack decrementing with TSO. On each packet output, we call tcp_dec_quickack_mode() if the ACK flag is set. It drops tp->ack.quick until it hits zero, at which time we deflate the ATO value. When doing TSO, we are emitting multiple packets with ACK set, so we should decrement tp->ack.quick that many segments. Note that, unlike this case, tcp_enter_cwr() should not take the tcp_skb_pcount(skb) into consideration. That function, one time, readjusts tp->snd_cwnd and moves into TCP_CA_CWR state. Signed-off-by: David S. 
Miller --- include/net/tcp.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index ec9e20c2717..afe41c5de2f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -721,11 +721,16 @@ static inline int tcp_ack_scheduled(struct tcp_sock *tp) return tp->ack.pending&TCP_ACK_SCHED; } -static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp) +static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) { - if (tp->ack.quick && --tp->ack.quick == 0) { - /* Leaving quickack mode we deflate ATO. */ - tp->ack.ato = TCP_ATO_MIN; + if (tp->ack.quick) { + if (pkts >= tp->ack.quick) { + tp->ack.quick = 0; + + /* Leaving quickack mode we deflate ATO. */ + tp->ack.ato = TCP_ATO_MIN; + } else + tp->ack.quick -= pkts; } } -- cgit v1.2.3 From f6302d1d78f77c2d4c8bd32b0afc2df7fdf5f281 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:18:03 -0700 Subject: [TCP]: Move send test logic out of net/tcp.h This just moves the code into tcp_output.c, no code logic changes are made by this patch. Using this as a baseline, we can begin to untangle the mess of comparisons for the Nagle test et al. We will also be able to reduce all of the redundant computation that occurs when outputting data packets. Signed-off-by: David S. 
Miller --- include/net/tcp.h | 113 ++---------------------------------------------------- 1 file changed, 3 insertions(+), 110 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index afe41c5de2f..f2b104532de 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -849,6 +849,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, /* tcp_output.c */ extern int tcp_write_xmit(struct sock *, int nonagle); +extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, + unsigned cur_mss, int nonagle); +extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); @@ -1284,12 +1287,6 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) return 3; } -static __inline__ int tcp_minshall_check(const struct tcp_sock *tp) -{ - return after(tp->snd_sml,tp->snd_una) && - !after(tp->snd_sml, tp->snd_nxt); -} - static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, const struct sk_buff *skb) { @@ -1297,122 +1294,18 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } -/* Return 0, if packet can be sent now without violation Nagle's rules: - 1. It is full sized. - 2. Or it contains FIN. - 3. Or TCP_NODELAY was set. - 4. Or TCP_CORK is not set, and all sent packets are ACKed. - With Minshall's modification: all sent small packets are ACKed. 
- */ - -static __inline__ int -tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, - unsigned mss_now, int nonagle) -{ - return (skb->len < mss_now && - !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && - ((nonagle&TCP_NAGLE_CORK) || - (!nonagle && - tp->packets_out && - tcp_minshall_check(tp)))); -} - -extern void tcp_set_skb_tso_segs(struct sock *, struct sk_buff *); - -/* This checks if the data bearing packet SKB (usually sk->sk_send_head) - * should be put on the wire right now. - */ -static __inline__ int tcp_snd_test(struct sock *sk, - struct sk_buff *skb, - unsigned cur_mss, int nonagle) -{ - struct tcp_sock *tp = tcp_sk(sk); - int pkts = tcp_skb_pcount(skb); - - if (!pkts) { - tcp_set_skb_tso_segs(sk, skb); - pkts = tcp_skb_pcount(skb); - } - - /* RFC 1122 - section 4.2.3.4 - * - * We must queue if - * - * a) The right edge of this frame exceeds the window - * b) There are packets in flight and we have a small segment - * [SWS avoidance and Nagle algorithm] - * (part of SWS is done on packetization) - * Minshall version sounds: there are no _small_ - * segments in flight. (tcp_nagle_check) - * c) We have too many packets 'in flight' - * - * Don't use the nagle rule for urgent data (or - * for the final FIN -DaveM). - * - * Also, Nagle rule does not apply to frames, which - * sit in the middle of queue (they have no chances - * to get new data) and if room at tail of skb is - * not enough to save something seriously (<32 for now). - */ - - /* Don't be strict about the congestion window for the - * final FIN frame. 
-DaveM - */ - return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode - || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && - (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && - !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); -} - static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out && !tp->pending) tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); } -static __inline__ int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb->next == (struct sk_buff *)&sk->sk_write_queue; -} - -/* Push out any pending frames which were held back due to - * TCP_CORK or attempt at coalescing tiny packets. - * The socket must be locked by the caller. - */ -static __inline__ void __tcp_push_pending_frames(struct sock *sk, - struct tcp_sock *tp, - unsigned cur_mss, - int nonagle) -{ - struct sk_buff *skb = sk->sk_send_head; - - if (skb) { - if (!tcp_skb_is_last(sk, skb)) - nonagle = TCP_NAGLE_PUSH; - if (!tcp_snd_test(sk, skb, cur_mss, nonagle) || - tcp_write_xmit(sk, nonagle)) - tcp_check_probe_timer(sk, tp); - } - tcp_cwnd_validate(sk, tp); -} - static __inline__ void tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp) { __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); } -static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) -{ - struct sk_buff *skb = sk->sk_send_head; - - return (skb && - tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), - tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle)); -} - static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; -- cgit v1.2.3 From 84d3e7b9573291a1ea845bdd51b74bb484597661 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 5 Jul 2005 15:18:18 -0700 Subject: [TCP]: Move __tcp_data_snd_check into tcp_output.c It reimplements portions of tcp_snd_check(), so if we move it to tcp_output.c we can consolidate its logic much more easily in a later change. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f2b104532de..4888f9d3f56 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -849,6 +849,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, /* tcp_output.c */ extern int tcp_write_xmit(struct sock *, int nonagle); +extern void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb); extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, unsigned cur_mss, int nonagle); extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); -- cgit v1.2.3 From a762a9800752f05fa8768bb0ac35d0e7f1bcfe7f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:18:51 -0700 Subject: [TCP]: Kill extra cwnd validate in __tcp_push_pending_frames(). The tcp_cwnd_validate() function should only be invoked if we actually send some frames, yet __tcp_push_pending_frames() will always invoke it. tcp_write_xmit() does the call for us, so the call here can simply be removed. Also, tcp_write_xmit() can be marked static. Signed-off-by: David S. 
Miller --- include/net/tcp.h | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4888f9d3f56..f32e7aed2c7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -848,7 +848,6 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, /* tcp_output.c */ -extern int tcp_write_xmit(struct sock *, int nonagle); extern void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb); extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, unsigned cur_mss, int nonagle); @@ -868,6 +867,9 @@ extern void tcp_push_one(struct sock *, unsigned mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +/* tcp_input.c */ +extern void tcp_cwnd_application_limited(struct sock *sk); + /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); extern void tcp_clear_xmit_timers(struct sock *); @@ -1234,28 +1236,6 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) tp->left_out = tp->sacked_out + tp->lost_out; } -extern void tcp_cwnd_application_limited(struct sock *sk); - -/* Congestion window validation. (RFC2861) */ - -static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) -{ - __u32 packets_out = tp->packets_out; - - if (packets_out >= tp->snd_cwnd) { - /* Network is feed fully. */ - tp->snd_cwnd_used = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - } else { - /* Network starves. */ - if (tp->packets_out > tp->snd_cwnd_used) - tp->snd_cwnd_used = tp->packets_out; - - if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) - tcp_cwnd_application_limited(sk); - } -} - /* Set slow start threshould and cwnd not falling to slow start */ static inline void __tcp_enter_cwr(struct tcp_sock *tp) { -- cgit v1.2.3 From a2e2a59c93cc8ba39caa9011c2573f429e40ccd9 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 5 Jul 2005 15:19:23 -0700 Subject: [TCP]: Fix redundant calculations of tcp_current_mss() tcp_write_xmit() uses tcp_current_mss(), but some of its callers, namely __tcp_push_pending_frames(), already have this value available. While we're here, fix the "cur_mss" argument to be "unsigned int" instead of plain "unsigned". Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f32e7aed2c7..9416236cc39 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -850,7 +850,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, extern void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb); extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, - unsigned cur_mss, int nonagle); + unsigned int cur_mss, int nonagle); extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); extern void tcp_xmit_retransmit_queue(struct sock *); -- cgit v1.2.3 From 55c97f3e990c1ff63957c64f6cb10711a09fd70e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:19:38 -0700 Subject: [TCP]: Fix __tcp_push_pending_frames() 'nonagle' handling. 'nonagle' should be passed to the tcp_snd_test() function as 'TCP_NAGLE_PUSH' if we are checking an SKB not at the tail of the write_queue. This is because Nagle does not apply to such frames since we cannot possibly tack more data onto them. However, while doing this __tcp_push_pending_frames() makes all of the packets in the write_queue use this modified 'nonagle' value. Fix the bug and simplify this function by just calling tcp_write_xmit() directly if sk_send_head is non-NULL. As a result, we can now make tcp_data_snd_check() just call tcp_push_pending_frames() instead of the specialized __tcp_data_snd_check(). Signed-off-by: David S. 
Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9416236cc39..b19238027da 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -848,7 +848,6 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, /* tcp_output.c */ -extern void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb); extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, unsigned int cur_mss, int nonagle); extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); -- cgit v1.2.3 From c1b4a7e69576d65efc31a8cea0714173c2841244 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2005 15:24:38 -0700 Subject: [TCP]: Move to new TSO segmenting scheme. Make TSO segment transmit size decisions at send time not earlier. The basic scheme is that we try to build as large a TSO frame as possible when pulling in the user data, but the size of the TSO frame output to the card is determined at transmit time. This is guided by tp->xmit_size_goal. It is always set to a multiple of MSS and tells sendmsg/sendpage how large an SKB to try and build. Later, tcp_write_xmit() and tcp_push_one() chop up the packet if necessary and conditions warrant. These routines can also decide to "defer" in order to wait for more ACKs to arrive and thus allow larger TSO frames to be emitted. A general observation is that TSO elongates the pipe, thus requiring a larger congestion window and larger buffering especially at the sender side. Therefore, it is important that applications 1) get a large enough socket send buffer (this is accomplished by our dynamic send buffer expansion code) 2) do large enough writes. Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index dfd93d03f5d..e4fd82e4210 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -286,7 +286,7 @@ struct tcp_sock { __u32 max_window; /* Maximal window ever seen from peer */ __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ - __u16 mss_cache_std; /* Like mss_cache, but without TSO */ + __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u8 ca_state; /* State of fast-retransmit machine */ __u8 retransmits; /* Number of unrecovered RTO timeouts. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b19238027da..a166918ca56 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -862,7 +862,7 @@ extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, int priority); extern int tcp_send_synack(struct sock *); -extern void tcp_push_one(struct sock *, unsigned mss_now); +extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); @@ -968,7 +968,7 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long static inline void tcp_initialize_rcv_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - unsigned int hint = min(tp->advmss, tp->mss_cache_std); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd/2); hint = min(hint, TCP_MIN_RCVMSS); -- cgit v1.2.3