diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 10 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 9 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_rule.c | 2 | ||||
-rw-r--r-- | net/ipv4/proc.c | 58 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_htcp.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 15 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 82 | ||||
-rw-r--r-- | net/ipv4/udp.c | 1 | ||||
-rw-r--r-- | net/ipv4/xfrm4_state.c | 1 |
11 files changed, 79 insertions, 117 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1fbff5fa424..1aa2dc9e380 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1117,6 +1117,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 861978a4f1a..cfb38ac9d69 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -209,9 +209,17 @@ static int ip_local_deliver_finish(struct sk_buff *skb) hash = protocol & (MAX_INET_PROTOS - 1); ipprot = rcu_dereference(inet_protos[hash]); - if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) { + if (ipprot != NULL) { int ret; + if (!net_eq(net, &init_net) && !ipprot->netns_ok) { + if (net_ratelimit()) + printk("%s: proto %d isn't netns-ready\n", + __func__, protocol); + kfree_skb(skb); + goto out; + } + if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b42e082cc17..25924b1eb2e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1945,13 +1945,14 @@ int __init ip_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS -proc_vif_fail: - unregister_netdevice_notifier(&ip_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index bea54a68510..8d489e746b2 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,7 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), + .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f5a403f6f6..a631a1f110c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_SENTINEL }; +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, + unsigned short *type, int count) +{ + int j; + + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", + type[j] & 0x100 ? "Out" : "In", + type[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", vals[j]); + } +} + static void icmpmsg_put(struct seq_file *seq) { #define PERLINE 16 - int j, i, count; - static int out[PERLINE]; + int i, count; + unsigned short type[PERLINE]; + unsigned long vals[PERLINE], val; struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - - if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) - out[count++] = i; - if (count < PERLINE) - continue; - - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", - i & 0xff); - seq_printf(seq, "\nIcmpMsg: "); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, - out[j])); - seq_putc(seq, '\n'); - } - if (count) { - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : - "In", out[j] & 0xff); - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %lu", snmp_fold_field((void **) - net->mib.icmpmsg_statistics, out[j])); + val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + if (val) { + type[count] = i; + vals[count++] = val; + } + if (count == PERLINE) { + icmpmsg_put_line(seq, vals, type, count); + count = 0; + } } + icmpmsg_put_line(seq, vals, type, count); #undef PERLINE } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eccb7165a80..c5aca0bb116 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1374,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) + signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index af99776146f..937549b8a92 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -69,9 +69,12 @@ static u32 htcp_cwnd_undo(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = ca->undo_last_cong; - ca->maxRTT = ca->undo_maxRTT; - ca->old_maxB = ca->undo_old_maxB; + if (ca->undo_last_cong) { + ca->last_cong = ca->undo_last_cong; + ca->maxRTT = ca->undo_maxRTT; + ca->old_maxB = ca->undo_old_maxB; + ca->undo_last_cong = 0; + } return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); } @@ -268,7 +271,10 @@ static void htcp_state(struct sock *sk, u8 new_state) case TCP_CA_Open: { struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = jiffies; + if (ca->undo_last_cong) { + ca->last_cong = jiffies; + ca->undo_last_cong = 0; + } } break; case TCP_CA_CWR: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba85d883189..fe3b4bdfd25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1028,10 +1028,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. - * - * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up - * cannot be large. However, taking into account rare use of URG, this - * is not a big flaw. */ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) { @@ -1046,7 +1042,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) + if (large_allowed && sk_can_gso(sk)) doing_tso = 1; if (dst) { @@ -1516,6 +1512,10 @@ static int tcp_mtu_probe(struct sock *sk) * send_head. This happens as incoming acks open up the remote * window for us. * + * LARGESEND note: !tcp_urg_mode is overkill, only frames between + * snd_up-64k-mss .. snd_up cannot be large. However, taking into + * account rare use of URG, this is not a big flaw. + * * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */ @@ -1567,7 +1567,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) } limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); @@ -1616,6 +1616,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; @@ -1630,7 +1631,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!tso_segs); limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 14504dada11..a453aac91bd 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -40,18 +40,14 @@ #include "tcp_vegas.h" -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 -static int alpha = 2<<V_PARAM_SHIFT; -static int beta = 4<<V_PARAM_SHIFT; -static int gamma = 1<<V_PARAM_SHIFT; +static int alpha = 2; +static int beta = 4; +static int gamma = 1; module_param(alpha, int, 0644); -MODULE_PARM_DESC(alpha, "lower bound of packets in network (scale by 2)"); +MODULE_PARM_DESC(alpha, "lower bound of packets in network"); module_param(beta, int, 0644); -MODULE_PARM_DESC(beta, "upper bound of packets in network (scale by 2)"); +MODULE_PARM_DESC(beta, "upper bound of packets in network"); module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); @@ -172,49 +168,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; } - /* The key players are v_beg_snd_una and v_beg_snd_nxt. - * - * These are so named because they represent the approximate values - * of snd_una and snd_nxt at the beginning of the current RTT. More - * precisely, they represent the amount of data sent during the RTT. - * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, - * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding - * bytes of data have been ACKed during the course of the RTT, giving - * an "actual" rate of: - * - * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) - * - * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, - * because delayed ACKs can cover more than one segment, so they - * don't line up nicely with the boundaries of RTTs. - * - * Another unfortunate fact of life is that delayed ACKs delay the - * advance of the left edge of our send window, so that the number - * of bytes we send in an RTT is often less than our cwnd will allow. - * So we keep track of our cwnd separately, in v_beg_snd_cwnd. - */ - if (after(ack, vegas->beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ - u32 old_wnd, old_snd_cwnd; - - - /* Here old_wnd is essentially the window of data that was - * sent during the previous RTT, and has all - * been acknowledged in the course of the RTT that ended - * with the ACK we just received. Likewise, old_snd_cwnd - * is the cwnd during the previous RTT. - */ - old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / - tp->mss_cache; - old_snd_cwnd = vegas->beg_snd_cwnd; /* Save the extent of the current window so we can use this * at the end of the next RTT. */ - vegas->beg_snd_una = vegas->beg_snd_nxt; vegas->beg_snd_nxt = tp->snd_nxt; - vegas->beg_snd_cwnd = tp->snd_cwnd; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * * This is: * (actual rate in segments) * baseRTT - * We keep it as a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = ((u64)old_wnd * vegas->baseRTT); - target_cwnd <<= V_PARAM_SHIFT; - do_div(target_cwnd, rtt); + target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. - * - * Again, this is a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary - * point. */ - diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; + diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tp->snd_ssthresh > 2 ) { /* Going too fast. Time to slow down @@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, - ((u32)target_cwnd >> - V_PARAM_SHIFT)+1); + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ tcp_slow_start(tp); } else { /* Congestion avoidance. */ - u32 next_snd_cwnd; /* Figure out where we would like cwnd * to be. @@ -300,32 +249,25 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* The old window was too fast, so * we slow down. */ - next_snd_cwnd = old_snd_cwnd - 1; + tp->snd_cwnd--; } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - next_snd_cwnd = old_snd_cwnd + 1; + tp->snd_cwnd++; } else { /* Sending just as fast as we * should be. */ - next_snd_cwnd = old_snd_cwnd; } - - /* Adjust cwnd upward or downward, toward the - * desired value. - */ - if (next_snd_cwnd > tp->snd_cwnd) - tp->snd_cwnd++; - else if (next_snd_cwnd < tp->snd_cwnd) - tp->snd_cwnd--; } if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) tp->snd_cwnd = tp->snd_cwnd_clamp; + + tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf02701ced4..98c1fd09be8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,6 +633,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .saddr = saddr, .tos = tos } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 07735ed280d..55dc6beab9a 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET; x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; |