aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c112
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/fib_trie.c95
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/protocol.c19
-rw-r--r--net/ipv4/raw.c9
-rw-r--r--net/ipv4/route.c20
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c66
-rw-r--r--net/ipv4/tcp_minisocks.c25
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/udp.c12
18 files changed, 192 insertions, 228 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 197d024b253..6c30a73f03f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw_lock);
struct ipv4_config ipv4_config;
-
EXPORT_SYMBOL(ipv4_config);
/* New destruction routine */
@@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk)
sk_mem_reclaim(sk);
if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
- printk("Attempt to release TCP socket in state %d %p\n",
+ pr_err("Attempt to release TCP socket in state %d %p\n",
sk->sk_state, sk);
return;
}
if (!sock_flag(sk, SOCK_DEAD)) {
- printk("Attempt to release alive inet socket %p\n", sk);
+ pr_err("Attempt to release alive inet socket %p\n", sk);
return;
}
@@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk)
dst_release(sk->sk_dst_cache);
sk_refcnt_debug_dec(sk);
}
+EXPORT_SYMBOL(inet_sock_destruct);
/*
* The routines beyond this point handle the behaviour of an AF_INET
@@ -219,6 +219,7 @@ out:
release_sock(sk);
return err;
}
+EXPORT_SYMBOL(inet_listen);
u32 inet_ehash_secret __read_mostly;
EXPORT_SYMBOL(inet_ehash_secret);
@@ -435,9 +436,11 @@ int inet_release(struct socket *sock)
}
return 0;
}
+EXPORT_SYMBOL(inet_release);
/* It is off by default, see below. */
int sysctl_ip_nonlocal_bind __read_mostly;
+EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -519,6 +522,7 @@ out_release_sock:
out:
return err;
}
+EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
int addr_len, int flags)
@@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
return -EAGAIN;
return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
}
+EXPORT_SYMBOL(inet_dgram_connect);
static long inet_wait_for_connect(struct sock *sk, long timeo)
{
@@ -641,6 +646,7 @@ sock_error:
sock->state = SS_DISCONNECTING;
goto out;
}
+EXPORT_SYMBOL(inet_stream_connect);
/*
* Accept a pending connection. The TCP layer now gives BSD semantics.
@@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
do_err:
return err;
}
+EXPORT_SYMBOL(inet_accept);
/*
@@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
*uaddr_len = sizeof(*sin);
return 0;
}
+EXPORT_SYMBOL(inet_getname);
int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
@@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
+EXPORT_SYMBOL(inet_sendmsg);
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
@@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how)
release_sock(sk);
return err;
}
+EXPORT_SYMBOL(inet_shutdown);
/*
* ioctl() calls you can issue on an INET socket. Most of these are
@@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct net *net = sock_net(sk);
switch (cmd) {
- case SIOCGSTAMP:
- err = sock_get_timestamp(sk, (struct timeval __user *)arg);
- break;
- case SIOCGSTAMPNS:
- err = sock_get_timestampns(sk, (struct timespec __user *)arg);
- break;
- case SIOCADDRT:
- case SIOCDELRT:
- case SIOCRTMSG:
- err = ip_rt_ioctl(net, cmd, (void __user *)arg);
- break;
- case SIOCDARP:
- case SIOCGARP:
- case SIOCSARP:
- err = arp_ioctl(net, cmd, (void __user *)arg);
- break;
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
- case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
- case SIOCGIFDSTADDR:
- case SIOCSIFDSTADDR:
- case SIOCSIFPFLAGS:
- case SIOCGIFPFLAGS:
- case SIOCSIFFLAGS:
- err = devinet_ioctl(net, cmd, (void __user *)arg);
- break;
- default:
- if (sk->sk_prot->ioctl)
- err = sk->sk_prot->ioctl(sk, cmd, arg);
- else
- err = -ENOIOCTLCMD;
- break;
+ case SIOCGSTAMP:
+ err = sock_get_timestamp(sk, (struct timeval __user *)arg);
+ break;
+ case SIOCGSTAMPNS:
+ err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+ break;
+ case SIOCADDRT:
+ case SIOCDELRT:
+ case SIOCRTMSG:
+ err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ break;
+ case SIOCDARP:
+ case SIOCGARP:
+ case SIOCSARP:
+ err = arp_ioctl(net, cmd, (void __user *)arg);
+ break;
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFNETMASK:
+ case SIOCGIFDSTADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFPFLAGS:
+ case SIOCGIFPFLAGS:
+ case SIOCSIFFLAGS:
+ err = devinet_ioctl(net, cmd, (void __user *)arg);
+ break;
+ default:
+ if (sk->sk_prot->ioctl)
+ err = sk->sk_prot->ioctl(sk, cmd, arg);
+ else
+ err = -ENOIOCTLCMD;
+ break;
}
return err;
}
+EXPORT_SYMBOL(inet_ioctl);
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
@@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
+EXPORT_SYMBOL(inet_stream_ops);
const struct proto_ops inet_dgram_ops = {
.family = PF_INET,
@@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
+EXPORT_SYMBOL(inet_dgram_ops);
/*
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
@@ -1016,6 +1030,7 @@ out_illegal:
p->type);
goto out;
}
+EXPORT_SYMBOL(inet_register_protosw);
void inet_unregister_protosw(struct inet_protosw *p)
{
@@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
synchronize_net();
}
}
+EXPORT_SYMBOL(inet_unregister_protosw);
/*
* Shall we try to damage output packets if routing dev changes?
@@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk)
return err;
}
-
EXPORT_SYMBOL(inet_sk_rebuild_header);
static int inet_gso_send_check(struct sk_buff *skb)
@@ -1369,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
}
return rc;
}
-
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
unsigned long snmp_fold_field(void *mib[], int offt)
@@ -1676,19 +1690,3 @@ static int __init ipv4_proc_init(void)
MODULE_ALIAS_NETPROTO(PF_INET);
-EXPORT_SYMBOL(inet_accept);
-EXPORT_SYMBOL(inet_bind);
-EXPORT_SYMBOL(inet_dgram_connect);
-EXPORT_SYMBOL(inet_dgram_ops);
-EXPORT_SYMBOL(inet_getname);
-EXPORT_SYMBOL(inet_ioctl);
-EXPORT_SYMBOL(inet_listen);
-EXPORT_SYMBOL(inet_register_protosw);
-EXPORT_SYMBOL(inet_release);
-EXPORT_SYMBOL(inet_sendmsg);
-EXPORT_SYMBOL(inet_shutdown);
-EXPORT_SYMBOL(inet_sock_destruct);
-EXPORT_SYMBOL(inet_stream_connect);
-EXPORT_SYMBOL(inet_stream_ops);
-EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090e9991ac2..4e80f336c0c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
static void parp_redo(struct sk_buff *skb);
-static struct neigh_ops arp_generic_ops = {
+static const struct neigh_ops arp_generic_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
@@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = {
.queue_xmit = dev_queue_xmit,
};
-static struct neigh_ops arp_hh_ops = {
+static const struct neigh_ops arp_hh_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
@@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = {
.queue_xmit = dev_queue_xmit,
};
-static struct neigh_ops arp_direct_ops = {
+static const struct neigh_ops arp_direct_ops = {
.family = AF_INET,
.output = dev_queue_xmit,
.connected_output = dev_queue_xmit,
@@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = {
.queue_xmit = dev_queue_xmit,
};
-struct neigh_ops arp_broken_ops = {
+const struct neigh_ops arp_broken_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fe3c846b99a..291bdf50a21 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -48,7 +48,7 @@
* Patrick McHardy <kaber@trash.net>
*/
-#define VERSION "0.408"
+#define VERSION "0.409"
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -325,10 +325,7 @@ static inline void check_tnode(const struct tnode *tn)
static const int halve_threshold = 25;
static const int inflate_threshold = 50;
static const int halve_threshold_root = 15;
-static const int inflate_threshold_root = 25;
-
-static int inflate_threshold_root_fix;
-#define INFLATE_FIX_MAX 10 /* a comment in resize() */
+static const int inflate_threshold_root = 30;
static void __alias_free_mem(struct rcu_head *head)
{
@@ -516,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
rcu_assign_pointer(tn->child[i], n);
}
+#define MAX_WORK 10
static struct node *resize(struct trie *t, struct tnode *tn)
{
int i;
- int err = 0;
struct tnode *old_tn;
int inflate_threshold_use;
int halve_threshold_use;
- int max_resize;
+ int max_work;
if (!tn)
return NULL;
@@ -538,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
/* One child */
if (tn->empty_children == tnode_child_length(tn) - 1)
- for (i = 0; i < tnode_child_length(tn); i++) {
- struct node *n;
-
- n = tn->child[i];
- if (!n)
- continue;
-
- /* compress one level */
- node_set_parent(n, NULL);
- tnode_free_safe(tn);
- return n;
- }
+ goto one_child;
/*
* Double as long as the resulting node has a number of
* nonempty nodes that are above the threshold.
@@ -618,15 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if (!tn->parent)
- inflate_threshold_use = inflate_threshold_root +
- inflate_threshold_root_fix;
- else
+ if (!node_parent((struct node*) tn)) {
+ inflate_threshold_use = inflate_threshold_root;
+ halve_threshold_use = halve_threshold_root;
+ }
+ else {
inflate_threshold_use = inflate_threshold;
+ halve_threshold_use = halve_threshold;
+ }
- err = 0;
- max_resize = 10;
- while ((tn->full_children > 0 && max_resize-- &&
+ max_work = MAX_WORK;
+ while ((tn->full_children > 0 && max_work-- &&
50 * (tn->full_children + tnode_child_length(tn)
- tn->empty_children)
>= inflate_threshold_use * tnode_child_length(tn))) {
@@ -643,47 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
- if (max_resize < 0) {
- if (!tn->parent) {
- /*
- * It was observed that during large updates even
- * inflate_threshold_root = 35 might be needed to avoid
- * this warning; but it should be temporary, so let's
- * try to handle this automatically.
- */
- if (inflate_threshold_root_fix < INFLATE_FIX_MAX)
- inflate_threshold_root_fix++;
- else
- pr_warning("Fix inflate_threshold_root."
- " Now=%d size=%d bits fix=%d\n",
- inflate_threshold_root, tn->bits,
- inflate_threshold_root_fix);
- } else {
- pr_warning("Fix inflate_threshold."
- " Now=%d size=%d bits\n",
- inflate_threshold, tn->bits);
- }
- } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix)
- inflate_threshold_root_fix--;
-
check_tnode(tn);
+ /* Return if at least one inflate is run */
+ if( max_work != MAX_WORK)
+ return (struct node *) tn;
+
/*
* Halve as long as the number of empty children in this
* node is above threshold.
*/
-
- /* Keep root node larger */
-
- if (!tn->parent)
- halve_threshold_use = halve_threshold_root;
- else
- halve_threshold_use = halve_threshold;
-
- err = 0;
- max_resize = 10;
- while (tn->bits > 1 && max_resize-- &&
+ max_work = MAX_WORK;
+ while (tn->bits > 1 && max_work-- &&
100 * (tnode_child_length(tn) - tn->empty_children) <
halve_threshold_use * tnode_child_length(tn)) {
@@ -698,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
- if (max_resize < 0) {
- if (!tn->parent)
- pr_warning("Fix halve_threshold_root."
- " Now=%d size=%d bits\n",
- halve_threshold_root, tn->bits);
- else
- pr_warning("Fix halve_threshold."
- " Now=%d size=%d bits\n",
- halve_threshold, tn->bits);
- }
/* Only one child remains */
- if (tn->empty_children == tnode_child_length(tn) - 1)
+ if (tn->empty_children == tnode_child_length(tn) - 1) {
+one_child:
for (i = 0; i < tnode_child_length(tn); i++) {
struct node *n;
@@ -724,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
tnode_free_safe(tn);
return n;
}
-
+ }
return (struct node *) tn;
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 61283f92882..13f0781f35c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data)
/* We purged the entire slot, anything left? */
if (twdr->tw_count)
need_timer = 1;
+ twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
}
- twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
if (need_timer)
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
out:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b902ef55be7..533afaadefd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -662,7 +662,7 @@ drop_nolock:
return(0);
}
-static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->dev->stats;
@@ -951,7 +951,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
addend += 4;
}
dev->needed_headroom = addend + hlen;
- mtu -= dev->hard_header_len - addend;
+ mtu -= dev->hard_header_len + addend;
if (mtu < 68)
mtu = 68;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7d082105472..afae0cbabbf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1302,7 +1302,7 @@ int ip_push_pending_frames(struct sock *sk)
err = ip_local_out(skb);
if (err) {
if (err > 0)
- err = inet->recverr ? net_xmit_errno(err) : 0;
+ err = net_xmit_errno(err);
if (err)
goto error;
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 98075b6d619..62548cb0923 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb)
* and that skb is filled properly by that function.
*/
-static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->dev->stats;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 06c33fb6b32..65d421cf5bc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,7 @@ failure:
#ifdef CONFIG_IP_PIMSM
-static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net *net = dev_net(dev);
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index ea50da0649f..a2e5fc0a15e 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -22,26 +22,11 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
+#include <linux/cache.h>
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <net/ip.h>
+#include <linux/spinlock.h>
#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/ipip.h>
-#include <linux/igmp.h>
struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
static DEFINE_SPINLOCK(inet_proto_lock);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2979f14bb18..ebb1e5848bc 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
dst_output);
if (err > 0)
- err = inet->recverr ? net_xmit_errno(err) : 0;
+ err = net_xmit_errno(err);
if (err)
goto error;
out:
@@ -386,6 +386,8 @@ error_fault:
kfree_skb(skb);
error:
IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
+ if (err == -ENOBUFS && !inet->recverr)
+ err = 0;
return err;
}
@@ -576,8 +578,11 @@ back_from_confirm:
&ipc, &rt, msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
- else if (!(msg->msg_flags & MSG_MORE))
+ else if (!(msg->msg_flags & MSG_MORE)) {
err = ip_push_pending_frames(sk);
+ if (err == -ENOBUFS && !inet->recverr)
+ err = 0;
+ }
release_sock(sk);
}
done:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fafbe163e2b..91867d3e632 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
void ip_rt_send_redirect(struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
- struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
+ struct in_device *in_dev;
+ int log_martians;
- if (!in_dev)
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+ if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
+ rcu_read_unlock();
return;
-
- if (!IN_DEV_TX_REDIRECTS(in_dev))
- goto out;
+ }
+ log_martians = IN_DEV_LOG_MARTIANS(in_dev);
+ rcu_read_unlock();
/* No redirected packets during ip_rt_redirect_silence;
* reset the algorithm.
@@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/
if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
rt->u.dst.rate_last = jiffies;
- goto out;
+ return;
}
/* Check for load limit; set rate_last to the latest sent
@@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
rt->u.dst.rate_last = jiffies;
++rt->u.dst.rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (IN_DEV_LOG_MARTIANS(in_dev) &&
+ if (log_martians &&
rt->u.dst.rate_tokens == ip_rt_redirect_number &&
net_ratelimit())
printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
@@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&rt->rt_dst, &rt->rt_gateway);
#endif
}
-out:
- in_dev_put(in_dev);
}
static int ip_error(struct sk_buff *skb)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 91145244ea6..edeea060db4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1839,7 +1839,7 @@ void tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_KERNEL);
+ tcp_send_active_reset(sk, sk->sk_allocation);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = !!(tp->nonagle&TCP_NAGLE_CORK);
break;
case TCP_KEEPIDLE:
- val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
+ val = keepalive_time_when(tp) / HZ;
break;
case TCP_KEEPINTVL:
- val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ;
+ val = keepalive_intvl_when(tp) / HZ;
break;
case TCP_KEEPCNT:
- val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
+ val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
@@ -2658,7 +2658,7 @@ void tcp_free_md5sig_pool(void)
EXPORT_SYMBOL(tcp_free_md5sig_pool);
-static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
+static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk)
{
int cpu;
struct tcp_md5sig_pool **pool;
@@ -2671,7 +2671,7 @@ static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
struct tcp_md5sig_pool *p;
struct crypto_hash *hash;
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), sk->sk_allocation);
if (!p)
goto out_free;
*per_cpu_ptr(pool, cpu) = p;
@@ -2688,7 +2688,7 @@ out_free:
return NULL;
}
-struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void)
+struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk)
{
struct tcp_md5sig_pool **pool;
int alloc = 0;
@@ -2709,7 +2709,7 @@ retry:
if (alloc) {
/* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool();
+ struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk);
spin_lock_bh(&tcp_md5sig_pool_lock);
if (!p) {
tcp_md5sig_users--;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bdb0da237e..af6d6fa00db 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk)
* is invisible. Actually, Linux-2.4 also generates erratic
* ACKs in some circumstances.
*/
- inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
+ inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
/* 2. Fixups made earlier cannot be right.
* If we do not estimate RTO correctly without them,
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk)
/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
* guarantees that rto is higher.
*/
- if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
- inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+ tcp_bound_rto(sk);
}
/* Save metrics learned by this TCP session.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6d88219c5e2..0543561da99 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -328,26 +328,29 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
*
*/
-void tcp_v4_err(struct sk_buff *skb, u32 info)
+void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
- struct iphdr *iph = (struct iphdr *)skb->data;
- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+ struct iphdr *iph = (struct iphdr *)icmp_skb->data;
+ struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
+ struct inet_connection_sock *icsk;
struct tcp_sock *tp;
struct inet_sock *inet;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
+ const int type = icmp_hdr(icmp_skb)->type;
+ const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
+ struct sk_buff *skb;
__u32 seq;
+ __u32 remaining;
int err;
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net(icmp_skb->dev);
- if (skb->len < (iph->ihl << 2) + 8) {
+ if (icmp_skb->len < (iph->ihl << 2) + 8) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
- iph->saddr, th->source, inet_iif(skb));
+ iph->saddr, th->source, inet_iif(icmp_skb));
if (!sk) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
if (sk->sk_state == TCP_CLOSE)
goto out;
+ icsk = inet_csk(sk);
tp = tcp_sk(sk);
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
}
err = icmp_err_convert[code].errno;
+ /* check if icmp_skb allows revert of backoff
+ * (see draft-zimmermann-tcp-lcd) */
+ if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
+ break;
+ if (seq != tp->snd_una || !icsk->icsk_retransmits ||
+ !icsk->icsk_backoff)
+ break;
+
+ icsk->icsk_backoff--;
+ inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
+ icsk->icsk_backoff;
+ tcp_bound_rto(sk);
+
+ skb = tcp_write_queue_head(sk);
+ BUG_ON(!skb);
+
+ remaining = icsk->icsk_rto - min(icsk->icsk_rto,
+ tcp_time_stamp - TCP_SKB_CB(skb)->when);
+
+ if (remaining) {
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ remaining, TCP_RTO_MAX);
+ } else if (sock_owned_by_user(sk)) {
+ /* RTO revert clocked out retransmission,
+ * but socket is locked. Will defer. */
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ HZ/20, TCP_RTO_MAX);
+ } else {
+ /* RTO revert clocked out retransmission.
+ * Will retransmit now */
+ tcp_retransmit_timer(sk);
+ }
+
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
@@ -849,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
}
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
- if (tcp_alloc_md5sig_pool() == NULL) {
+ if (tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
return -ENOMEM;
}
@@ -970,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
if (!tcp_sk(sk)->md5sig_info) {
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ struct tcp_md5sig_info *p;
+ p = kzalloc(sizeof(*p), sk->sk_allocation);
if (!p)
return -EINVAL;
@@ -979,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+ newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
if (!newkey)
return -ENOMEM;
return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
@@ -1158,7 +1196,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
};
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.md5_lookup = tcp_v4_reqsk_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
};
@@ -1717,7 +1755,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
return 0;
}
-struct inet_connection_sock_af_ops ipv4_specific = {
+const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
@@ -1737,7 +1775,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
};
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_add = tcp_v4_md5_add_func,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f8d67ccc64f..e48c37d74d7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -322,7 +322,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (key != NULL) {
memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
tcptw->tw_md5_keylen = key->keylen;
- if (tcp_alloc_md5sig_pool() == NULL)
+ if (tcp_alloc_md5sig_pool(sk) == NULL)
BUG();
}
} while (0);
@@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
-#ifdef CONFIG_TCP_MD5SIG
- else {
- /* Copy over the MD5 key from the original socket */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
- key = tp->af_specific->md5_lookup(sk, child);
- if (key != NULL) {
- /*
- * We're using one, so create a matching key on the
- * newsk structure. If we fail to get memory then we
- * end up not copying the key across. Shucks.
- */
- char *newkey = kmemdup(key->key, key->keylen,
- GFP_ATOMIC);
- if (newkey) {
- if (!tcp_alloc_md5sig_pool())
- BUG();
- tp->af_specific->md5_add(child, child, newkey,
- key->keylen);
- }
- }
- }
-#endif
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e004424d40..5200aab0ca9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2135,7 +2135,8 @@ void tcp_send_fin(struct sock *sk)
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
- skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER,
+ sk->sk_allocation);
if (skb)
break;
yield();
@@ -2388,7 +2389,7 @@ int tcp_connect(struct sock *sk)
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
tp->packets_out += tcp_skb_pcount(buff);
- tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+ tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b144a26359b..cdb2ca7684d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
+ bool do_reset;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
dst_negative_advice(&sk->sk_dst_cache);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
} else {
- if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
+ if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk)
const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
retry_until = tcp_orphan_retries(sk, alive);
+ do_reset = alive ||
+ !retransmits_timed_out(sk, retry_until);
- if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until))
+ if (tcp_out_of_resources(sk, do_reset))
return 1;
}
}
- if (icsk->icsk_retransmits >= retry_until) {
+ if (retransmits_timed_out(sk, retry_until)) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
@@ -279,7 +282,7 @@ static void tcp_probe_timer(struct sock *sk)
* The TCP retransmit timer.
*/
-static void tcp_retransmit_timer(struct sock *sk)
+void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -385,7 +388,7 @@ static void tcp_retransmit_timer(struct sock *sk)
out_reset_timer:
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
- if (icsk->icsk_retransmits > sysctl_tcp_retries1)
+ if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
__sk_dst_reset(sk);
out:;
@@ -499,8 +502,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = tcp_time_stamp - tp->rcv_tstamp;
if (elapsed >= keepalive_time_when(tp)) {
- if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
- (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
+ if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 29ebb0d27a1..ebaaa7f973d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -561,12 +561,18 @@ static int udp_push_pending_frames(struct sock *sk)
send:
err = ip_push_pending_frames(sk);
+ if (err) {
+ if (err == -ENOBUFS && !inet->recverr) {
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
+ err = 0;
+ }
+ } else
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
out:
up->len = 0;
up->pending = 0;
- if (!err)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}