aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_frontend.c15
-rw-r--r--net/ipv4/fib_rules.c51
-rw-r--r--net/ipv4/inet_diag.c9
-rw-r--r--net/ipv4/inet_hashtables.c7
-rw-r--r--net/ipv4/inet_timewait_sock.c13
-rw-r--r--net/ipv4/inetpeer.c42
-rw-r--r--net/ipv4/ip_output.c28
-rw-r--r--net/ipv4/ip_sockglue.c39
-rw-r--r--net/ipv4/ipcomp.c3
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c18
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c20
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c25
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c24
-rw-r--r--net/ipv4/netfilter/Makefile20
-rw-r--r--net/ipv4/netfilter/ip_queue.c37
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c18
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c2
-rw-r--r--net/ipv4/proc.c19
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c20
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_input.c115
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv4/tunnel4.c24
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/udplite.c3
32 files changed, 333 insertions, 263 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 60123905dbb..732d8f088b1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -59,6 +59,13 @@ struct fib_table *ip_fib_main_table;
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+static void __init fib4_rules_init(void)
+{
+ ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+ hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
+ ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
+ hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
+}
#else
#define FIB_TABLE_HASHSZ 256
@@ -905,14 +912,8 @@ void __init ip_fib_init(void)
for (i = 0; i < FIB_TABLE_HASHSZ; i++)
INIT_HLIST_HEAD(&fib_table_hash[i]);
-#ifndef CONFIG_IP_MULTIPLE_TABLES
- ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
- hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
- ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
- hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
-#else
+
fib4_rules_init();
-#endif
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f16839c6a72..a0ada3a8d8d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -49,33 +49,6 @@ struct fib4_rule
#endif
};
-static struct fib4_rule default_rule = {
- .common = {
- .refcnt = ATOMIC_INIT(2),
- .pref = 0x7FFF,
- .table = RT_TABLE_DEFAULT,
- .action = FR_ACT_TO_TBL,
- },
-};
-
-static struct fib4_rule main_rule = {
- .common = {
- .refcnt = ATOMIC_INIT(2),
- .pref = 0x7FFE,
- .table = RT_TABLE_MAIN,
- .action = FR_ACT_TO_TBL,
- },
-};
-
-static struct fib4_rule local_rule = {
- .common = {
- .refcnt = ATOMIC_INIT(2),
- .table = RT_TABLE_LOCAL,
- .action = FR_ACT_TO_TBL,
- .flags = FIB_RULE_PERMANENT,
- },
-};
-
#ifdef CONFIG_NET_CLS_ROUTE
u32 fib_rules_tclass(struct fib_result *res)
{
@@ -319,11 +292,27 @@ static struct fib_rules_ops fib4_rules_ops = {
.owner = THIS_MODULE,
};
-void __init fib4_rules_init(void)
+static int __init fib_default_rules_init(void)
{
- list_add_tail(&local_rule.common.list, &fib4_rules_ops.rules_list);
- list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list);
- list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list);
+ int err;
+
+ err = fib_default_rule_add(&fib4_rules_ops, 0,
+ RT_TABLE_LOCAL, FIB_RULE_PERMANENT);
+ if (err < 0)
+ return err;
+ err = fib_default_rule_add(&fib4_rules_ops, 0x7FFE,
+ RT_TABLE_MAIN, 0);
+ if (err < 0)
+ return err;
+ err = fib_default_rule_add(&fib4_rules_ops, 0x7FFF,
+ RT_TABLE_DEFAULT, 0);
+ if (err < 0)
+ return err;
+ return 0;
+}
+void __init fib4_rules_init(void)
+{
+ BUG_ON(fib_default_rules_init());
fib_rules_register(&fib4_rules_ops);
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index dc429b6b0ba..b0170732b5e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -747,13 +747,14 @@ skip_listen_ht:
for (i = s_i; i < hashinfo->ehash_size; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
+ rwlock_t *lock = inet_ehash_lockp(hashinfo, i);
struct sock *sk;
struct hlist_node *node;
if (i > s_i)
s_num = 0;
- read_lock_bh(&head->lock);
+ read_lock_bh(lock);
num = 0;
sk_for_each(sk, node, &head->chain) {
struct inet_sock *inet = inet_sk(sk);
@@ -769,7 +770,7 @@ skip_listen_ht:
r->id.idiag_dport)
goto next_normal;
if (inet_csk_diag_dump(sk, skb, cb) < 0) {
- read_unlock_bh(&head->lock);
+ read_unlock_bh(lock);
goto done;
}
next_normal:
@@ -791,14 +792,14 @@ next_normal:
r->id.idiag_dport)
goto next_dying;
if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
- read_unlock_bh(&head->lock);
+ read_unlock_bh(lock);
goto done;
}
next_dying:
++num;
}
}
- read_unlock_bh(&head->lock);
+ read_unlock_bh(lock);
}
done:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 16eecc7046a..67704da04fc 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
prefetch(head->chain.first);
- write_lock(&head->lock);
+ write_lock(lock);
/* Check TIME-WAIT sockets first. */
sk_for_each(sk2, node, &head->twchain) {
@@ -239,7 +240,7 @@ unique:
BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain);
sock_prot_inc_use(sk->sk_prot);
- write_unlock(&head->lock);
+ write_unlock(lock);
if (twp) {
*twp = tw;
@@ -255,7 +256,7 @@ unique:
return 0;
not_unique:
- write_unlock(&head->lock);
+ write_unlock(lock);
return -EADDRNOTAVAIL;
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 4e189e28f30..a60b99e0ebd 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb;
/* Unlink from established hashes. */
- struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash);
+ rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
- write_lock(&ehead->lock);
+ write_lock(lock);
if (hlist_unhashed(&tw->tw_node)) {
- write_unlock(&ehead->lock);
+ write_unlock(lock);
return;
}
__hlist_del(&tw->tw_node);
sk_node_init(&tw->tw_node);
- write_unlock(&ehead->lock);
+ write_unlock(lock);
/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
@@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
+ rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
struct inet_bind_hashbucket *bhead;
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
@@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
spin_unlock(&bhead->lock);
- write_lock(&ehead->lock);
+ write_lock(lock);
/* Step 2: Remove SK from established hash. */
if (__sk_del_node_init(sk))
@@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
inet_twsk_add_node(tw, &ehead->twchain);
atomic_inc(&tw->tw_refcnt);
- write_unlock(&ehead->lock);
+ write_unlock(lock);
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 771031dfbd0..af995198f64 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -61,7 +61,7 @@
* 4. Global variable peer_total is modified under the pool lock.
* 5. struct inet_peer fields modification:
* avl_left, avl_right, avl_parent, avl_height: pool lock
- * unused_next, unused_prevp: unused node list lock
+ * unused: unused node list lock
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* dtime: unused node list lock
@@ -94,8 +94,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
int inet_peer_gc_mintime __read_mostly = 10 * HZ;
int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
-static struct inet_peer *inet_peer_unused_head;
-static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
+static LIST_HEAD(unused_peers);
static DEFINE_SPINLOCK(inet_peer_unused_lock);
static void peer_check_expire(unsigned long dummy);
@@ -138,15 +137,7 @@ void __init inet_initpeers(void)
static void unlink_from_unused(struct inet_peer *p)
{
spin_lock_bh(&inet_peer_unused_lock);
- if (p->unused_prevp != NULL) {
- /* On unused list. */
- *p->unused_prevp = p->unused_next;
- if (p->unused_next != NULL)
- p->unused_next->unused_prevp = p->unused_prevp;
- else
- inet_peer_unused_tailp = p->unused_prevp;
- p->unused_prevp = NULL; /* mark it as removed */
- }
+ list_del_init(&p->unused);
spin_unlock_bh(&inet_peer_unused_lock);
}
@@ -337,24 +328,24 @@ static void unlink_from_pool(struct inet_peer *p)
/* May be called with local BH enabled. */
static int cleanup_once(unsigned long ttl)
{
- struct inet_peer *p;
+ struct inet_peer *p = NULL;
/* Remove the first entry from the list of unused nodes. */
spin_lock_bh(&inet_peer_unused_lock);
- p = inet_peer_unused_head;
- if (p != NULL) {
- __u32 delta = (__u32)jiffies - p->dtime;
+ if (!list_empty(&unused_peers)) {
+ __u32 delta;
+
+ p = list_first_entry(&unused_peers, struct inet_peer, unused);
+ delta = (__u32)jiffies - p->dtime;
+
if (delta < ttl) {
/* Do not prune fresh entries. */
spin_unlock_bh(&inet_peer_unused_lock);
return -1;
}
- inet_peer_unused_head = p->unused_next;
- if (p->unused_next != NULL)
- p->unused_next->unused_prevp = p->unused_prevp;
- else
- inet_peer_unused_tailp = p->unused_prevp;
- p->unused_prevp = NULL; /* mark as not on the list */
+
+ list_del_init(&p->unused);
+
/* Grab an extra reference to prevent node disappearing
* before unlink_from_pool() call. */
atomic_inc(&p->refcnt);
@@ -412,7 +403,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
/* Link the node. */
link_to_pool(n);
- n->unused_prevp = NULL; /* not on the list */
+ INIT_LIST_HEAD(&n->unused);
peer_total++;
write_unlock_bh(&peer_pool_lock);
@@ -467,10 +458,7 @@ void inet_putpeer(struct inet_peer *p)
{
spin_lock_bh(&inet_peer_unused_lock);
if (atomic_dec_and_test(&p->refcnt)) {
- p->unused_prevp = inet_peer_unused_tailp;
- p->unused_next = NULL;
- *inet_peer_unused_tailp = p;
- inet_peer_unused_tailp = &p->unused_next;
+ list_add_tail(&p->unused, &unused_peers);
p->dtime = (__u32)jiffies;
}
spin_unlock_bh(&inet_peer_unused_lock);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5f7dc2de30..fd99fbd685e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1183,6 +1183,17 @@ error:
return err;
}
+static void ip_cork_release(struct inet_sock *inet)
+{
+ inet->cork.flags &= ~IPCORK_OPT;
+ kfree(inet->cork.opt);
+ inet->cork.opt = NULL;
+ if (inet->cork.rt) {
+ ip_rt_put(inet->cork.rt);
+ inet->cork.rt = NULL;
+ }
+}
+
/*
* Combined all pending IP fragments on the socket as one IP datagram
* and push them out.
@@ -1276,13 +1287,7 @@ int ip_push_pending_frames(struct sock *sk)
}
out:
- inet->cork.flags &= ~IPCORK_OPT;
- kfree(inet->cork.opt);
- inet->cork.opt = NULL;
- if (inet->cork.rt) {
- ip_rt_put(inet->cork.rt);
- inet->cork.rt = NULL;
- }
+ ip_cork_release(inet);
return err;
error:
@@ -1295,19 +1300,12 @@ error:
*/
void ip_flush_pending_frames(struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
kfree_skb(skb);
- inet->cork.flags &= ~IPCORK_OPT;
- kfree(inet->cork.opt);
- inet->cork.opt = NULL;
- if (inet->cork.rt) {
- ip_rt_put(inet->cork.rt);
- inet->cork.rt = NULL;
- }
+ ip_cork_release(inet_sk(sk));
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f51f20e487c..82817e55436 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -437,10 +437,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
-#ifdef CONFIG_IP_MROUTE
- if (optname >= MRT_BASE && optname <= (MRT_BASE + 10))
+ if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk,optname,optval,optlen);
-#endif
err = 0;
lock_sock(sk);
@@ -909,11 +907,9 @@ int ip_setsockopt(struct sock *sk, int level,
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
- optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY
-#ifdef CONFIG_IP_MROUTE
- && (optname < MRT_BASE || optname > (MRT_BASE + 10))
-#endif
- ) {
+ optname != IP_IPSEC_POLICY &&
+ optname != IP_XFRM_POLICY &&
+ !ip_mroute_opt(optname)) {
lock_sock(sk);
err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
release_sock(sk);
@@ -935,11 +931,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
- optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY
-#ifdef CONFIG_IP_MROUTE
- && (optname < MRT_BASE || optname > (MRT_BASE + 10))
-#endif
- ) {
+ optname != IP_IPSEC_POLICY &&
+ optname != IP_XFRM_POLICY &&
+ !ip_mroute_opt(optname)) {
lock_sock(sk);
err = compat_nf_setsockopt(sk, PF_INET, optname,
optval, optlen);
@@ -967,11 +961,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (level != SOL_IP)
return -EOPNOTSUPP;
-#ifdef CONFIG_IP_MROUTE
- if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
+ if (ip_mroute_opt(optname))
return ip_mroute_getsockopt(sk,optname,optval,optlen);
- }
-#endif
if (get_user(len,optlen))
return -EFAULT;
@@ -1171,11 +1162,8 @@ int ip_getsockopt(struct sock *sk, int level,
err = do_ip_getsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS
-#ifdef CONFIG_IP_MROUTE
- && (optname < MRT_BASE || optname > MRT_BASE+10)
-#endif
- ) {
+ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
+ !ip_mroute_opt(optname)) {
int len;
if (get_user(len,optlen))
@@ -1200,11 +1188,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
int err = do_ip_getsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS
-#ifdef CONFIG_IP_MROUTE
- && (optname < MRT_BASE || optname > MRT_BASE+10)
-#endif
- ) {
+ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
+ !ip_mroute_opt(optname)) {
int len;
if (get_user(len, optlen))
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index ca1b5fdb8d3..2c44a94c213 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <asm/semaphore.h>
#include <linux/crypto.h>
+#include <linux/err.h>
#include <linux/pfkeyv2.h>
#include <linux/percpu.h>
#include <linux/smp.h>
@@ -344,7 +345,7 @@ static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
for_each_possible_cpu(cpu) {
struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
CRYPTO_ALG_ASYNC);
- if (!tfm)
+ if (IS_ERR(tfm))
goto error;
*per_cpu_ptr(tfms, cpu) = tfm;
}
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 4b702f708d3..0a9f3c37e18 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -426,6 +426,24 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
/*
+ * Check if there is a destination for the connection, if so
+ * bind the connection to the destination.
+ */
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+ struct ip_vs_dest *dest;
+
+ if ((cp) && (!cp->dest)) {
+ dest = ip_vs_find_dest(cp->daddr, cp->dport,
+ cp->vaddr, cp->vport, cp->protocol);
+ ip_vs_bind_dest(cp, dest);
+ return dest;
+ } else
+ return NULL;
+}
+
+
+/*
* Unbind a connection entry with its VS destination
* Called by the ip_vs_conn_expire function.
*/
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index c6ed7654e83..20c884a5772 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -979,15 +979,23 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
ret = NF_ACCEPT;
}
- /* increase its packet counter and check if it is needed
- to be synchronized */
+ /* Increase its packet counter and check if it is needed
+ * to be synchronized
+ *
+ * Sync connection if it is about to close to
+ * encorage the standby servers to update the connections timeout
+ */
atomic_inc(&cp->in_pkts);
if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
- (cp->protocol != IPPROTO_TCP ||
- cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0]))
+ (((cp->protocol != IPPROTO_TCP ||
+ cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+ (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+ == sysctl_ip_vs_sync_threshold[0])) ||
+ ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
+ ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+ (cp->state == IP_VS_TCP_S_CLOSE)))))
ip_vs_sync_conn(cp);
+ cp->old_state = cp->state;
ip_vs_conn_put(cp);
return ret;
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 7345fc252a2..b64cf45a9ea 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -579,6 +579,31 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
return NULL;
}
+/*
+ * Find destination by {daddr,dport,vaddr,protocol}
+ * Cretaed to be used in ip_vs_process_message() in
+ * the backup synchronization daemon. It finds the
+ * destination to be bound to the received connection
+ * on the backup.
+ *
+ * ip_vs_lookup_real_service() looked promissing, but
+ * seems not working as expected.
+ */
+struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
+ __be32 vaddr, __be16 vport, __u16 protocol)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_service *svc;
+
+ svc = ip_vs_service_get(0, protocol, vaddr, vport);
+ if (!svc)
+ return NULL;
+ dest = ip_vs_lookup_dest(svc, daddr, dport);
+ if (dest)
+ atomic_inc(&dest->refcnt);
+ ip_vs_service_put(svc);
+ return dest;
+}
/*
* Lookup dest by {svc,addr,port} in the destination trash.
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 0d4d9721cbd..bd930efc18d 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -284,6 +284,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
struct ip_vs_sync_conn_options *opt;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_dest *dest;
char *p;
int i;
@@ -317,20 +318,34 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
s->caddr, s->cport,
s->vaddr, s->vport);
if (!cp) {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ dest = ip_vs_find_dest(s->daddr, s->dport,
+ s->vaddr, s->vport,
+ s->protocol);
cp = ip_vs_conn_new(s->protocol,
s->caddr, s->cport,
s->vaddr, s->vport,
s->daddr, s->dport,
- flags, NULL);
+ flags, dest);
+ if (dest)
+ atomic_dec(&dest->refcnt);
if (!cp) {
IP_VS_ERR("ip_vs_conn_new failed\n");
return;
}
cp->state = ntohs(s->state);
} else if (!cp->dest) {
- /* it is an entry created by the synchronization */
- cp->state = ntohs(s->state);
- cp->flags = flags | IP_VS_CONN_F_HASHED;
+ dest = ip_vs_try_bind_dest(cp);
+ if (!dest) {
+ /* it is an unbound entry created by
+ * synchronization */
+ cp->flags = flags | IP_VS_CONN_F_HASHED;
+ } else
+ atomic_dec(&dest->refcnt);
} /* Note that we don't touch its state and flags
if it is a normal entry. */
@@ -342,6 +357,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
p += SIMPLE_CONN_SIZE;
atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ cp->state = ntohs(s->state);
pp = ip_vs_proto_get(s->protocol);
cp->timeout = pp->timeout_table[cp->state];
ip_vs_conn_put(cp);
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 409d273f6f8..7456833d6ad 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -41,27 +41,27 @@ obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
+obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
+obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
+obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
-obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
-obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
+obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
-obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
# targets
-obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
-obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
+obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
-obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
+obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
-obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
-obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
+obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 10a2ce09fd8..14d64a383db 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/mutex.h>
#include <net/net_namespace.h>
@@ -607,15 +608,11 @@ static ctl_table ipq_root_table[] = {
{ .ctl_name = 0 }
};
-#ifdef CONFIG_PROC_FS
-static int
-ipq_get_info(char *buffer, char **start, off_t offset, int length)
+static int ip_queue_show(struct seq_file *m, void *v)
{
- int len;
-
read_lock_bh(&queue_lock);
- len = sprintf(buffer,
+ seq_printf(m,
"Peer PID : %d\n"
"Copy mode : %hu\n"
"Copy range : %u\n"
@@ -632,16 +629,21 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
queue_user_dropped);
read_unlock_bh(&queue_lock);
+ return 0;
+}
- *start = buffer + offset;
- len -= offset;
- if (len > length)
- len = length;
- else if (len < 0)
- len = 0;
- return len;
+static int ip_queue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ip_queue_show, NULL);
}
-#endif /* CONFIG_PROC_FS */
+
+static const struct file_operations ip_queue_proc_fops = {
+ .open = ip_queue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+};
static struct nf_queue_handler nfqh = {
.name = "ip_queue",
@@ -661,10 +663,11 @@ static int __init ip_queue_init(void)
goto cleanup_netlink_notifier;
}
- proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
- if (proc)
+ proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
+ if (proc) {
proc->owner = THIS_MODULE;
- else {
+ proc->proc_fops = &ip_queue_proc_fops;
+ } else {
printk(KERN_ERR "ip_queue: failed to create proc entry\n");
goto cleanup_ipqnl;
}
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 35a5aa69cd9..c31b8766825 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -69,7 +69,7 @@ static void __exit nf_nat_amanda_fini(void)
static int __init nf_nat_amanda_init(void)
{
- BUG_ON(rcu_dereference(nf_nat_amanda_hook));
+ BUG_ON(nf_nat_amanda_hook != NULL);
rcu_assign_pointer(nf_nat_amanda_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 56e93f692e8..70e7997ea28 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -681,7 +681,7 @@ static int clean_nat(struct nf_conn *i, void *data)
if (!nat)
return 0;
- memset(nat, 0, sizeof(nat));
+ memset(nat, 0, sizeof(*nat));
i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index e1a16d3ea4c..a1d5d58a58b 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -147,7 +147,7 @@ static void __exit nf_nat_ftp_fini(void)
static int __init nf_nat_ftp_init(void)
{
- BUG_ON(rcu_dereference(nf_nat_ftp_hook));
+ BUG_ON(nf_nat_ftp_hook != NULL);
rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index a868c8c4132..93e18ef114f 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -544,15 +544,15 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int __init init(void)
{
- BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
- BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
- BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
- BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
- BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
- BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
+ BUG_ON(set_h245_addr_hook != NULL);
+ BUG_ON(set_h225_addr_hook != NULL);
+ BUG_ON(set_sig_addr_hook != NULL);
+ BUG_ON(set_ras_addr_hook != NULL);
+ BUG_ON(nat_rtp_rtcp_hook != NULL);
+ BUG_ON(nat_t120_hook != NULL);
+ BUG_ON(nat_h245_hook != NULL);
+ BUG_ON(nat_callforwarding_hook != NULL);
+ BUG_ON(nat_q931_hook != NULL);
rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 766e2c16c6b..fe6f9cef6c8 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -74,7 +74,7 @@ static void __exit nf_nat_irc_fini(void)
static int __init nf_nat_irc_init(void)
{
- BUG_ON(rcu_dereference(nf_nat_irc_hook));
+ BUG_ON(nf_nat_irc_hook != NULL);
rcu_assign_pointer(nf_nat_irc_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e1385a09907..6817e7995f3 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -281,16 +281,16 @@ static int __init nf_nat_helper_pptp_init(void)
{
nf_nat_need_gre();
- BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound));
+ BUG_ON(nf_nat_pptp_hook_outbound != NULL);
rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
- BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound));
+ BUG_ON(nf_nat_pptp_hook_inbound != NULL);
rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
- BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre));
+ BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
- BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn));
+ BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ce9edbcc01e..3ca98971a1e 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -293,8 +293,8 @@ static void __exit nf_nat_sip_fini(void)
static int __init nf_nat_sip_init(void)
{
- BUG_ON(rcu_dereference(nf_nat_sip_hook));
- BUG_ON(rcu_dereference(nf_nat_sdp_hook));
+ BUG_ON(nf_nat_sip_hook != NULL);
+ BUG_ON(nf_nat_sdp_hook != NULL);
rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp);
return 0;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 0ecec701cb4..1360a94766d 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -43,7 +43,7 @@ static void __exit nf_nat_tftp_fini(void)
static int __init nf_nat_tftp_init(void)
{
- BUG_ON(rcu_dereference(nf_nat_tftp_hook));
+ BUG_ON(nf_nat_tftp_hook != NULL);
rcu_assign_pointer(nf_nat_tftp_hook, help);
return 0;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ffdccc0972e..ce34b281803 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,17 +46,6 @@
#include <net/sock.h>
#include <net/raw.h>
-static int fold_prot_inuse(struct proto *proto)
-{
- int res = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- res += proto->stats[cpu].inuse;
-
- return res;
-}
-
/*
* Report socket allocation statistics [mea@utu.fi]
*/
@@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
{
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
- fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
+ sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
- seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
- seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
+ seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
+ seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
ip_frag_nqueues(), ip_frag_mem());
return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 3916faca3af..66b42f547bf 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -760,6 +760,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
}
+DEFINE_PROTO_INUSE(raw)
+
struct proto raw_prot = {
.name = "RAW",
.owner = THIS_MODULE,
@@ -781,6 +783,7 @@ struct proto raw_prot = {
.compat_setsockopt = compat_raw_setsockopt,
.compat_getsockopt = compat_raw_getsockopt,
#endif
+ REF_PROTO_INUSE(raw)
};
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 21b12de9e65..1bff9ed349f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -578,6 +578,9 @@ static void rt_check_expire(struct work_struct *work)
i = (i + 1) & rt_hash_mask;
rthp = &rt_hash_table[i].chain;
+ if (need_resched())
+ cond_resched();
+
if (*rthp == NULL)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
@@ -851,9 +854,7 @@ restart:
*/
rcu_assign_pointer(rt_hash_table[hash].chain, rth);
- rth->u.dst.__use++;
- dst_hold(&rth->u.dst);
- rth->u.dst.lastuse = now;
+ dst_use(&rth->u.dst, now);
spin_unlock_bh(rt_hash_lock_addr(hash));
rt_drop(rt);
@@ -1813,11 +1814,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto martian_destination;
err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
- if (err == -ENOBUFS)
- goto e_nobufs;
- if (err == -EINVAL)
- goto e_inval;
-
done:
in_dev_put(in_dev);
if (free_res)
@@ -1935,9 +1931,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.oif == 0 &&
rth->fl.mark == skb->mark &&
rth->fl.fl4_tos == tos) {
- rth->u.dst.lastuse = jiffies;
- dst_hold(&rth->u.dst);
- rth->u.dst.__use++;
+ dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
skb->dst = (struct dst_entry*)rth;
@@ -2331,9 +2325,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK))) {
- rth->u.dst.lastuse = jiffies;
- dst_hold(&rth->u.dst);
- rth->u.dst.__use++;
+ dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
*rp = rth;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c64072bb504..8e65182f7af 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2456,11 +2456,11 @@ void __init tcp_init(void)
thash_entries ? 0 : 512 * 1024);
tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size;
for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
- rwlock_init(&tcp_hashinfo.ehash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain);
}
-
+ if (inet_ehash_locks_alloc(&tcp_hashinfo))
+ panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
alloc_large_system_hash("TCP bind",
sizeof(struct inet_bind_hashbucket),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ca9590f4f52..0f0c1c9829a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1269,6 +1269,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
return 0;
+ if (!tp->packets_out)
+ goto out;
+
/* SACK fastpath:
* if the only SACK change is the increase of the end_seq of
* the first block then only apply that SACK block
@@ -1400,11 +1403,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
/* DSACK info lost if out-of-mem, try SACK still */
if (in_sack <= 0)
in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
- if (in_sack < 0)
+ if (unlikely(in_sack < 0))
break;
- fack_count += tcp_skb_pcount(skb);
-
sacked = TCP_SKB_CB(skb)->sacked;
/* Account D-SACK for retransmitted packet. */
@@ -1419,19 +1420,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if ((dup_sack && in_sack) &&
(sacked&TCPCB_SACKED_ACKED))
reord = min(fack_count, reord);
- } else {
- /* If it was in a hole, we detected reordering. */
- if (fack_count < prior_fackets &&
- !(sacked&TCPCB_SACKED_ACKED))
- reord = min(fack_count, reord);
}
/* Nothing to do; acked frame is about to be dropped. */
+ fack_count += tcp_skb_pcount(skb);
continue;
}
- if (!in_sack)
+ if (!in_sack) {
+ fack_count += tcp_skb_pcount(skb);
continue;
+ }
if (!(sacked&TCPCB_SACKED_ACKED)) {
if (sacked & TCPCB_SACKED_RETRANS) {
@@ -1448,12 +1447,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->retransmit_skb_hint = NULL;
}
} else {
- /* New sack for not retransmitted frame,
- * which was in hole. It is reordering.
- */
- if (!(sacked & TCPCB_RETRANS) &&
- fack_count < prior_fackets)
- reord = min(fack_count, reord);
+ if (!(sacked & TCPCB_RETRANS)) {
+ /* New sack for not retransmitted frame,
+ * which was in hole. It is reordering.
+ */
+ if (fack_count < prior_fackets)
+ reord = min(fack_count, reord);
+
+ /* SACK enhanced F-RTO (RFC4138; Appendix B) */
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+ flag |= FLAG_ONLY_ORIG_SACKED;
+ }
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
@@ -1462,24 +1466,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
/* clear lost hint */
tp->retransmit_skb_hint = NULL;
}
- /* SACK enhanced F-RTO detection.
- * Set flag if and only if non-rexmitted
- * segments below frto_highmark are
- * SACKed (RFC4138; Appendix B).
- * Clearing correct due to in-order walk
- */
- if (after(end_seq, tp->frto_highmark)) {
- flag &= ~FLAG_ONLY_ORIG_SACKED;
- } else {
- if (!(sacked & TCPCB_RETRANS))
- flag |= FLAG_ONLY_ORIG_SACKED;
- }
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
flag |= FLAG_DATA_SACKED;
tp->sacked_out += tcp_skb_pcount(skb);
+ fack_count += tcp_skb_pcount(skb);
if (fack_count > tp->fackets_out)
tp->fackets_out = fack_count;
@@ -1490,6 +1483,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
} else {
if (dup_sack && (sacked&TCPCB_RETRANS))
reord = min(fack_count, reord);
+
+ fack_count += tcp_skb_pcount(skb);
}
/* D-SACK. We can detect redundant retransmission
@@ -1504,6 +1499,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->retransmit_skb_hint = NULL;
}
}
+
+ /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
+ * due to in-order walk
+ */
+ if (after(end_seq, tp->frto_highmark))
+ flag &= ~FLAG_ONLY_ORIG_SACKED;
}
if (tp->retrans_out &&
@@ -1515,7 +1516,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
(!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
- tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
+ tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+out:
#if FASTRETRANS_DEBUG > 0
BUG_TRAP((int)tp->sacked_out >= 0);
@@ -1671,6 +1674,9 @@ void tcp_enter_frto(struct sock *sk)
}
tcp_verify_left_out(tp);
+ /* Too bad if TCP was application limited */
+ tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
+
/* Earlier loss recovery underway (see RFC4138; Appendix B).
* The last condition is necessary at least in tp->frto_counter case.
*/
@@ -1703,6 +1709,8 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
break;
+
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
/*
* Count the retransmission made on RTO correctly (only when
* waiting for the first ACK and did not get it)...
@@ -1716,7 +1724,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
} else {
if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
tp->undo_marker = 0;
- TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
}
/* Don't lost mark skbs that were fwd transmitted after RTO */
@@ -2630,7 +2638,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
+static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
+ int prior_fackets)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2639,6 +2648,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
int fully_acked = 1;
int flag = 0;
int prior_packets = tp->packets_out;
+ u32 cnt = 0;
+ u32 reord = tp->packets_out;
s32 seq_rtt = -1;
ktime_t last_ackt = net_invalid_timestamp();
@@ -2679,10 +2690,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
if ((flag & FLAG_DATA_ACKED) ||
(packets_acked > 1))
flag |= FLAG_NONHEAD_RETRANS_ACKED;
- } else if (seq_rtt < 0) {
- seq_rtt = now - scb->when;
- if (fully_acked)
- last_ackt = skb->tstamp;
+ } else {
+ if (seq_rtt < 0) {
+ seq_rtt = now - scb->when;
+ if (fully_acked)
+ last_ackt = skb->tstamp;
+ }
+ if (!(sacked & TCPCB_SACKED_ACKED))
+ reord = min(cnt, reord);
}
if (sacked & TCPCB_SACKED_ACKED)
@@ -2693,12 +2708,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
if ((sacked & TCPCB_URG) && tp->urg_mode &&
!before(end_seq, tp->snd_up))
tp->urg_mode = 0;
- } else if (seq_rtt < 0) {
- seq_rtt = now - scb->when;
- if (fully_acked)
- last_ackt = skb->tstamp;
+ } else {
+ if (seq_rtt < 0) {
+ seq_rtt = now - scb->when;
+ if (fully_acked)
+ last_ackt = skb->tstamp;
+ }
+ reord = min(cnt, reord);
}
tp->packets_out -= packets_acked;
+ cnt += packets_acked;
/* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not
@@ -2730,13 +2749,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
tcp_ack_update_rtt(sk, flag, seq_rtt);
tcp_rearm_rto(sk);
+ if (tcp_is_reno(tp)) {
+ tcp_remove_reno_sacks(sk, pkts_acked);
+ } else {
+ /* Non-retransmitted hole got filled? That's reordering */
+ if (reord < prior_fackets)
+ tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ }
+
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
/* hint's skb might be NULL but we don't need to care */
tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
tp->fastpath_cnt_hint);
- if (tcp_is_reno(tp))
- tcp_remove_reno_sacks(sk, pkts_acked);
-
if (ca_ops->pkts_acked) {
s32 rtt_us = -1;
@@ -3019,6 +3043,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
u32 prior_in_flight;
+ u32 prior_fackets;
s32 seq_rtt;
int prior_packets;
int frto_cwnd = 0;
@@ -3043,6 +3068,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
}
+ prior_fackets = tp->fackets_out;
+
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
@@ -3084,13 +3111,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
prior_in_flight = tcp_packets_in_flight(tp);
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
+ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets);
+ if (tp->frto_counter)
+ frto_cwnd = tcp_process_frto(sk, flag);
/* Guarantee sacktag reordering detection against wrap-arounds */
if (before(tp->frto_highmark, tp->snd_una))
tp->frto_highmark = 0;
- if (tp->frto_counter)
- frto_cwnd = tcp_process_frto(sk, flag);
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d438dfb0c8f..e566f3c6767 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq)
struct sock *sk;
struct hlist_node *node;
struct inet_timewait_sock *tw;
+ rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
- read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ read_lock_bh(lock);
sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != st->family) {
continue;
@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq)
rc = tw;
goto out;
}
- read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ read_unlock_bh(lock);
st->state = TCP_SEQ_STATE_ESTABLISHED;
}
out:
@@ -2094,11 +2095,11 @@ get_tw:
cur = tw;
goto out;
}
- read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
st->state = TCP_SEQ_STATE_ESTABLISHED;
if (++st->bucket < tcp_hashinfo.ehash_size) {
- read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
} else {
cur = NULL;
@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
- read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
break;
}
}
@@ -2417,6 +2418,8 @@ void tcp4_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
+DEFINE_PROTO_INUSE(tcp)
+
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
@@ -2451,6 +2454,7 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
+ REF_PROTO_INUSE(tcp)
};
void __init tcp_v4_init(struct net_proto_family *ops)
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index a794a8ca8b4..978b3fd61e6 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -17,6 +17,11 @@ static struct xfrm_tunnel *tunnel4_handlers;
static struct xfrm_tunnel *tunnel64_handlers;
static DEFINE_MUTEX(tunnel4_mutex);
+static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
+{
+ return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
+}
+
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
{
struct xfrm_tunnel **pprev;
@@ -25,8 +30,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
mutex_lock(&tunnel4_mutex);
- for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
- *pprev; pprev = &(*pprev)->next) {
+ for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
if ((*pprev)->priority > priority)
break;
if ((*pprev)->priority == priority)
@@ -53,8 +57,7 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
mutex_lock(&tunnel4_mutex);
- for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
- *pprev; pprev = &(*pprev)->next) {
+ for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
if (*pprev == handler) {
*pprev = handler->next;
ret = 0;
@@ -118,6 +121,17 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
break;
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static void tunnel64_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm_tunnel *handler;
+
+ for (handler = tunnel64_handlers; handler; handler = handler->next)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+#endif
+
static struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
@@ -127,7 +141,7 @@ static struct net_protocol tunnel4_protocol = {
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static struct net_protocol tunnel64_protocol = {
.handler = tunnel64_rcv,
- .err_handler = tunnel4_err,
+ .err_handler = tunnel64_err,
.no_policy = 1,
};
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4bc25b46f33..03c400ca14c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1430,6 +1430,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
+DEFINE_PROTO_INUSE(udp)
+
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
@@ -1452,6 +1454,7 @@ struct proto udp_prot = {
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
+ REF_PROTO_INUSE(udp)
};
/* ------------------------------------------------------------------------ */
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 94977205abb..f5baeb3e8b8 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -44,6 +44,8 @@ static struct net_protocol udplite_protocol = {
.no_policy = 1,
};
+DEFINE_PROTO_INUSE(udplite)
+
struct proto udplite_prot = {
.name = "UDP-Lite",
.owner = THIS_MODULE,
@@ -67,6 +69,7 @@ struct proto udplite_prot = {
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
+ REF_PROTO_INUSE(udplite)
};
static struct inet_protosw udplite4_protosw = {