From 93821778def10ec1e69aa3ac10adee975dad4ff3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Sep 2008 11:48:46 -0700 Subject: udp: Fix rcv socket locking The previous patch in response to the recursive locking on IPsec reception is broken as it tries to drop the BH socket lock while in user context. This patch fixes it by shrinking the section protected by the socket lock to sock_queue_rcv_skb only. The only reason we added the lock is for the accounting which happens in that function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 62 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8e42fbbd576..57e26fa6618 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -989,9 +1010,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) up->encap_rcv != NULL) { int ret; - bh_unlock_sock(sk); ret = (*up->encap_rcv)(sk, skb); - bh_lock_sock(sk); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, @@ -1044,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1092,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1195,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1494,7 +1498,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, -- cgit v1.2.3 From bcd41303f422015ab662c9276d108414aa75b796 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:48:10 -0700 Subject: udp: Export UDP socket lookup function The iptables tproxy code has to be able to do UDP socket hash lookups, so we have to provide an exported lookup function for this purpose. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 57e26fa6618..c83d0ef469c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -302,6 +302,13 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) +{ + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup); + static inline struct sock *udp_v4_mcast_next(struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, -- cgit v1.2.3 From 607c4aaf03041c8bd81555a0218050c0f895088e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Tue, 7 Oct 2008 12:38:32 -0700 Subject: inet: Add udplib_lookup_skb() helpers To be able to use the cached socket reference in the skb during input processing we add a new set of lookup functions that receive the skb on their argument list. Signed-off-by: KOVACS Krisztian Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/udp.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c83d0ef469c..c7a90b546b2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -302,6 +302,17 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct hlist_head udptable[]) +{ + const struct iphdr *iph = ip_hdr(skb); + + return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); +} + struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { @@ -1208,8 +1219,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); -- cgit v1.2.3 From 23542618deb77cfed312842fe8c41ed19fb16470 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Tue, 7 Oct 2008 12:41:01 -0700 Subject: inet: Don't lookup the socket if there's a socket attached to the skb Use the socket cached in the skb if it's present. Signed-off-by: KOVACS Krisztian Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c7a90b546b2..822c9deac83 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -306,11 +306,15 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct hlist_head udptable[]) { + struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, -- cgit v1.2.3 From 0c7ed677fb7013c8028045d409a48ac42151187a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:49:36 -0700 Subject: netns: make udpv6 mib per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 822c9deac83..85f8e8e10b1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -108,9 +108,6 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -EXPORT_SYMBOL(udp_stats_in6); - struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -- cgit v1.2.3 From 9088c5609584684149f3fb5b065aa7f18dcb03ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Oct 2008 11:44:17 -0700 Subject: udp: Improve port randomization Current UDP port allocation is suboptimal. We select the shortest chain to chose a port (out of 512) that will hash in this shortest chain. First, it can lead to give not so ramdom ports and ease give attackers more opportunities to break the system. Second, it can consume a lot of CPU to scan all table in order to find the shortest chain. Third, in some pathological cases we can fail to find a free port even if they are plenty of them. This patch zap the search for a short chain and only use one random seed. Problem of getting long chains should be addressed in another way, since we can obtain long chains with non random ports. Based on a report and patch from Vitaly Mayatskikh Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 56 ++++++++++++-------------------------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 85f8e8e10b1..67d8430b4a2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -155,55 +155,23 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, write_lock_bh(&udp_hash_lock); if (!snum) { - int i, low, high, remaining; - unsigned rover, best, best_size_so_far; + int low, high, remaining; + unsigned rand; + unsigned short first; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; - best_size_so_far = UINT_MAX; - best = rover = net_random() % remaining + low; - - /* 1st pass: look for empty (or shortest) hash chain */ - for (i = 0; i < UDP_HTABLE_SIZE; i++) { - int size = 0; - - head = &udptable[udp_hashfn(net, rover)]; - if (hlist_empty(head)) - goto gotit; - - sk_for_each(sk2, node, head) { - if (++size >= best_size_so_far) - goto next; - } - best_size_so_far = size; - best = rover; - next: - /* fold back if end of range */ - if (++rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); - - - } - - /* 2nd pass: find hole in shortest hash chain */ - rover = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(net, rover, udptable)) - goto gotit; - rover += UDP_HTABLE_SIZE; - if (rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); + rand = net_random(); + snum = first = rand % remaining + low; + rand |= 1; + while (__udp_lib_lport_inuse(net, snum, udptable)) { + do { + snum = snum + rand; + } while (snum < low || snum > high); + if (snum == first) + goto fail; } - - - /* All ports in use! */ - goto fail; - -gotit: - snum = rover; } else { head = &udptable[udp_hashfn(net, snum)]; -- cgit v1.2.3 From f24d43c07e208372aa3d3bff419afbf43ba87698 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Oct 2008 14:51:27 -0700 Subject: udp: complete port availability checking While looking at UDP port randomization, I noticed it was litle bit pessimistic, not looking at type of sockets (IPV6/IPV4) and not looking at bound addresses if any. We should perform same tests than when binding to a specific port. This permits a cleanup of udp_lib_get_port() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 67d8430b4a2..eacf4cfef14 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -122,14 +122,23 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); -static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, - const struct hlist_head udptable[]) +static int udp_lib_lport_inuse(struct net *net, __u16 num, + const struct hlist_head udptable[], + struct sock *sk, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2)) { - struct sock *sk; + struct sock *sk2; struct hlist_node *node; - sk_for_each(sk, node, &udptable[udp_hashfn(net, num)]) - if (net_eq(sock_net(sk), net) && sk->sk_hash == num) + sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + sk2->sk_hash == num && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2)) return 1; return 0; } @@ -146,9 +155,6 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, const struct sock *sk2 ) ) { struct hlist_head *udptable = sk->sk_prot->h.udp_hash; - struct hlist_node *node; - struct hlist_head *head; - struct sock *sk2; int error = 1; struct net *net = sock_net(sk); @@ -165,32 +171,21 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, rand = net_random(); snum = first = rand % remaining + low; rand |= 1; - while (__udp_lib_lport_inuse(net, snum, udptable)) { + while (udp_lib_lport_inuse(net, snum, udptable, sk, + saddr_comp)) { do { snum = snum + rand; } while (snum < low || snum > high); if (snum == first) goto fail; } - } else { - head = &udptable[udp_hashfn(net, snum)]; - - sk_for_each(sk2, node, head) - if (sk2->sk_hash == snum && - sk2 != sk && - net_eq(sock_net(sk2), net) && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2) ) - goto fail; - } + } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) + goto fail; inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - head = &udptable[udp_hashfn(net, snum)]; - sk_add_node(sk, head); + sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; -- cgit v1.2.3