From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:24:06 -0700 Subject: net: reduce structures when XFRM=n ifdef out * struct sk_buff::sp (pointer) * struct dst_entry::xfrm (pointer) * struct sock::sk_policy (2 pointers) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_output.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9b7d19ae5ce..508a713ac04 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -646,9 +646,10 @@ static int icmpv6_rcv(struct sk_buff *skb) int type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop_no_count; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c77db0b95e2..7d92fd97cfb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -490,7 +490,7 @@ int ip6_forward(struct sk_buff *skb) We don't send redirects to frames decapsulated from IPsec. */ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb->sp) { + !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; -- cgit v1.2.3 From b071195deba14b37ce896c26f20349b46e5f9fd2 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 16:05:40 -0700 Subject: net: replace all current users of NIP6_SEQFMT with %#p6 The define in kernel.h can be done away with at a later time. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++--- net/ipv6/anycast.c | 6 ++---- net/ipv6/ip6_flowlabel.c | 4 ++-- net/ipv6/mcast.c | 10 +++++----- net/ipv6/route.c | 9 +++------ 5 files changed, 14 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index eea9542728c..113c4d93153 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2986,9 +2986,8 @@ static void if6_seq_stop(struct seq_file *seq, void *v) static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, - NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", - NIP6(ifp->addr), + seq_printf(seq, "%#p6 %02x %02x %02x %02x %8s\n", + &ifp->addr, ifp->idev->dev->ifindex, ifp->prefix_len, ifp->scope, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 8336cd81cb4..ae494e30e55 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -512,11 +512,9 @@ static int ac6_seq_show(struct seq_file *seq, void *v) struct ifacaddr6 *im = (struct ifacaddr6 *)v; struct ac6_iter_state *state = ac6_seq_private(seq); - seq_printf(seq, - "%-4d %-15s " NIP6_SEQFMT " %5d\n", + seq_printf(seq, "%-4d %-15s %#p6 %5d\n", state->dev->ifindex, state->dev->name, - NIP6(im->aca_addr), - im->aca_users); + &im->aca_addr, im->aca_users); return 0; } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 37a4e777e34..d5710235350 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -696,14 +696,14 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) else { struct ip6_flowlabel *fl = v; seq_printf(seq, - "%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n", + "%05X %-1d %-6d %-6d %-6ld %-8ld %#p6 %-4d\n", (unsigned)ntohl(fl->label), fl->share, (unsigned)fl->owner, atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, - NIP6(fl->dst), + &fl->dst, fl->opt ? fl->opt->opt_nflen : 0); } return 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d7b3c6d398a..4416cdc3e62 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2430,9 +2430,9 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); seq_printf(seq, - "%-4d %-15s " NIP6_SEQFMT " %5d %08X %ld\n", + "%-4d %-15s %#p6 %5d %08X %ld\n", state->dev->ifindex, state->dev->name, - NIP6(im->mca_addr), + &im->mca_addr, im->mca_users, im->mca_flags, (im->mca_flags&MAF_TIMER_RUNNING) ? jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0); @@ -2591,10 +2591,10 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) "Source Address", "INC", "EXC"); } else { seq_printf(seq, - "%3d %6.6s " NIP6_SEQFMT " " NIP6_SEQFMT " %6lu %6lu\n", + "%3d %6.6s %#p6 %#p6 %6lu %6lu\n", state->dev->ifindex, state->dev->name, - NIP6(state->im->mca_addr), - NIP6(psf->sf_addr), + &state->im->mca_addr, + &psf->sf_addr, psf->sf_count[MCAST_INCLUDE], psf->sf_count[MCAST_EXCLUDE]); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 89dc6992434..720bfedd318 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2408,19 +2408,16 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + seq_printf(m, "%#p6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr), - rt->rt6i_src.plen); + seq_printf(m, "%#p6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - seq_printf(m, NIP6_SEQFMT, - NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + seq_printf(m, "%#p6", rt->rt6i_nexthop->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } -- cgit v1.2.3 From 0c6ce78abf6e228d44c3840edb8a4ae0c1299825 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 16:09:23 -0700 Subject: net: replace uses of NIP6_FMT with %p6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 34 +++++++++----------------- net/ipv6/ah6.c | 4 +-- net/ipv6/esp6.c | 4 +-- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 4 +-- net/ipv6/ip6mr.c | 5 ++-- net/ipv6/ipcomp6.c | 4 +-- net/ipv6/ndisc.c | 7 ++---- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 5 ++-- net/ipv6/tcp_ipv6.c | 8 +++--- 11 files changed, 30 insertions(+), 49 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 08909039d87..d28036659d2 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,10 +186,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", - __func__, - NIP6(*addr), type, ifindex, - label); + ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n", + __func__, addr, type, ifindex, label); return label; } @@ -203,11 +201,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -294,12 +289,9 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label, - replace); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, + replace); newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -321,10 +313,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -347,10 +337,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&ip6addrlbl_table.lock); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2ff0c8233e4..9bc43f2527c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -419,8 +419,8 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n", - ntohl(ah->spi), NIP6(iph->daddr)); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n", + ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b181b08fb76..ec4be188c34 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -367,8 +367,8 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n", - ntohl(esph->spi), NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n", + ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6bfffec2371..a8905146835 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 508a713ac04..b3fa38e40dc 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -681,8 +681,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(*saddr), NIP6(*daddr)); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n", + saddr, daddr); goto discard_it; } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 182f8a177e7..798e6a29dd8 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,9 +297,8 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, - NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld", - NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin), + seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld", + &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent, mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 4545e430686..9566d8f7314 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -67,8 +67,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n", - spi, NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n", + spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 172438320ee..191bb0722a7 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -647,11 +647,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG - "%s(): trying to ucast probe in NUD_INVALID: " - NIP6_FMT "\n", - __func__, - NIP6(*target)); + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n", + __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= neigh->parms->app_probes) < 0) { diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index caa441d0956..a61ce301000 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr)); + printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e91db16611d..b165a273c6c 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,9 +56,8 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ", - NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), - NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); + return seq_printf(s, "src=%p6 dst=%p6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); } /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6b356b7912..483550cfdf3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -872,12 +872,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n", genhash ? "failed" : "mismatch", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); + &ip6h->saddr, ntohs(th->source), + &ip6h->daddr, ntohs(th->dest)); } return 1; } -- cgit v1.2.3 From 645ca708f936b2fbeb79e52d7823e3eb2c0905f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 01:41:45 -0700 Subject: udp: introduce struct udp_table and multiple spinlocks UDP sockets are hashed in a 128 slots hash table. This hash table is protected by *one* rwlock. This rwlock is readlocked each time an incoming UDP message is handled. This rwlock is writelocked each time a socket must be inserted in hash table (bind time), or deleted from this table (close time) This is not scalable on SMP machines : 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all cpus. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. User process can be delayed a long time at socket creation/dismantle time. This patch prepares RCU migration, by introducing 'struct udp_table and struct udp_hslot', and using one spinlock per chain, to reduce contention on central rwlock. Introducing one spinlock per chain reduces latencies, for port randomization on heavily loaded UDP servers. This also speedup bindings to specific ports. udp_lib_unhash() was uninlined, becoming to big. Some cleanups were done to ease review of following patch (RCUification of UDP Unicast lookups) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 112 +++++++++++++++++++++++++++++----------------------- net/ipv6/udp_impl.h | 4 +- net/ipv6/udplite.c | 8 ++-- 3 files changed, 68 insertions(+), 56 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51da8c092f..ccee7244ca0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,62 +54,73 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, + unsigned short hnum, + struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + + score = 0; + if (inet->dport) { + if (inet->dport != sport) + return -1; + score++; + } + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *saddr, __be16 sport, struct in6_addr *daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - int score = 0; - if (inet->dport) { - if (inet->dport != sport) - continue; - score++; - } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 4) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness = -1; + + spin_lock(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { + score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } if (result) sock_hold(result); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return result; } static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; struct ipv6hdr *iph = ipv6_hdr(skb); @@ -239,7 +250,7 @@ csum_copy_err: void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info, - struct hlist_head udptable[] ) + struct udp_table *udptable) { struct ipv6_pinfo *np; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; @@ -275,7 +286,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info ) { - __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) @@ -374,14 +385,15 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -409,7 +421,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -447,7 +459,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -544,7 +556,7 @@ discard: static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } /* @@ -1008,7 +1020,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp6_seq_afinfo = { .name = "udp6", .family = AF_INET6, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1050,7 +1062,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udp_hash, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 92dd7da766d..23779208c33 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,9 +7,9 @@ #include #include -extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct hlist_head []); + int , int , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3cd1a1ac3d6..f1e892a99e0 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -15,14 +15,14 @@ static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } static struct inet6_protocol udplitev6_protocol = { @@ -49,7 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udplite_hash, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, @@ -95,7 +95,7 @@ void udplitev6_exit(void) static struct udp_seq_afinfo udplite6_seq_afinfo = { .name = "udplite6", .family = AF_INET6, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, -- cgit v1.2.3 From 271b72c7fa82c2c7a795bc16896149933110672d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 02:11:14 -0700 Subject: udp: RCU handling for Unicast packets. Goals are : 1) Optimizing handling of incoming Unicast UDP frames, so that no memory writes should happen in the fast path. Note: Multicasts and broadcasts still will need to take a lock, because doing a full lockless lookup in this case is difficult. 2) No expensive operations in the socket bind/unhash phases : - No expensive synchronize_rcu() calls. - No added rcu_head in socket structure, increasing memory needs, but more important, forcing us to use call_rcu() calls, that have the bad property of making sockets structure cold. (rcu grace period between socket freeing and its potential reuse make this socket being cold in CPU cache). David did a previous patch using call_rcu() and noticed a 20% impact on TCP connection rates. Quoting Cristopher Lameter : "Right. That results in cacheline cooldown. You'd want to recycle the object as they are cache hot on a per cpu basis. That is screwed up by the delayed regular rcu processing. We have seen multiple regressions due to cacheline cooldown. The only choice in cacheline hot sensitive areas is to deal with the complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU." - Because udp sockets are allocated from dedicated kmem_cache, use of SLAB_DESTROY_BY_RCU can help here. Theory of operation : --------------------- As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()), special attention must be taken by readers and writers. Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed, reused, inserted in a different chain or in worst case in the same chain while readers could do lookups in the same time. In order to avoid loops, a reader must check each socket found in a chain really belongs to the chain the reader was traversing. If it finds a mismatch, lookup must start again at the begining. This *restart* loop is the reason we had to use rdlock for the multicast case, because we dont want to send same message several times to the same socket. We use RCU only for fast path. Thus, /proc/net/udp still takes spinlocks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 31 ++++++++++++++++++++++++------- net/ipv6/udplite.c | 1 + 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccee7244ca0..1d9790e43df 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -97,24 +97,40 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, saddr, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -1062,6 +1078,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f1e892a99e0..ba162a82458 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v1.2.3 From 96631ed16c514cf8b28fab991a076985ce378c26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 11:19:58 -0700 Subject: udp: introduce sk_for_each_rcu_safenext() Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d (udp: RCU handling for Unicast packets.) "If the socket is moved from one list to another list in-between the time the hash is calculated and the next field is accessed, and the socket has moved to the end of the new list, the traversal will not complete properly on the list it should have, since the socket will be on the end of the new list and there's not a way to tell it's on a new list and restart the list traversal. I think that this can be solved by pre-fetching the "next" field (with proper barriers) before checking the hash." This patch corrects this problem, introducing a new sk_for_each_rcu_safenext() macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1d9790e43df..32d914db6c4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node; + struct hlist_node *node, *next; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,7 +108,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { /* * lockless reader, and SLAB_DESTROY_BY_RCU items: * We must check this item was not moved to another chain -- cgit v1.2.3 From 4b7a4274ca63dadd9c4f17fc953f3a5d19855c4c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 29 Oct 2008 12:50:24 -0700 Subject: net: replace %#p6 format specifier with %pi6 gcc warns when using the # modifier with the %p format specifier, so we can't use this to omit the colons when needed, introduces %pi6 instead. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- net/ipv6/anycast.c | 2 +- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/mcast.c | 4 ++-- net/ipv6/route.c | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 113c4d93153..ff7ae05f72e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2986,7 +2986,7 @@ static void if6_seq_stop(struct seq_file *seq, void *v) static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, "%#p6 %02x %02x %02x %02x %8s\n", + seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", &ifp->addr, ifp->idev->dev->ifindex, ifp->prefix_len, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ae494e30e55..1ae58bec1de 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -512,7 +512,7 @@ static int ac6_seq_show(struct seq_file *seq, void *v) struct ifacaddr6 *im = (struct ifacaddr6 *)v; struct ac6_iter_state *state = ac6_seq_private(seq); - seq_printf(seq, "%-4d %-15s %#p6 %5d\n", + seq_printf(seq, "%-4d %-15s %pi6 %5d\n", state->dev->ifindex, state->dev->name, &im->aca_addr, im->aca_users); return 0; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d5710235350..7927a8498d1 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -696,7 +696,7 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) else { struct ip6_flowlabel *fl = v; seq_printf(seq, - "%05X %-1d %-6d %-6d %-6ld %-8ld %#p6 %-4d\n", + "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned)ntohl(fl->label), fl->share, (unsigned)fl->owner, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4416cdc3e62..a76199ecad2 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2430,7 +2430,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); seq_printf(seq, - "%-4d %-15s %#p6 %5d %08X %ld\n", + "%-4d %-15s %pi6 %5d %08X %ld\n", state->dev->ifindex, state->dev->name, &im->mca_addr, im->mca_users, im->mca_flags, @@ -2591,7 +2591,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) "Source Address", "INC", "EXC"); } else { seq_printf(seq, - "%3d %6.6s %#p6 %#p6 %6lu %6lu\n", + "%3d %6.6s %pi6 %pi6 %6lu %6lu\n", state->dev->ifindex, state->dev->name, &state->im->mca_addr, &psf->sf_addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 720bfedd318..d69fa462d3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2408,16 +2408,16 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - seq_printf(m, "%#p6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, "%#p6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - seq_printf(m, "%#p6", rt->rt6i_nexthop->primary_key); + seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } -- cgit v1.2.3 From 5b095d98928fdb9e3b75be20a54b7a6cbf6ca9ad Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 29 Oct 2008 12:52:50 -0700 Subject: net: replace %p6 with %pI6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 10 +++++----- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/ipv6/ipcomp6.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d28036659d2..6ff73c4c126 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,7 +186,7 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n", + ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, ifindex, label); return label; @@ -201,7 +201,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -289,7 +289,7 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); @@ -313,7 +313,7 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { @@ -337,7 +337,7 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 9bc43f2527c..7a8a01369e5 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -419,7 +419,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n", + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ec4be188c34..c02a6308def 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -367,7 +367,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n", + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a8905146835..1c7f400a3cf 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr); + KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b3fa38e40dc..3c2821f9b52 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -681,7 +681,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n", + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", saddr, daddr); goto discard_it; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 798e6a29dd8..c491fb98a5e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,7 +297,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld", + seq_printf(seq, "%pI6 %pI6 %-3d %8ld %8ld %8ld", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent, mfc->mfc_un.res.pkt, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 9566d8f7314..d4576a9c154 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -67,7 +67,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n", + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 191bb0722a7..2a6752dae09 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -647,7 +647,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n", + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a61ce301000..02885e8bb69 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr); + printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b165a273c6c..727b9530448 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,7 +56,7 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%p6 dst=%p6 ", + return seq_printf(s, "src=%pI6 dst=%pI6 ", tuple->src.u3.ip6, tuple->dst.u3.ip6); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 483550cfdf3..984276463a8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -872,7 +872,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); -- cgit v1.2.3 From 673d57e72398edfedc93fb50ff58048077c9d587 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:53:57 -0700 Subject: net: replace NIPQUAD() in net/ipv4/ net/ipv6/ Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_LOG.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 02885e8bb69..7c668c63f70 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -424,9 +424,8 @@ ip6t_log_packet(u_int8_t pf, if (skb->dev->type == ARPHRD_SIT) { const struct iphdr *iph = (struct iphdr *)skb_mac_header(skb); - printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ", - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + printk("TUNNEL=%pI4->%pI4 ", + &iph->saddr, &iph->daddr); } } else printk(" "); -- cgit v1.2.3 From 7e3a42a12c4b9d99bfe81cb929cadf0e08a37c49 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Sat, 1 Nov 2008 21:12:07 -0700 Subject: xfrm6: handling fragment RFC4301 Section 7.1 says: "7.1. Tunnel Mode SAs that Carry Initial and Non-Initial Fragments All implementations MUST support tunnel mode SAs that are configured to pass traffic without regard to port field (or ICMP type/code or Mobility Header type) values. If the SA will carry traffic for specified protocols, the selector set for the SA MUST specify the port fields (or ICMP type/code or Mobility Header type) as ANY. An SA defined in this fashion will carry all traffic including initial and non-initial fragments for the indicated Local/Remote addresses and specified Next Layer protocol(s)." But for IPv6, fragment is treated as a protocol. This change catches protocol transported in fragmented packet. In IPv4, there is no problem. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 08e4cbbe3f0..604bc0a96c0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -144,6 +144,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) static inline void _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { + int onlyproto = 0; u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct ipv6_opt_hdr *exthdr; @@ -159,6 +160,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) exthdr = (struct ipv6_opt_hdr *)(nh + offset); switch (nexthdr) { + case NEXTHDR_FRAGMENT: + onlyproto = 1; case NEXTHDR_ROUTING: case NEXTHDR_HOP: case NEXTHDR_DEST: @@ -172,7 +175,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: - if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) { + if (!onlyproto && pskb_may_pull(skb, nh + offset + 4 - skb->data)) { __be16 *ports = (__be16 *)exthdr; fl->fl_ip_sport = ports[!!reverse]; @@ -182,7 +185,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) return; case IPPROTO_ICMPV6: - if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) { + if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; fl->fl_icmp_type = icmp[0]; @@ -193,7 +196,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: - if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; -- cgit v1.2.3 From 6d9f239a1edb31d6133230f478fd1dc2da338ec5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Nov 2008 18:21:05 -0800 Subject: net: '&' redux I want to compile out proc_* and sysctl_* handlers totally and stub them to NULL depending on config options, however usage of & will prevent this, since taking adress of NULL pointer will break compilation. So, drop & in front of every ->proc_handler and every ->strategy handler, it was never needed in fact. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 62 +++++++++++++------------- net/ipv6/icmp.c | 4 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +-- net/ipv6/reassembly.c | 12 ++--- net/ipv6/route.c | 34 +++++++------- net/ipv6/sysctl_net_ipv6.c | 4 +- 7 files changed, 62 insertions(+), 62 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ff7ae05f72e..07ee758de9e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4030,8 +4030,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &addrconf_sysctl_forward, - .strategy = &addrconf_sysctl_forward_strategy, + .proc_handler = addrconf_sysctl_forward, + .strategy = addrconf_sysctl_forward_strategy, }, { .ctl_name = NET_IPV6_HOP_LIMIT, @@ -4047,7 +4047,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA, @@ -4055,7 +4055,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, @@ -4063,7 +4063,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_AUTOCONF, @@ -4071,7 +4071,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_DAD_TRANSMITS, @@ -4079,7 +4079,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICITS, @@ -4087,7 +4087,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, @@ -4095,8 +4095,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, @@ -4104,8 +4104,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_FORCE_MLD_VERSION, @@ -4113,7 +4113,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.force_mld_version, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_PRIVACY { @@ -4122,7 +4122,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_VALID_LFT, @@ -4130,7 +4130,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, @@ -4138,7 +4138,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_REGEN_MAX_RETRY, @@ -4146,7 +4146,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, @@ -4154,7 +4154,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4163,7 +4163,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, @@ -4171,7 +4171,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, @@ -4179,7 +4179,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { @@ -4188,7 +4188,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, @@ -4196,8 +4196,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { @@ -4206,7 +4206,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif #endif @@ -4216,7 +4216,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, @@ -4224,7 +4224,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { @@ -4233,7 +4233,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif @@ -4244,7 +4244,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4253,7 +4253,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -4261,7 +4261,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0, /* sentinel */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3c2821f9b52..be351009fd0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -956,8 +956,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 05726177903..bd52151d31e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -253,7 +253,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9967ac7a01a..ed4d79a9e4a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -80,7 +80,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, @@ -88,7 +88,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, @@ -96,7 +96,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index af12de071f4..3c575118fca 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -642,7 +642,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, @@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_TIME, @@ -658,8 +658,8 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { } }; @@ -671,8 +671,8 @@ static struct ctl_table ip6_frags_ctl_table[] = { .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d69fa462d3f..4d40dc214b2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2499,7 +2499,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv6_sysctl_rtcache_flush + .proc_handler = ipv6_sysctl_rtcache_flush }, { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, @@ -2507,7 +2507,7 @@ ctl_table ipv6_route_table_template[] = { .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, @@ -2515,7 +2515,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, @@ -2523,8 +2523,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, @@ -2532,8 +2532,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, @@ -2541,8 +2541,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, @@ -2550,8 +2550,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, @@ -2559,8 +2559,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, @@ -2568,8 +2568,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, @@ -2577,8 +2577,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 587f8f60c48..9048fe7e7ea 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] = { .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -47,7 +47,7 @@ static ctl_table ipv6_table[] = { .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; -- cgit v1.2.3 From 305d552accae6afb859c493ebc7d98ca3371dae2 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 4 Nov 2008 17:51:14 -0800 Subject: bonding: send IPv6 neighbor advertisement on failover This patch adds better IPv6 failover support for bonding devices, especially when in active-backup mode and there are only IPv6 addresses configured, as reported by Alex Sidorenko. - Creates a new file, net/drivers/bonding/bond_ipv6.c, for the IPv6-specific routines. Both regular bonds and VLANs over bonds are supported. - Adds a new tunable, num_unsol_na, to limit the number of unsolicited IPv6 Neighbor Advertisements that are sent on a failover event. Default is 1. - Creates two new IPv6 neighbor discovery functions: ndisc_build_skb() ndisc_send_skb() These were required to support VLANs since we have to be able to add the VLAN id to the skb since ndisc_send_na() and friends shouldn't be asked to do this. These two routines are basically __ndisc_send() split into two pieces, in a slightly different order. - Updates Documentation/networking/bonding.txt and bumps the rev of bond support to 3.4.0. On failover, this new code will generate one packet: - An unsolicited IPv6 Neighbor Advertisement, which helps the switch learn that the address has moved to the new slave. Testing has shown that sending just the NA results in pretty good behavior when in active-back mode, I saw no lost ping packets for example. Signed-off-by: Brian Haley Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- net/ipv6/ndisc.c | 92 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 27 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2a6752dae09..fbf451c0d77 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -437,38 +437,20 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -/* - * Send a Neighbour Advertisement - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) +struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo) { - struct flowi fl; - struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; - struct inet6_dev *idev; int len; int err; - u8 *opt, type; - - type = icmp6h->icmp6_type; - - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); - - dst = icmp6_dst_alloc(dev, neigh, daddr); - if (!dst) - return; - - err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) - return; + u8 *opt; if (!dev->addr_len) llinfo = 0; @@ -485,8 +467,7 @@ static void __ndisc_send(struct net_device *dev, ND_PRINTK0(KERN_ERR "ICMPv6 ND: %s() failed to allocate an skb.\n", __func__); - dst_release(dst); - return; + return NULL; } skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -513,6 +494,42 @@ static void __ndisc_send(struct net_device *dev, csum_partial((__u8 *) hdr, len, 0)); + return skb; +} + +EXPORT_SYMBOL(ndisc_build_skb); + +void ndisc_send_skb(struct sk_buff *skb, + struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h) +{ + struct flowi fl; + struct dst_entry *dst; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.ndisc_sk; + struct inet6_dev *idev; + int err; + u8 type; + + type = icmp6h->icmp6_type; + + icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + + dst = icmp6_dst_alloc(dev, neigh, daddr); + if (!dst) { + kfree_skb(skb); + return; + } + + err = xfrm_lookup(&dst, &fl, NULL, 0); + if (err < 0) { + kfree_skb(skb); + return; + } + skb->dst = dst; idev = in6_dev_get(dst->dev); @@ -529,6 +546,27 @@ static void __ndisc_send(struct net_device *dev, in6_dev_put(idev); } +EXPORT_SYMBOL(ndisc_send_skb); + +/* + * Send a Neighbour Discover packet + */ +static void __ndisc_send(struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, const struct in6_addr *target, + int llinfo) +{ + struct sk_buff *skb; + + skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); + if (!skb) + return; + + ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); +} + static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, -- cgit v1.2.3 From 6bb3ce25d05f2990c8a19adaf427531430267c1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 17:25:22 -0800 Subject: net: remove struct dst_entry::entry_size Unused after kmem_cache_zalloc() conversion. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 -- net/ipv6/xfrm6_policy.c | 1 - 2 files changed, 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d40dc214b2..9da1ece466a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -108,7 +108,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 604bc0a96c0..3b67ce7786e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -277,7 +277,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; -- cgit v1.2.3 From 88ab1932eac721c6e7336708558fa5ed02c85c80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:39:21 -0800 Subject: udp: Use hlist_nulls in UDP RCU code This is a straightforward patch, using hlist_nulls infrastructure. RCUification already done on UDP two weeks ago. Using hlist_nulls permits us to avoid some memory barriers, both at lookup time and delete time. Patch is large because it adds new macros to include/net/sock.h. These macros will be used by TCP & DCCP in next patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8dafa36b1ba..fd2d9ad4a8a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,19 +108,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -374,11 +376,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net)) @@ -423,7 +425,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = sk_nulls_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -432,7 +434,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { -- cgit v1.2.3 From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:40:17 -0800 Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls RCU was added to UDP lookups, using a fast infrastructure : - sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the price of call_rcu() at freeing time. - hlist_nulls permits to use few memory barriers. This patch uses same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications using short lived TCP connections. A followup patch, converting rwlocks to spinlocks will even speedup this case. __inet_lookup_established() is pretty fast now we dont have to dirty a contended cache line (read_lock/read_unlock) Only established and timewait hashtable are converted to RCU (bind table and listen table are still using traditional locking) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 70 ++++++++++++++++++++++++++++++--------------- net/ipv6/tcp_ipv6.c | 1 + 2 files changed, 48 insertions(+), 23 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1646a565825..c1b4d401fd9 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,24 +25,28 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { + struct hlist_head *list; + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); + __sk_add_node(sk, list); } else { unsigned int hash; + struct hlist_nulls_head *list; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); + __sk_nulls_add_node_rcu(sk, list); } - __sk_add_node(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -63,33 +67,53 @@ struct sock *__inet6_lookup_established(struct net *net, const int dif) { struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begin; + } + goto out; + } } + if (get_nulls_value(node) != slot) + goto begin; + +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; + sk_nulls_for_each_rcu(sk, node, &head->twchain) { + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begintw; + } + goto out; + } } - read_unlock(lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(lock); + if (get_nulls_value(node) != slot) + goto begintw; + sk = NULL; +out: + rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -172,14 +196,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { @@ -192,7 +216,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } @@ -203,7 +227,7 @@ unique: inet->num = lport; inet->sport = htons(lport); WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 984276463a8..b3578705631 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2043,6 +2043,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, -- cgit v1.2.3 From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 19 Nov 2008 15:44:53 -0800 Subject: include/net net/ - csum_partial - remove unnecessary casts The first argument to csum_partial is const void * casts to char/u8 * are not necessary Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 4 ++-- net/ipv6/tcp_ipv6.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index be351009fd0..a77b8d10380 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)icmp6h, + skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_partial((char *)icmp6h, + tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a76199ecad2..870a1d64605 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, len, 0)); + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fbf451c0d77..af6705f03b5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -491,7 +491,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, + csum_partial(hdr, len, 0)); return skb; @@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, len, IPPROTO_ICMPV6, - csum_partial((u8 *) icmph, len, 0)); + csum_partial(icmph, len, 0)); buff->dst = dst; idev = in6_dev_get(dst->dev); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b3578705631..a5d750acd79 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + csum_partial(th, skb->len, skb->csum)); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -915,7 +915,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial((char *)th, th->doff<<2, + csum_partial(th, th->doff<<2, skb->csum)); } } @@ -997,7 +997,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); + buff->csum = csum_partial(t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); -- cgit v1.2.3 From 5bc3eb7e2f0026f246d939851109df99e8e9f64a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:52:05 -0800 Subject: ip: convert to net_device_ops for ioctl Convert to net_device_ops function table pointer for ioctl. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e41f1be6dc..e92ad8455c6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2031,8 +2031,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; err = -EADDRNOTAVAIL; @@ -2048,9 +2048,14 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) p.iph.ttl = 64; ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; if (err == 0) { err = -ENOBUFS; -- cgit v1.2.3 From 5caea4ea7088e80ac5410d04660346094608b909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 00:40:07 -0800 Subject: net: listening_hash get a spinlock per bucket This patch prepares RCU migration of listening_hash table for TCP/DCCP protocols. listening_hash table being small (32 slots per protocol), we add a spinlock for each slot, instead of a single rwlock for whole table. This should reduce hold time of readers, and writers concurrency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c1b4d401fd9..21544b9be25 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,30 +25,30 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - struct hlist_head *list; + struct inet_listen_hashbucket *ilb; - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock(&ilb->lock); + __sk_add_node(sk, &ilb->head); + spin_unlock(&ilb->lock); } else { unsigned int hash; struct hlist_nulls_head *list; + rwlock_t *lock; sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); __sk_nulls_add_node_rcu(sk, list); + write_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -126,10 +126,11 @@ struct sock *inet6_lookup_listener(struct net *net, const struct hlist_node *node; struct sock *result = NULL; int score, hiscore = 0; + struct inet_listen_hashbucket *ilb; - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, - &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { + ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; + spin_lock(&ilb->lock); + sk_for_each(sk, node, &ilb->head) { if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -157,7 +158,7 @@ struct sock *inet6_lookup_listener(struct net *net, } if (result) sock_hold(result); - read_unlock(&hashinfo->lhash_lock); + spin_unlock(&ilb->lock); return result; } -- cgit v1.2.3 From 61d3015808d877eb4ea225b5924feb128b0c1bc7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 20 Nov 2008 09:58:08 +0100 Subject: netfilter: ip6table_filter: merge LOCAL_IN and FORWARD hooks Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6table_filter.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index b110a8a85a1..40d2e36d8fa 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,7 +61,7 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_local_in_hook(unsigned int hook, +ip6t_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -71,17 +71,6 @@ ip6t_local_in_hook(unsigned int hook, dev_net(in)->ipv6.ip6table_filter); } -static unsigned int -ip6t_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_filter); -} - static unsigned int ip6t_local_out_hook(unsigned int hook, struct sk_buff *skb, @@ -105,14 +94,14 @@ ip6t_local_out_hook(unsigned int hook, static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_local_in_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { - .hook = ip6t_forward_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_FORWARD, -- cgit v1.2.3 From 007c3838d9fdcc8fdaea87e4879ec3759f016ed5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:28:35 -0800 Subject: ipmr: convert ipmr virtual interface to net_device_ops Convert to new network device ops interface. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 363ae258ee1..dfba9fd0c24 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -416,12 +416,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = ®_vif_netdev_ops; dev->destructor = free_netdev; } -- cgit v1.2.3 From 1326c3d5a4b792a2b15877feb7fb691f8945d203 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:33:56 -0800 Subject: ipv6: convert tunnels to net_device_ops Like IPV4, convert the tunnel virtual devices to use net_device_ops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 29 +++++++++++++++-------------- net/ipv6/sit.c | 33 +++++++++++++++++---------------- 2 files changed, 32 insertions(+), 30 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 64ce3d33d9c..adfcd652792 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -74,8 +74,8 @@ MODULE_LICENSE("GPL"); (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ (HASH_SIZE - 1)) -static int ip6_fb_tnl_dev_init(struct net_device *dev); -static int ip6_tnl_dev_init(struct net_device *dev); +static void ip6_fb_tnl_dev_init(struct net_device *dev); +static void ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static int ip6_tnl_net_id; @@ -249,7 +249,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) } t = netdev_priv(dev); - dev->init = ip6_tnl_dev_init; + ip6_tnl_dev_init(dev); t->parms = *p; if ((err = register_netdevice(dev)) < 0) @@ -1306,6 +1306,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return 0; } + +static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_uninit = ip6_tnl_dev_uninit, + .ndo_start_xmit = ip6_tnl_xmit, + .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_change_mtu = ip6_tnl_change_mtu, +}; + /** * ip6_tnl_dev_setup - setup virtual tunnel device * @dev: virtual device associated with tunnel @@ -1316,11 +1324,8 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static void ip6_tnl_dev_setup(struct net_device *dev) { - dev->uninit = ip6_tnl_dev_uninit; + dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ip6_tnl_xmit; - dev->do_ioctl = ip6_tnl_ioctl; - dev->change_mtu = ip6_tnl_change_mtu; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); @@ -1349,13 +1354,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) * @dev: virtual device associated with tunnel **/ -static int -ip6_tnl_dev_init(struct net_device *dev) +static void ip6_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); ip6_tnl_dev_init_gen(dev); ip6_tnl_link_config(t); - return 0; } /** @@ -1365,8 +1368,7 @@ ip6_tnl_dev_init(struct net_device *dev) * Return: 0 **/ -static int -ip6_fb_tnl_dev_init(struct net_device *dev) +static void ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); @@ -1376,7 +1378,6 @@ ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); ip6n->tnls_wc[0] = t; - return 0; } static struct xfrm6_tunnel ip4ip6_handler = { @@ -1429,7 +1430,7 @@ static int ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; - ip6n->fb_tnl_dev->init = ip6_fb_tnl_dev_init; + ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); dev_net_set(ip6n->fb_tnl_dev, net); err = register_netdev(ip6n->fb_tnl_dev); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b7a50e96850..1ebf99a6776 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,8 +62,8 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -static int ipip6_fb_tunnel_init(struct net_device *dev); -static int ipip6_tunnel_init(struct net_device *dev); +static void ipip6_fb_tunnel_init(struct net_device *dev); +static void ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static int sit_net_id; @@ -188,7 +188,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - dev->init = ipip6_tunnel_init; + ipip6_tunnel_init(dev); + nt->parms = *parms; if (parms->i_flags & SIT_ISATAP) @@ -926,13 +927,17 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops ipip6_netdev_ops = { + .ndo_uninit = ipip6_tunnel_uninit, + .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_do_ioctl = ipip6_tunnel_ioctl, + .ndo_change_mtu = ipip6_tunnel_change_mtu, +}; + static void ipip6_tunnel_setup(struct net_device *dev) { - dev->uninit = ipip6_tunnel_uninit; + dev->netdev_ops = &ipip6_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipip6_tunnel_xmit; - dev->do_ioctl = ipip6_tunnel_ioctl; - dev->change_mtu = ipip6_tunnel_change_mtu; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); @@ -943,11 +948,9 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipip6_tunnel_init(struct net_device *dev) +static void ipip6_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -956,11 +959,9 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - - return 0; } -static int ipip6_fb_tunnel_init(struct net_device *dev) +static void ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -977,7 +978,6 @@ static int ipip6_fb_tunnel_init(struct net_device *dev) dev_hold(dev); sitn->tunnels_wc[0] = tunnel; - return 0; } static struct xfrm_tunnel sit_handler = { @@ -1026,7 +1026,7 @@ static int sit_init_net(struct net *net) goto err_alloc_dev; } - sitn->fb_tunnel_dev->init = ipip6_fb_tunnel_init; + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); dev_net_set(sitn->fb_tunnel_dev, net); if ((err = register_netdev(sitn->fb_tunnel_dev))) @@ -1035,6 +1035,7 @@ static int sit_init_net(struct net *net) return 0; err_reg_dev: + dev_put(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: /* nothing */ -- cgit v1.2.3 From 9db66bdcc83749affe61c61eb8ff3cf08f42afec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 20:39:09 -0800 Subject: net: convert TCP/DCCP ehash rwlocks to spinlocks Now TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks. /proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'. This should speedup writers, since spin_lock()/spin_unlock() only use one atomic operation instead of two for write_lock()/write_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 21544b9be25..e0fd68187f8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk) } else { unsigned int hash; struct hlist_nulls_head *list; - rwlock_t *lock; + spinlock_t *lock; sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); - write_lock(lock); + spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); - write_unlock(lock); + spin_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { @@ -230,8 +229,8 @@ unique: WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -246,7 +245,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } -- cgit v1.2.3 From cf005b1d0e34d8c964347331c43de089c674a5a1 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Fri, 21 Nov 2008 17:15:03 -0800 Subject: net: remove redundant argument comments Remove redundant argument comments in files of net/* Signed-off-by: Qinghuang Feng Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index adfcd652792..358f001d848 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1150,7 +1150,6 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) * ip6_tnl_change - update the tunnel parameters * @t: tunnel to be changed * @p: tunnel configuration parameters - * @active: != 0 if tunnel is ready for use * * Description: * ip6_tnl_change() updates the tunnel parameters -- cgit v1.2.3 From c25eb3bfb97294d0543a81230fbc237046b4b84c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 17:22:55 -0800 Subject: net: Convert TCP/DCCP listening hash tables to use RCU This is the last step to be able to perform full RCU lookups in __inet_lookup() : After established/timewait tables, we add RCU lookups to listening hash table. The only trick here is that a socket of a given type (TCP ipv4, TCP ipv6, ...) can now flight between two different tables (established and listening) during a RCU grace period, so we must use different 'nulls' end-of-chain values for two tables. We define a large value : #define LISTENING_NULLS_BASE (1U << 29) So that slots in listening table are guaranteed to have different end-of-chain values than slots in established table. A reader can still detect it finished its lookup in the right chain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 94 ++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 35 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index e0fd68187f8..8fe267feb81 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -33,7 +33,7 @@ void __inet6_hash(struct sock *sk) ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); - __sk_add_node(sk, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->head); spin_unlock(&ilb->lock); } else { unsigned int hash; @@ -118,47 +118,71 @@ out: } EXPORT_SYMBOL(__inet6_lookup_established); +static int inline compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, + const struct in6_addr *daddr, + const int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; - const struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore = 0; - struct inet_listen_hashbucket *ilb; - - ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - spin_lock(&ilb->lock); - sk_for_each(sk, node, &ilb->head) { - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && - sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } + const struct hlist_nulls_node *node; + struct sock *result; + int score, hiscore; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + hiscore = score; + result = sk; } } - if (result) - sock_hold(result); - spin_unlock(&ilb->lock); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } -- cgit v1.2.3 From be77e5930725c3e77bcc0fb1def28e016080d0a1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 23 Nov 2008 17:26:26 -0800 Subject: net: fix tunnels in netns after ndo_ changes dev_net_set() should be the very first thing after alloc_netdev(). "ndo_" changes turned simple assignment (which is OK to do before netns assignment) into quite non-trivial operation (which is not OK, init_net was used). This leads to incomplete initialisation of tunnel device in netns. BUG: unable to handle kernel NULL pointer dereference at 00000004 IP: [] ip6_tnl_exit_net+0x37/0x4f *pde = 00000000 Oops: 0000 [#1] PREEMPT DEBUG_PAGEALLOC last sysfs file: /sys/class/net/lo/operstate Pid: 10, comm: netns Not tainted (2.6.28-rc6 #1) EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at ip6_tnl_exit_net+0x37/0x4f EAX: 00000000 EBX: 00000020 ECX: 00000000 EDX: 00000003 ESI: c5caef30 EDI: c782bbe8 EBP: c7909f50 ESP: c7909f48 DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 Process netns (pid: 10, ti=c7908000 task=c7905780 task.ti=c7908000) Stack: c03e75e0 c7390bc8 c7909f60 c0245448 c7390bd8 c7390bf0 c7909fa8 c012577a 00000000 00000002 00000000 c0125736 c782bbe8 c7909f90 c0308fe3 c782bc04 c7390bd4 c0245406 c084b718 c04f0770 c03ad785 c782bbe8 c782bc04 c782bc0c Call Trace: [] ? cleanup_net+0x42/0x82 [] ? run_workqueue+0xd6/0x1ae [] ? run_workqueue+0x92/0x1ae [] ? schedule+0x275/0x285 [] ? cleanup_net+0x0/0x82 [] ? worker_thread+0x81/0x8d [] ? autoremove_wake_function+0x0/0x33 [] ? worker_thread+0x0/0x8d [] ? kthread+0x39/0x5e [] ? kthread+0x0/0x5e [] ? kernel_thread_helper+0x7/0x10 Code: db e8 05 ff ff ff 89 c6 e8 dc 04 f6 ff eb 08 8b 40 04 e8 38 89 f5 ff 8b 44 9e 04 85 c0 75 f0 43 83 fb 20 75 f2 8b 86 84 00 00 00 <8b> 40 04 e8 1c 89 f5 ff e8 98 04 f6 ff 89 f0 e8 f8 63 e6 ff 5b EIP: [] ip6_tnl_exit_net+0x37/0x4f SS:ESP 0068:c7909f48 ---[ end trace 6c2f2328fccd3e0c ]--- Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/sit.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 358f001d848..ef249ab5c93 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1428,9 +1428,9 @@ static int ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; + dev_net_set(ip6n->fb_tnl_dev, net); ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); - dev_net_set(ip6n->fb_tnl_dev, net); err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1ebf99a6776..d3467e563f0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1025,9 +1025,9 @@ static int sit_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } + dev_net_set(sitn->fb_tunnel_dev, net); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - dev_net_set(sitn->fb_tunnel_dev, net); if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; -- cgit v1.2.3 From 6daad37230ab02bb593d179d704079d4b5912bd7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 01:05:09 -0800 Subject: ah4/ah6: remove useless NULL assignments struct will be kfreed in a moment, so... Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7a8a01369e5..13e330d8917 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -509,9 +509,7 @@ static void ah6_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } -- cgit v1.2.3 From fb7e06748c29c08a9f5ca057a780b65acbb91c27 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 01:05:54 -0800 Subject: xfrm: remove useless forward declarations Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/xfrm6_state.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 60c78cfc273..0e685b05496 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -19,8 +19,6 @@ #include #include -static struct xfrm_state_afinfo xfrm6_state_afinfo; - static void __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, -- cgit v1.2.3 From 9f40ac713c49fb6ca655550b620edc85c445d743 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 25 Nov 2008 12:18:11 +0100 Subject: netfilter: nfmark IPV6 routing in OUTPUT, mangle, NFQUEUE This patch let nfmark to be evaluated for routing decision for OUTPUT packet, in mangle table, when process paquet in NFQUEUE. This patch is an IPv6 port of Laurent Licour IPv4 one. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/ipv6/netfilter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index fd5b3a4e332..0b88c563279 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -56,6 +56,7 @@ EXPORT_SYMBOL(ip6_route_me_harder); struct ip6_rt_info { struct in6_addr daddr; struct in6_addr saddr; + u_int32_t mark; }; static void nf_ip6_saveroute(const struct sk_buff *skb, @@ -68,6 +69,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; + rt_info->mark = skb->mark; } } @@ -79,7 +81,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (entry->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || - !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || + skb->mark != rt_info->mark) return ip6_route_me_harder(skb); } return 0; -- cgit v1.2.3 From 55205d400efe3260e29ad26dd64c992cec2efafd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 25 Nov 2008 16:50:30 -0800 Subject: ipv6: fix warning in net/ipv6/ip6_flowlabel.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: net/ipv6/ip6_flowlabel.c: In function ‘ipv6_flowlabel_opt’: net/ipv6/ip6_flowlabel.c:467: warning: ‘err’ may be used uninitialized in this function triggers because GCC does not recognize the (correct) error flow between fl_create() and 'err'. Annotate it. Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7927a8498d1..5656e8aa47d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -464,7 +464,7 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { - int err; + int uninitialized_var(err); struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; -- cgit v1.2.3 From 673c09be457bb23aa0eaaa79804cbb342210d195 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:15:16 -0800 Subject: netns xfrm: add struct xfrm_state::xs_net To avoid unnecessary complications with passing netns around. * set once, very early after allocating * once set, never changes For a while create every xfrm_state in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ipcomp6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index d4576a9c154..c369638e208 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -76,7 +76,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (!t) goto out; -- cgit v1.2.3 From 221df1ed33c9284fc7a6f6e47ca7f8d5f3665d43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:30:50 -0800 Subject: netns xfrm: state lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 13e330d8917..6ae014b86b6 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -415,7 +415,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c02a6308def..68f2af8c15c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -364,7 +364,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index c369638e208..3a0b3be7ece 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -63,7 +63,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -114,7 +114,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a71c7ddcb41..b69766a7774 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -100,7 +100,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6); if (!x) continue; -- cgit v1.2.3 From 52479b623d3d41df84c499325b6a8c7915413032 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:18 -0800 Subject: netns xfrm: lookup in netns Pass netns to xfrm_lookup()/__xfrm_lookup(). For that pass netns to flow_cache_lookup() and resolver callback. Take it from socket or netdevice. Stub DECnet to init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 3 ++- net/ipv6/icmp.c | 6 +++--- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 5 +++-- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 3 ++- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 11 ++++++----- net/ipv6/udp.c | 3 ++- 13 files changed, 27 insertions(+), 22 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 01edac88851..437b750b98f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e44deb8d4df..e2bdc6d83a4 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,7 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a77b8d10380..4f433847d95 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, /* No need to clone since we're just using its address. */ dst2 = dst; - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); switch (err) { case 0: if (dst != dst2) @@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (ip6_dst_lookup(sk, &dst2, &fl)) goto relookup_failed; - err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); @@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 16d43f20b32..3c3732d50c1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; kfree_skb(skb); return err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ef249ab5c93..58e2b0d9375 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { + struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(dev_net(dev), NULL, fl); + dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) + if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 870a1d64605..0f389603283 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1466,7 +1466,7 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -1831,7 +1831,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index af6705f03b5..e4acc212345 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -524,7 +524,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err < 0) { kfree_skb(skb); return; @@ -1524,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) return; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index fd5b3a4e332..627e21db65d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0981b4ccb8b..5a2d0a41694 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) + if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ba04d41dc2..61f6827e590 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 676c80b5b14..711175e0571 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a5d750acd79..f259c9671f3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } @@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) goto done; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; skb = tcp_make_synack(sk, dst, req); @@ -1018,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1316,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fd2d9ad4a8a..38390dd1963 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -849,7 +849,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) -- cgit v1.2.3 From ddcfd79680c1dc74eb5f24aa70785c11bf7eec8f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:37:23 -0800 Subject: netns xfrm: dst garbage-collecting in netns Pass netns pointer to struct xfrm_policy_afinfo::garbage_collect() [This needs more thoughts on what to do with dst_ops] [Currently stub to init_net] Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3b67ce7786e..2df8a78fc3d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -220,7 +220,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(); + xfrm6_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); } -- cgit v1.2.3 From db983c1144884cab10d6397532f4bf05eb0c01d2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:01 -0800 Subject: netns xfrm: KM reporting in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 31295c8f619..f995e19c87a 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -205,6 +205,7 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) { + struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; @@ -247,7 +248,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct sel.sport_mask = htons(~0); sel.ifindex = fl->oif; - err = km_report(IPPROTO_DSTOPTS, &sel, + err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); out: -- cgit v1.2.3 From c5b3cf46eabe6e7459125fc6e2033b4222665017 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:25 -0800 Subject: netns xfrm: ->dst_lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2df8a78fc3d..d7a5b8bc377 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,7 +27,8 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = {}; @@ -38,7 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); err = dst->error; if (dst->error) { @@ -54,7 +55,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(0, NULL, daddr); + dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; -- cgit v1.2.3 From fbda33b2b85941c1ae3a0d89522dec5c1b1bd98c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:56:49 -0800 Subject: netns xfrm: ->get_saddr in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d7a5b8bc377..97ab068e8cc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -50,12 +50,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm6_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; -- cgit v1.2.3 From 4fb236bac9fc7d51e2267866de6d4c30e549d2f8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:59:27 -0800 Subject: netns xfrm: AH/ESP in netns! Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 3 ++- net/ipv6/esp6.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6ae014b86b6..52449f7a1b7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -407,6 +407,7 @@ out: static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; @@ -415,7 +416,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 68f2af8c15c..c2f250150db 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -356,6 +356,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; @@ -364,7 +365,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", -- cgit v1.2.3 From 59c9940ed0ef026673cac52f2eaed77af7d486da Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:59:52 -0800 Subject: netns xfrm: per-netns MIBs Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b69766a7774..9084582d236 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { + struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; int i = 0; @@ -67,7 +68,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -76,7 +77,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -100,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); if (!x) continue; @@ -122,7 +123,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (!x) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } -- cgit v1.2.3 From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:16:35 -0800 Subject: net: Use a percpu_counter for sockets_allocated Instead of using one atomic_t per protocol, use a percpu_counter for "sockets_allocated", to reduce cache line contention on heavy duty network servers. Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9 net: af_unix can make unix_nr_socks visbile in /proc), since it is not anymore used after sock_prot_inuse_add() addition Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f259c9671f3..8702b06cb60 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } -- cgit v1.2.3 From 1ea472e2dedcf23d5f31c63fc790cccfab93c0de Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 3 Dec 2008 22:21:47 -0800 Subject: net: fix /proc/net/ip_mr_cache display - V2 /proc/net/ip_mr_cache and /proc/net/ip6_mr_cache displays garbage when showing unresolved mfc_cache entries. [root@qemu tests]# cat /proc/net/ip_mr_cache Group Origin Iif Pkts Bytes Wrong Oifs 014C00EF 010014AC 1 10 10050 0 2:1 3:1 024C00EF 010014AC 65535 514 2 -559067475 The first line is correct. It is a resolved cache entry, 10 packets used it... The second line represents an unresolved entry, and the columns Pkts(4th), Bytes(5th) and Wrong(6th) just show garbage. In struct mfc_cache, there's an union to store data for resolved and unresolved cases. And what ipmr_mfc_seq_show() is printing in these columns for the unresolved entries is some bytes from mfc_cache.mfc_un.res. Bad. (eg. In our case -559067475 is in fact 0xdead4ead which is the spinlock magic from mfc_cache.mfc_un.unres.unresolved.lock.magic). This patch replaces the garbage data written in these columns for the unresolved entries by '0' (zeros) which is more correct. This change doesn't break the ABI. Also, mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, mfc->mfc_un.res.wrong_if are unsigned long. It applies on top of net-next-2.6. The patch for net-2.6 is slightly different because of the NIP6_FMT to %pI6 conversion that was made in the seq_printf. Changelog: ========== V2: * Instead of breaking the ABI by suppressing the columns that have no meaning for unresolved entries, fill them with 0 values. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index dfba9fd0c24..2dc4b019087 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,14 +297,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%pI6 %pI6 %-3d %8ld %8ld %8ld", + seq_printf(seq, "%pI6 %pI6 %-3d", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, - mfc->mf6c_parent, - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); + mfc->mf6c_parent); if (it->cache != &mfc_unres_queue) { + seq_printf(seq, " %8lu %8lu %8lu", + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { if (MIF_EXISTS(n) && @@ -313,6 +314,11 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); } + } else { + /* unresolved mfc_caches don't contain + * pkt, bytes and wrong_if values + */ + seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } -- cgit v1.2.3 From 999890b21a8eff7559a140fcbd2cd4b34e685c76 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 3 Dec 2008 22:22:16 -0800 Subject: net: /proc/net/ip_mr_cache, display Iif as a signed short Today, iproute2 fails to show multicast forwarding unresolved cache entries while scanning /proc/net/ip_mr_cache. Indeed, it expects to see -1 in 'Iif' column to identify unresolved entries but the kernel outputs 65535. It's a signed/unsigned issue: 'Iif', the source interface, is retrieved from member mfc_parent in struct mfc_cache. mfc_parent is a vifi_t: unsigned short, but is displayed in ipmr_mfc_seq_show() as "%-3d", signed integer. In unresolevd entries, the 65535 value (0xFFFF) comes from this define: #define ALL_VIFS ((vifi_t)(-1)) That may explains why the guy who added support for this in iproute2 thought a -1 should be expected. I don't know if this must be fixed in kernel or in iproute2. Who is right? What is the correct API? How was it designed originally? I let you decide if it should goes in the kernel or be fixed in iproute2. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2dc4b019087..1446bec895a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,7 +297,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%pI6 %pI6 %-3d", + seq_printf(seq, "%pI6 %pI6 %-3hd", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent); -- cgit v1.2.3 From 36cbac5909d227c4de31fb93e1dd99c839c9cb6f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Dec 2008 22:27:25 -0800 Subject: net/ipv6/ip6mr.c: Use kmem_cache_zalloc, remove memset Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1446bec895a..d1008e6891e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -692,20 +692,18 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a */ static struct mfc6_cache *ip6mr_cache_alloc(void) { - struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL); + struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; - memset(c, 0, sizeof(*c)); c->mfc_un.res.minvif = MAXMIFS; return c; } static struct mfc6_cache *ip6mr_cache_alloc_unres(void) { - struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); + struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; - memset(c, 0, sizeof(*c)); skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; return c; -- cgit v1.2.3 From bd91b8bf372911c1e4d66d6bb44fe409349a6791 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:07:08 -0800 Subject: netns: ip6mr: allocate mroute6_socket per-namespace. Preliminary work to make IPv6 multicast forwarding netns-aware. Make IPv6 multicast forwarding mroute6_socket per-namespace, moves it into struct netns_ipv6. At the moment, mroute6_socket is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 ++- net/ipv6/ip6mr.c | 22 ++++++++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7d92fd97cfb..4b15938bef4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -137,7 +137,8 @@ static int ip6_output2(struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && - ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || + ((mroute6_socket(dev_net(dev)) && + !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d1008e6891e..02163dbf84c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -49,9 +49,6 @@ #include #include -struct sock *mroute6_socket; - - /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -820,7 +817,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) skb_pull(skb, sizeof(struct ipv6hdr)); } - if (mroute6_socket == NULL) { + if (init_net.ipv6.mroute6_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -828,7 +825,8 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) /* * Deliver to user space multicast routing algorithms */ - if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) { + ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb); + if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); kfree_skb(skb); @@ -1145,8 +1143,8 @@ static int ip6mr_sk_init(struct sock *sk) rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(mroute6_socket == NULL)) - mroute6_socket = sk; + if (likely(init_net.ipv6.mroute6_sk == NULL)) + init_net.ipv6.mroute6_sk = sk; else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1161,9 +1159,9 @@ int ip6mr_sk_done(struct sock *sk) int err = 0; rtnl_lock(); - if (sk == mroute6_socket) { + if (sk == init_net.ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); - mroute6_socket = NULL; + init_net.ipv6.mroute6_sk = NULL; write_unlock_bh(&mrt_lock); mroute_clean_tables(sk); @@ -1189,7 +1187,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int mifi_t mifi; if (optname != MRT6_INIT) { - if (sk != mroute6_socket && !capable(CAP_NET_ADMIN)) + if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1214,7 +1212,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(&vif, sk == mroute6_socket); + ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1242,7 +1240,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (optname == MRT6_DEL_MFC) ret = ip6mr_mfc_delete(&mfc); else - ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket); + ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk); rtnl_unlock(); return ret; -- cgit v1.2.3 From 4e16880cb4225bfa68878ad5b2a9ded53657d054 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:15:08 -0800 Subject: netns: ip6mr: dynamically allocates vif6_table Preliminary work to make IPv6 multicast forwarding netns-aware. Dynamically allocates interface table vif6_table and moves it to struct netns_ipv6, and updates MIF_EXISTS() macro. At the moment, vif6_table is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 107 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 39 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 02163dbf84c..bae3ef64966 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -59,10 +59,7 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -static struct mif_device vif6_table[MAXMIFS]; /* Devices */ -static int maxvif; - -#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL) +#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) static int mroute_do_assert; /* Set in PIM assert */ #ifdef CONFIG_IPV6_PIMSM_V2 @@ -145,11 +142,11 @@ struct ipmr_vif_iter { static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { - if (!MIF_EXISTS(iter->ct)) + for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) { + if (!MIF_EXISTS(&init_net, iter->ct)) continue; if (pos-- == 0) - return &vif6_table[iter->ct]; + return &init_net.ipv6.vif6_table[iter->ct]; } return NULL; } @@ -170,10 +167,10 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return ip6mr_vif_seq_idx(iter, 0); - while (++iter->ct < maxvif) { - if (!MIF_EXISTS(iter->ct)) + while (++iter->ct < init_net.ipv6.maxvif) { + if (!MIF_EXISTS(&init_net, iter->ct)) continue; - return &vif6_table[iter->ct]; + return &init_net.ipv6.vif6_table[iter->ct]; } return NULL; } @@ -195,7 +192,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - vif6_table, + vif - init_net.ipv6.vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -305,7 +302,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(n) && + if (MIF_EXISTS(&init_net, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", @@ -374,7 +371,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = vif6_table[reg_vif_num].dev; + reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -470,10 +467,10 @@ static int mif6_delete(int vifi) { struct mif_device *v; struct net_device *dev; - if (vifi < 0 || vifi >= maxvif) + if (vifi < 0 || vifi >= init_net.ipv6.maxvif) return -EADDRNOTAVAIL; - v = &vif6_table[vifi]; + v = &init_net.ipv6.vif6_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -489,13 +486,13 @@ static int mif6_delete(int vifi) reg_vif_num = -1; #endif - if (vifi + 1 == maxvif) { + if (vifi + 1 == init_net.ipv6.maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(tmp)) + if (MIF_EXISTS(&init_net, tmp)) break; } - maxvif = tmp + 1; + init_net.ipv6.maxvif = tmp + 1; } write_unlock_bh(&mrt_lock); @@ -586,8 +583,9 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); - for (vifi = 0; vifi < maxvif; vifi++) { - if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { + for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) { + if (MIF_EXISTS(&init_net, vifi) && + ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; @@ -600,12 +598,12 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl static int mif6_add(struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &vif6_table[vifi]; + struct mif_device *v = &init_net.ipv6.vif6_table[vifi]; struct net_device *dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(vifi)) + if (MIF_EXISTS(&init_net, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -665,8 +663,8 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) if (v->flags & MIFF_REGISTER) reg_vif_num = vifi; #endif - if (vifi + 1 > maxvif) - maxvif = vifi + 1; + if (vifi + 1 > init_net.ipv6.maxvif) + init_net.ipv6.maxvif = vifi + 1; write_unlock_bh(&mrt_lock); return 0; } @@ -946,8 +944,8 @@ static int ip6mr_device_event(struct notifier_block *this, if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &vif6_table[0]; - for (ct = 0; ct < maxvif; ct++, v++) { + v = &init_net.ipv6.vif6_table[0]; + for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) { if (v->dev == dev) mif6_delete(ct); } @@ -962,6 +960,30 @@ static struct notifier_block ip6_mr_notifier = { * Setup for IP multicast routing */ +static int __net_init ip6mr_net_init(struct net *net) +{ + int err = 0; + + net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), + GFP_KERNEL); + if (!net->ipv6.vif6_table) { + err = -ENOMEM; + goto fail; + } +fail: + return err; +} + +static void __net_exit ip6mr_net_exit(struct net *net) +{ + kfree(net->ipv6.vif6_table); +} + +static struct pernet_operations ip6mr_net_ops = { + .init = ip6mr_net_init, + .exit = ip6mr_net_exit, +}; + int __init ip6_mr_init(void) { int err; @@ -973,6 +995,10 @@ int __init ip6_mr_init(void) if (!mrt_cachep) return -ENOMEM; + err = register_pernet_subsys(&ip6mr_net_ops); + if (err) + goto reg_pernet_fail; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); err = register_netdevice_notifier(&ip6_mr_notifier); if (err) @@ -994,6 +1020,8 @@ proc_vif_fail: #endif reg_notif_fail: del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ip6mr_net_ops); +reg_pernet_fail: kmem_cache_destroy(mrt_cachep); return err; } @@ -1006,6 +1034,7 @@ void ip6_mr_cleanup(void) #endif unregister_netdevice_notifier(&ip6_mr_notifier); del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); } @@ -1095,8 +1124,8 @@ static void mroute_clean_tables(struct sock *sk) /* * Shut down all active vif entries */ - for (i = 0; i < maxvif; i++) { - if (!(vif6_table[i].flags & VIFF_STATIC)) + for (i = 0; i < init_net.ipv6.maxvif; i++) { + if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC)) mif6_delete(i); } @@ -1346,11 +1375,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.mifi >= maxvif) + if (vr.mifi >= init_net.ipv6.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &vif6_table[vr.mifi]; - if (MIF_EXISTS(vr.mifi)) { + vif = &init_net.ipv6.vif6_table[vr.mifi]; + if (MIF_EXISTS(&init_net, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1401,7 +1430,7 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &vif6_table[vifi]; + struct mif_device *vif = &init_net.ipv6.vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi fl; @@ -1474,8 +1503,8 @@ out_free: static int ip6mr_find_vif(struct net_device *dev) { int ct; - for (ct = maxvif - 1; ct >= 0; ct--) { - if (vif6_table[ct].dev == dev) + for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) { + if (init_net.ipv6.vif6_table[ct].dev == dev) break; } return ct; @@ -1493,7 +1522,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif6_table[vif].dev != skb->dev) { + if (init_net.ipv6.vif6_table[vif].dev != skb->dev) { int true_vifi; cache->mfc_un.res.wrong_if++; @@ -1514,8 +1543,8 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) goto dont_forward; } - vif6_table[vif].pkt_in++; - vif6_table[vif].bytes_in += skb->len; + init_net.ipv6.vif6_table[vif].pkt_in++; + init_net.ipv6.vif6_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1583,7 +1612,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = vif6_table[c->mf6c_parent].dev; + struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1599,7 +1628,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex; + nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } -- cgit v1.2.3 From 58701ad41105638baa0b38ffe9ac5b10469c1fd3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:22:34 -0800 Subject: netns: ip6mr: store netns in struct mfc6_cache This patch stores into struct mfc6_cache the network namespace each mfc6_cache belongs to. The new member is mfc6_net. mfc6_net is assigned at cache allocation and doesn't change during the rest of the cache entry life. This will help to retrieve the current netns around the IPv6 multicast forwarding code. At the moment, all mfc6_cache are allocated in init_net. Changelog: ========== * Use write_pnet()/read_pnet() to set and get mfc6_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index bae3ef64966..fab5aba28a2 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -506,6 +506,12 @@ static int mif6_delete(int vifi) return 0; } +static inline void ip6mr_cache_free(struct mfc6_cache *c) +{ + release_net(mfc6_net(c)); + kmem_cache_free(mrt_cachep, c); +} + /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ @@ -528,7 +534,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) kfree_skb(skb); } - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); } @@ -685,22 +691,24 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a /* * Allocate a multicast cache entry */ -static struct mfc6_cache *ip6mr_cache_alloc(void) +static struct mfc6_cache *ip6mr_cache_alloc(struct net *net) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXMIFS; + mfc6_net_set(c, net); return c; } -static struct mfc6_cache *ip6mr_cache_alloc_unres(void) +static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; + mfc6_net_set(c, net); return c; } @@ -856,7 +864,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) */ if (atomic_read(&cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres()) == NULL) { + (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -879,7 +887,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) */ spin_unlock_bh(&mfc_unres_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); kfree_skb(skb); return err; } @@ -924,7 +932,7 @@ static int ip6mr_mfc_delete(struct mf6cctl *mfc) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); return 0; } } @@ -1073,7 +1081,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; - c = ip6mr_cache_alloc(); + c = ip6mr_cache_alloc(&init_net); if (c == NULL) return -ENOMEM; @@ -1108,7 +1116,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (uc) { ip6mr_cache_resolve(uc, c); - kmem_cache_free(mrt_cachep, uc); + ip6mr_cache_free(uc); } return 0; } @@ -1145,7 +1153,7 @@ static void mroute_clean_tables(struct sock *sk) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); } } -- cgit v1.2.3 From 4a6258a0e33d042e4c84d9dec25d45ddb40a70b3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:24:07 -0800 Subject: netns: ip6mr: dynamically allocate mfc6_cache_array Preliminary work to make IPv6 multicast forwarding netns-aware. Dynamically allocates IPv6 multicast forwarding cache, mfc6_cache_array, and moves it to struct netns_ipv6. At the moment, mfc6_cache_array is only referenced in init_net. Replace 'ARRAY_SIZE(mfc6_cache_array)' with mfc6_cache_array size: MFC6_LINES. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fab5aba28a2..287e526ba03 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -68,8 +68,6 @@ static int mroute_do_pim; #define mroute_do_pim 0 #endif -static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */ - static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ static atomic_t cache_resolve_queue_len; /* Size of unresolved */ @@ -109,10 +107,11 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) { struct mfc6_cache *mfc; - it->cache = mfc6_cache_array; + it->cache = init_net.ipv6.mfc6_cache_array; read_lock(&mrt_lock); - for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++) - for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next) + for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) + for (mfc = init_net.ipv6.mfc6_cache_array[it->ct]; + mfc; mfc = mfc->next) if (pos-- == 0) return mfc; read_unlock(&mrt_lock); @@ -243,10 +242,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != mfc6_cache_array); + BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array); - while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) { - mfc = mfc6_cache_array[it->ct]; + while (++it->ct < MFC6_LINES) { + mfc = init_net.ipv6.mfc6_cache_array[it->ct]; if (mfc) return mfc; } @@ -274,7 +273,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mfc6_cache_array) + else if (it->cache == init_net.ipv6.mfc6_cache_array) read_unlock(&mrt_lock); } @@ -680,7 +679,7 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - for (c = mfc6_cache_array[line]; c; c = c->next) { + for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) break; @@ -925,7 +924,8 @@ static int ip6mr_mfc_delete(struct mf6cctl *mfc) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + for (cp = &init_net.ipv6.mfc6_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { write_lock_bh(&mrt_lock); @@ -978,12 +978,26 @@ static int __net_init ip6mr_net_init(struct net *net) err = -ENOMEM; goto fail; } + + /* Forwarding cache */ + net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES, + sizeof(struct mfc6_cache *), + GFP_KERNEL); + if (!net->ipv6.mfc6_cache_array) { + err = -ENOMEM; + goto fail_mfc6_cache; + } + return 0; + +fail_mfc6_cache: + kfree(net->ipv6.vif6_table); fail: return err; } static void __net_exit ip6mr_net_exit(struct net *net) { + kfree(net->ipv6.mfc6_cache_array); kfree(net->ipv6.vif6_table); } @@ -1062,7 +1076,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + for (cp = &init_net.ipv6.mfc6_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) break; @@ -1093,8 +1108,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = mfc6_cache_array[line]; - mfc6_cache_array[line] = c; + c->next = init_net.ipv6.mfc6_cache_array[line]; + init_net.ipv6.mfc6_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -1140,10 +1155,10 @@ static void mroute_clean_tables(struct sock *sk) /* * Wipe the cache */ - for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) { + for (i = 0; i < MFC6_LINES; i++) { struct mfc6_cache *c, **cp; - cp = &mfc6_cache_array[i]; + cp = &init_net.ipv6.mfc6_cache_array[i]; while ((c = *cp) != NULL) { if (c->mfc_flags & MFC_STATIC) { cp = &c->next; -- cgit v1.2.3 From 4045e57c19bee150370390545ee8a933b3f7a18d Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:27:21 -0800 Subject: netns: ip6mr: declare counter cache_resolve_queue_len per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare variable cache_resolve_queue_len per-namespace: moves it into struct netns_ipv6. This variable counts the number of unresolved cache entries queued in the list mfc_unres_queue. This list is kept global to all netns as the number of entries per namespace is limited to 10 (hardcoded in routine ip6mr_cache_unresolved). Entries belonging to different namespaces in mfc_unres_queue will be identified by matching the mfc_net member introduced previously in struct mfc6_cache. Keeping this list global to all netns, also allows us to keep a single timer (ipmr_expire_timer) to handle their expiration. In some places cache_resolve_queue_len value was tested for arming or deleting the timer. These tests were equivalent to testing mfc_unres_queue value instead and are replaced in this patch. At the moment, cache_resolve_queue_len is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 287e526ba03..077c8198eb5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -69,7 +69,6 @@ static int mroute_do_pim; #endif static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ -static atomic_t cache_resolve_queue_len; /* Size of unresolved */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -519,7 +518,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) { struct sk_buff *skb; - atomic_dec(&cache_resolve_queue_len); + atomic_dec(&init_net.ipv6.cache_resolve_queue_len); while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { @@ -561,7 +560,7 @@ static void ipmr_do_expire_process(unsigned long dummy) ip6mr_destroy_unres(c); } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) mod_timer(&ipmr_expire_timer, jiffies + expires); } @@ -572,7 +571,7 @@ static void ipmr_expire_process(unsigned long dummy) return; } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) ipmr_do_expire_process(dummy); spin_unlock(&mfc_unres_lock); @@ -852,7 +851,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) spin_lock_bh(&mfc_unres_lock); for (c = mfc_unres_queue; c; c = c->next) { - if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && + if (net_eq(mfc6_net(c), &init_net) && + ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) break; } @@ -862,7 +862,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&cache_resolve_queue_len) >= 10 || + if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 || (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -891,7 +891,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) return err; } - atomic_inc(&cache_resolve_queue_len); + atomic_inc(&init_net.ipv6.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -1119,14 +1119,16 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc = *cp) != NULL; cp = &uc->next) { - if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && + if (net_eq(mfc6_net(uc), &init_net) && + ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { *cp = uc->next; - if (atomic_dec_and_test(&cache_resolve_queue_len)) - del_timer(&ipmr_expire_timer); + atomic_dec(&init_net.ipv6.cache_resolve_queue_len); break; } } + if (mfc_unres_queue == NULL) + del_timer(&ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { @@ -1172,18 +1174,18 @@ static void mroute_clean_tables(struct sock *sk) } } - if (atomic_read(&cache_resolve_queue_len) != 0) { - struct mfc6_cache *c; + if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) { + struct mfc6_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); - while (mfc_unres_queue != NULL) { - c = mfc_unres_queue; - mfc_unres_queue = c->next; - spin_unlock_bh(&mfc_unres_lock); - + cp = &mfc_unres_queue; + while ((c = *cp) != NULL) { + if (!net_eq(mfc6_net(c), &init_net)) { + cp = &c->next; + continue; + } + *cp = c->next; ip6mr_destroy_unres(c); - - spin_lock_bh(&mfc_unres_lock); } spin_unlock_bh(&mfc_unres_lock); } -- cgit v1.2.3 From a21f3f997c73ced682129aedd372bb6b53041510 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:28:44 -0800 Subject: netns: ip6mr: declare mroute_do_assert and mroute_do_pim per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare IPv6 multicast forwarding variables 'mroute_do_assert' and 'mroute_do_pim' per-namespace in struct netns_ipv6. At the moment, these variables are only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 077c8198eb5..2d2ac23b860 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -61,13 +61,6 @@ static DEFINE_RWLOCK(mrt_lock); #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) -static int mroute_do_assert; /* Set in PIM assert */ -#ifdef CONFIG_IPV6_PIMSM_V2 -static int mroute_do_pim; -#else -#define mroute_do_pim 0 -#endif - static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ /* Special spinlock for queue of unresolved entries */ @@ -1306,7 +1299,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - mroute_do_assert = !!v; + init_net.ipv6.mroute_do_assert = !!v; return 0; } @@ -1319,10 +1312,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int v = !!v; rtnl_lock(); ret = 0; - if (v != mroute_do_pim) { - mroute_do_pim = v; - mroute_do_assert = v; - if (mroute_do_pim) + if (v != init_net.ipv6.mroute_do_pim) { + init_net.ipv6.mroute_do_pim = v; + init_net.ipv6.mroute_do_assert = v; + if (init_net.ipv6.mroute_do_pim) ret = inet6_add_protocol(&pim6_protocol, IPPROTO_PIM); else @@ -1361,11 +1354,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: - val = mroute_do_pim; + val = init_net.ipv6.mroute_do_pim; break; #endif case MRT6_ASSERT: - val = mroute_do_assert; + val = init_net.ipv6.mroute_do_assert; break; default: return -ENOPROTOOPT; @@ -1553,13 +1546,14 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) cache->mfc_un.res.wrong_if++; true_vifi = ip6mr_find_vif(skb->dev); - if (true_vifi >= 0 && mroute_do_assert && + if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && + (init_net.ipv6.mroute_do_pim || + cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; -- cgit v1.2.3 From 950d5704e5daa1f90bcd75b99163491e7b249169 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:29:24 -0800 Subject: netns: ip6mr: declare reg_vif_num per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare variable 'reg_vif_num' per-namespace, moves into struct netns_ipv6. At the moment, this variable is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2d2ac23b860..8e693859ea0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -333,13 +333,13 @@ static struct file_operations ip6mr_mfc_fops = { #endif #ifdef CONFIG_IPV6_PIMSM_V2 -static int reg_vif_num = -1; static int pim6_rcv(struct sk_buff *skb) { struct pimreghdr *pim; struct ipv6hdr *encap; struct net_device *reg_dev = NULL; + int reg_vif_num = init_net.ipv6.mroute_reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -401,7 +401,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; @@ -473,8 +473,8 @@ static int mif6_delete(int vifi) } #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == reg_vif_num) - reg_vif_num = -1; + if (vifi == init_net.ipv6.mroute_reg_vif_num) + init_net.ipv6.mroute_reg_vif_num = -1; #endif if (vifi + 1 == init_net.ipv6.maxvif) { @@ -610,7 +610,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (reg_vif_num >= 0) + if (init_net.ipv6.mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ip6mr_reg_vif(); if (!dev) @@ -658,7 +658,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - reg_vif_num = vifi; + init_net.ipv6.mroute_reg_vif_num = vifi; #endif if (vifi + 1 > init_net.ipv6.maxvif) init_net.ipv6.maxvif = vifi + 1; @@ -777,7 +777,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = reg_vif_num; + msg->im6_mif = init_net.ipv6.mroute_reg_vif_num; msg->im6_pad = 0; ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); @@ -964,7 +964,6 @@ static struct notifier_block ip6_mr_notifier = { static int __net_init ip6mr_net_init(struct net *net) { int err = 0; - net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), GFP_KERNEL); if (!net->ipv6.vif6_table) { @@ -980,6 +979,10 @@ static int __net_init ip6mr_net_init(struct net *net) err = -ENOMEM; goto fail_mfc6_cache; } + +#ifdef CONFIG_IPV6_PIMSM_V2 + net->ipv6.mroute_reg_vif_num = -1; +#endif return 0; fail_mfc6_cache: -- cgit v1.2.3 From 8b90fc7e5b43aaef941044a4785a42439015b539 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:29:48 -0800 Subject: netns: ip6mr: declare ip6mr /proc/net entries per-namespace Declare IPv6 multicast forwarding /proc/net entries per-namespace: /proc/net/ip6_mr_vif /proc/net/ip6_mr_cache Changelog ========= V2: * In routine ipmr_mfc_seq_idx(), only match entries belonging to current netns in mfc_unres_queue list. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 107 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 46 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8e693859ea0..d619faf68d9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -90,19 +90,21 @@ static struct timer_list ipmr_expire_timer; #ifdef CONFIG_PROC_FS struct ipmr_mfc_iter { + struct seq_net_private p; struct mfc6_cache **cache; int ct; }; -static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) +static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, + struct ipmr_mfc_iter *it, loff_t pos) { struct mfc6_cache *mfc; - it->cache = init_net.ipv6.mfc6_cache_array; + it->cache = net->ipv6.mfc6_cache_array; read_lock(&mrt_lock); for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) - for (mfc = init_net.ipv6.mfc6_cache_array[it->ct]; + for (mfc = net->ipv6.mfc6_cache_array[it->ct]; mfc; mfc = mfc->next) if (pos-- == 0) return mfc; @@ -111,7 +113,8 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) it->cache = &mfc_unres_queue; spin_lock_bh(&mfc_unres_lock); for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) - if (pos-- == 0) + if (net_eq(mfc6_net(mfc), net) && + pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -127,17 +130,19 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) */ struct ipmr_vif_iter { + struct seq_net_private p; int ct; }; -static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, +static struct mif_device *ip6mr_vif_seq_idx(struct net *net, + struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) { - if (!MIF_EXISTS(&init_net, iter->ct)) + for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) { + if (!MIF_EXISTS(net, iter->ct)) continue; if (pos-- == 0) - return &init_net.ipv6.vif6_table[iter->ct]; + return &net->ipv6.vif6_table[iter->ct]; } return NULL; } @@ -145,23 +150,26 @@ static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { + struct net *net = seq_file_net(seq); + read_lock(&mrt_lock); - return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1) - : SEQ_START_TOKEN); + return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) + : SEQ_START_TOKEN; } static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; + struct net *net = seq_file_net(seq); ++*pos; if (v == SEQ_START_TOKEN) - return ip6mr_vif_seq_idx(iter, 0); + return ip6mr_vif_seq_idx(net, iter, 0); - while (++iter->ct < init_net.ipv6.maxvif) { - if (!MIF_EXISTS(&init_net, iter->ct)) + while (++iter->ct < net->ipv6.maxvif) { + if (!MIF_EXISTS(net, iter->ct)) continue; - return &init_net.ipv6.vif6_table[iter->ct]; + return &net->ipv6.vif6_table[iter->ct]; } return NULL; } @@ -174,6 +182,8 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_net(seq); + if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); @@ -183,7 +193,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - init_net.ipv6.vif6_table, + vif - net->ipv6.vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -200,8 +210,8 @@ static struct seq_operations ip6mr_vif_seq_ops = { static int ip6mr_vif_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ip6mr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + return seq_open_net(inode, file, &ip6mr_vif_seq_ops, + sizeof(struct ipmr_vif_iter)); } static struct file_operations ip6mr_vif_fops = { @@ -209,24 +219,27 @@ static struct file_operations ip6mr_vif_fops = { .open = ip6mr_vif_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { - return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) - : SEQ_START_TOKEN); + struct net *net = seq_file_net(seq); + + return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) + : SEQ_START_TOKEN; } static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct mfc6_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); ++*pos; if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(seq->private, 0); + return ipmr_mfc_seq_idx(net, seq->private, 0); if (mfc->next) return mfc->next; @@ -234,10 +247,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array); + BUG_ON(it->cache != net->ipv6.mfc6_cache_array); while (++it->ct < MFC6_LINES) { - mfc = init_net.ipv6.mfc6_cache_array[it->ct]; + mfc = net->ipv6.mfc6_cache_array[it->ct]; if (mfc) return mfc; } @@ -262,16 +275,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); if (it->cache == &mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == init_net.ipv6.mfc6_cache_array) + else if (it->cache == net->ipv6.mfc6_cache_array) read_unlock(&mrt_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; + struct net *net = seq_file_net(seq); if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -293,7 +308,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(&init_net, n) && + if (MIF_EXISTS(net, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", @@ -319,8 +334,8 @@ static struct seq_operations ipmr_mfc_seq_ops = { static int ipmr_mfc_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + return seq_open_net(inode, file, &ipmr_mfc_seq_ops, + sizeof(struct ipmr_mfc_iter)); } static struct file_operations ip6mr_mfc_fops = { @@ -328,7 +343,7 @@ static struct file_operations ip6mr_mfc_fops = { .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif @@ -983,8 +998,22 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_IPV6_PIMSM_V2 net->ipv6.mroute_reg_vif_num = -1; #endif + +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) + goto proc_cache_fail; +#endif return 0; +#ifdef CONFIG_PROC_FS +proc_cache_fail: + proc_net_remove(net, "ip6_mr_vif"); +proc_vif_fail: + kfree(net->ipv6.mfc6_cache_array); +#endif fail_mfc6_cache: kfree(net->ipv6.vif6_table); fail: @@ -993,6 +1022,10 @@ fail: static void __net_exit ip6mr_net_exit(struct net *net) { +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ip6_mr_cache"); + proc_net_remove(net, "ip6_mr_vif"); +#endif kfree(net->ipv6.mfc6_cache_array); kfree(net->ipv6.vif6_table); } @@ -1021,21 +1054,7 @@ int __init ip6_mr_init(void) err = register_netdevice_notifier(&ip6_mr_notifier); if (err) goto reg_notif_fail; -#ifdef CONFIG_PROC_FS - err = -ENOMEM; - if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) - goto proc_vif_fail; - if (!proc_net_fops_create(&init_net, "ip6_mr_cache", - 0, &ip6mr_mfc_fops)) - goto proc_cache_fail; -#endif return 0; -#ifdef CONFIG_PROC_FS -proc_cache_fail: - proc_net_remove(&init_net, "ip6_mr_vif"); -proc_vif_fail: - unregister_netdevice_notifier(&ip6_mr_notifier); -#endif reg_notif_fail: del_timer(&ipmr_expire_timer); unregister_pernet_subsys(&ip6mr_net_ops); @@ -1046,10 +1065,6 @@ reg_pernet_fail: void ip6_mr_cleanup(void) { -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "ip6_mr_cache"); - proc_net_remove(&init_net, "ip6_mr_vif"); -#endif unregister_netdevice_notifier(&ip6_mr_notifier); del_timer(&ipmr_expire_timer); unregister_pernet_subsys(&ip6mr_net_ops); -- cgit v1.2.3 From 8229efdaef1e7913ae1712c0ba752f267e5fcd5e Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:30:15 -0800 Subject: netns: ip6mr: enable namespace support in ipv6 multicast forwarding code This last patch makes the appropriate changes to use and propagate the network namespace where needed in IPv6 multicast forwarding code. This consists mainly in replacing all the remaining init_net occurences with current netns pointer retrieved from sockets, net devices or mfc6_caches depending on the routines' contexts. Some routines receive a new 'struct net' parameter to propagate the current netns: * ip6mr_get_route * ip6mr_cache_report * ip6mr_cache_find * ip6mr_cache_unresolved * mif6_add/mif6_delete * ip6mr_mfc_add/ip6mr_mfc_delete * ip6mr_reg_vif All the IPv6 multicast forwarding variables moved to struct netns_ipv6 by the previous patches are now referenced in the correct namespace. Changelog: ========== * Take into account the net associated to mfc6_cache when matching entries in mfc_unres_queue list. * Call mroute_clean_tables() in ip6mr_net_exit() to free memory allocated per-namespace. * Call dev_net_set() in ip6mr_reg_vif() to initialize dev->nd_net correctly. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 226 +++++++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 2 +- 2 files changed, 127 insertions(+), 101 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d619faf68d9..9eed2422b3e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -77,8 +77,10 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert); +static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, + mifi_t mifi, int assert); static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct net *net); #ifdef CONFIG_IPV6_PIMSM_V2 static struct inet6_protocol pim6_protocol; @@ -354,7 +356,8 @@ static int pim6_rcv(struct sk_buff *skb) struct pimreghdr *pim; struct ipv6hdr *encap; struct net_device *reg_dev = NULL; - int reg_vif_num = init_net.ipv6.mroute_reg_vif_num; + struct net *net = dev_net(skb->dev); + int reg_vif_num = net->ipv6.mroute_reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -377,7 +380,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev; + reg_dev = net->ipv6.vif6_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -413,10 +416,13 @@ static struct inet6_protocol pim6_protocol = { static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { + struct net *net = dev_net(dev); + read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num, + MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; @@ -435,7 +441,7 @@ static void reg_vif_setup(struct net_device *dev) dev->destructor = free_netdev; } -static struct net_device *ip6mr_reg_vif(void) +static struct net_device *ip6mr_reg_vif(struct net *net) { struct net_device *dev; @@ -443,6 +449,8 @@ static struct net_device *ip6mr_reg_vif(void) if (dev == NULL) return NULL; + dev_net_set(dev, net); + if (register_netdevice(dev)) { free_netdev(dev); return NULL; @@ -469,14 +477,14 @@ failure: * Delete a VIF entry */ -static int mif6_delete(int vifi) +static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; - if (vifi < 0 || vifi >= init_net.ipv6.maxvif) + if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; - v = &init_net.ipv6.vif6_table[vifi]; + v = &net->ipv6.vif6_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -488,17 +496,17 @@ static int mif6_delete(int vifi) } #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == init_net.ipv6.mroute_reg_vif_num) - init_net.ipv6.mroute_reg_vif_num = -1; + if (vifi == net->ipv6.mroute_reg_vif_num) + net->ipv6.mroute_reg_vif_num = -1; #endif - if (vifi + 1 == init_net.ipv6.maxvif) { + if (vifi + 1 == net->ipv6.maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(&init_net, tmp)) + if (MIF_EXISTS(net, tmp)) break; } - init_net.ipv6.maxvif = tmp + 1; + net->ipv6.maxvif = tmp + 1; } write_unlock_bh(&mrt_lock); @@ -525,8 +533,9 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c) static void ip6mr_destroy_unres(struct mfc6_cache *c) { struct sk_buff *skb; + struct net *net = mfc6_net(c); - atomic_dec(&init_net.ipv6.cache_resolve_queue_len); + atomic_dec(&net->ipv6.cache_resolve_queue_len); while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { @@ -535,7 +544,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -590,13 +599,14 @@ static void ipmr_expire_process(unsigned long dummy) static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls) { int vifi; + struct net *net = mfc6_net(cache); cache->mfc_un.res.minvif = MAXMIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); - for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) { - if (MIF_EXISTS(&init_net, vifi) && + for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) { + if (MIF_EXISTS(net, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -607,15 +617,15 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl } } -static int mif6_add(struct mif6ctl *vifc, int mrtsock) +static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &init_net.ipv6.vif6_table[vifi]; + struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(&init_net, vifi)) + if (MIF_EXISTS(net, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -625,9 +635,9 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (init_net.ipv6.mroute_reg_vif_num >= 0) + if (net->ipv6.mroute_reg_vif_num >= 0) return -EADDRINUSE; - dev = ip6mr_reg_vif(); + dev = ip6mr_reg_vif(net); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -639,7 +649,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) break; #endif case 0: - dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); + dev = dev_get_by_index(net, vifc->mif6c_pifi); if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); @@ -673,20 +683,22 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - init_net.ipv6.mroute_reg_vif_num = vifi; + net->ipv6.mroute_reg_vif_num = vifi; #endif - if (vifi + 1 > init_net.ipv6.maxvif) - init_net.ipv6.maxvif = vifi + 1; + if (vifi + 1 > net->ipv6.maxvif) + net->ipv6.maxvif = vifi + 1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp) +static struct mfc6_cache *ip6mr_cache_find(struct net *net, + struct in6_addr *origin, + struct in6_addr *mcastgrp) { int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) { + for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) break; @@ -743,7 +755,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid); } else ip6_mr_forward(skb, c); } @@ -756,7 +768,8 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) * Called under mrt_lock. */ -static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) +static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, + int assert) { struct sk_buff *skb; struct mrt6msg *msg; @@ -792,7 +805,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = init_net.ipv6.mroute_reg_vif_num; + msg->im6_mif = net->ipv6.mroute_reg_vif_num; msg->im6_pad = 0; ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); @@ -829,7 +842,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) skb_pull(skb, sizeof(struct ipv6hdr)); } - if (init_net.ipv6.mroute6_sk == NULL) { + if (net->ipv6.mroute6_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -837,7 +850,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) /* * Deliver to user space multicast routing algorithms */ - ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb); + ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb); if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); @@ -852,14 +865,14 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) */ static int -ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) +ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) { int err; struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); for (c = mfc_unres_queue; c; c = c->next) { - if (net_eq(mfc6_net(c), &init_net) && + if (net_eq(mfc6_net(c), net) && ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) break; @@ -870,8 +883,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) { + if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 || + (c = ip6mr_cache_alloc_unres(net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -888,7 +901,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) /* * Reflect first query at pim6sd */ - if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) { + err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE); + if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ @@ -899,7 +913,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) return err; } - atomic_inc(&init_net.ipv6.cache_resolve_queue_len); + atomic_inc(&net->ipv6.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -925,14 +939,14 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc) { int line; struct mfc6_cache *c, **cp; line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &init_net.ipv6.mfc6_cache_array[line]; + for (cp = &net->ipv6.mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { @@ -951,19 +965,17 @@ static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct net *net = dev_net(dev); struct mif_device *v; int ct; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &init_net.ipv6.vif6_table[0]; - for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) { + v = &net->ipv6.vif6_table[0]; + for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(ct); + mif6_delete(net, ct); } return NOTIFY_DONE; } @@ -1026,6 +1038,7 @@ static void __net_exit ip6mr_net_exit(struct net *net) proc_net_remove(net, "ip6_mr_cache"); proc_net_remove(net, "ip6_mr_vif"); #endif + mroute_clean_tables(net); kfree(net->ipv6.mfc6_cache_array); kfree(net->ipv6.vif6_table); } @@ -1071,7 +1084,7 @@ void ip6_mr_cleanup(void) kmem_cache_destroy(mrt_cachep); } -static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) +static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) { int line; struct mfc6_cache *uc, *c, **cp; @@ -1087,7 +1100,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &init_net.ipv6.mfc6_cache_array[line]; + for (cp = &net->ipv6.mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) @@ -1107,7 +1120,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; - c = ip6mr_cache_alloc(&init_net); + c = ip6mr_cache_alloc(net); if (c == NULL) return -ENOMEM; @@ -1119,8 +1132,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = init_net.ipv6.mfc6_cache_array[line]; - init_net.ipv6.mfc6_cache_array[line] = c; + c->next = net->ipv6.mfc6_cache_array[line]; + net->ipv6.mfc6_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -1130,11 +1143,11 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc = *cp) != NULL; cp = &uc->next) { - if (net_eq(mfc6_net(uc), &init_net) && + if (net_eq(mfc6_net(uc), net) && ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { *cp = uc->next; - atomic_dec(&init_net.ipv6.cache_resolve_queue_len); + atomic_dec(&net->ipv6.cache_resolve_queue_len); break; } } @@ -1153,16 +1166,16 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct sock *sk) +static void mroute_clean_tables(struct net *net) { int i; /* * Shut down all active vif entries */ - for (i = 0; i < init_net.ipv6.maxvif; i++) { - if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(i); + for (i = 0; i < net->ipv6.maxvif; i++) { + if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) + mif6_delete(net, i); } /* @@ -1171,7 +1184,7 @@ static void mroute_clean_tables(struct sock *sk) for (i = 0; i < MFC6_LINES; i++) { struct mfc6_cache *c, **cp; - cp = &init_net.ipv6.mfc6_cache_array[i]; + cp = &net->ipv6.mfc6_cache_array[i]; while ((c = *cp) != NULL) { if (c->mfc_flags & MFC_STATIC) { cp = &c->next; @@ -1185,13 +1198,13 @@ static void mroute_clean_tables(struct sock *sk) } } - if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) { + if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) { struct mfc6_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); cp = &mfc_unres_queue; while ((c = *cp) != NULL) { - if (!net_eq(mfc6_net(c), &init_net)) { + if (!net_eq(mfc6_net(c), net)) { cp = &c->next; continue; } @@ -1205,11 +1218,12 @@ static void mroute_clean_tables(struct sock *sk) static int ip6mr_sk_init(struct sock *sk) { int err = 0; + struct net *net = sock_net(sk); rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(init_net.ipv6.mroute6_sk == NULL)) - init_net.ipv6.mroute6_sk = sk; + if (likely(net->ipv6.mroute6_sk == NULL)) + net->ipv6.mroute6_sk = sk; else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1222,14 +1236,15 @@ static int ip6mr_sk_init(struct sock *sk) int ip6mr_sk_done(struct sock *sk) { int err = 0; + struct net *net = sock_net(sk); rtnl_lock(); - if (sk == init_net.ipv6.mroute6_sk) { + if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); - init_net.ipv6.mroute6_sk = NULL; + net->ipv6.mroute6_sk = NULL; write_unlock_bh(&mrt_lock); - mroute_clean_tables(sk); + mroute_clean_tables(net); } else err = -EACCES; rtnl_unlock(); @@ -1250,9 +1265,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; + struct net *net = sock_net(sk); if (optname != MRT6_INIT) { - if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) + if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1277,7 +1293,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk); + ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1287,7 +1303,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(mifi); + ret = mif6_delete(net, mifi); rtnl_unlock(); return ret; @@ -1303,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -EFAULT; rtnl_lock(); if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(&mfc); + ret = ip6mr_mfc_delete(net, &mfc); else - ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk); + ret = ip6mr_mfc_add(net, &mfc, + sk == net->ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1317,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - init_net.ipv6.mroute_do_assert = !!v; + net->ipv6.mroute_do_assert = !!v; return 0; } @@ -1330,10 +1347,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int v = !!v; rtnl_lock(); ret = 0; - if (v != init_net.ipv6.mroute_do_pim) { - init_net.ipv6.mroute_do_pim = v; - init_net.ipv6.mroute_do_assert = v; - if (init_net.ipv6.mroute_do_pim) + if (v != net->ipv6.mroute_do_pim) { + net->ipv6.mroute_do_pim = v; + net->ipv6.mroute_do_assert = v; + if (net->ipv6.mroute_do_pim) ret = inet6_add_protocol(&pim6_protocol, IPPROTO_PIM); else @@ -1365,6 +1382,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, { int olr; int val; + struct net *net = sock_net(sk); switch (optname) { case MRT6_VERSION: @@ -1372,11 +1390,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: - val = init_net.ipv6.mroute_do_pim; + val = net->ipv6.mroute_do_pim; break; #endif case MRT6_ASSERT: - val = init_net.ipv6.mroute_do_assert; + val = net->ipv6.mroute_do_assert; break; default: return -ENOPROTOOPT; @@ -1406,16 +1424,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct sioc_mif_req6 vr; struct mif_device *vif; struct mfc6_cache *c; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.mifi >= init_net.ipv6.maxvif) + if (vr.mifi >= net->ipv6.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &init_net.ipv6.vif6_table[vr.mifi]; - if (MIF_EXISTS(&init_net, vr.mifi)) { + vif = &net->ipv6.vif6_table[vr.mifi]; + if (MIF_EXISTS(net, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1433,7 +1452,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr); + c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1466,7 +1485,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &init_net.ipv6.vif6_table[vifi]; + struct net *net = mfc6_net(c); + struct mif_device *vif = &net->ipv6.vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi fl; @@ -1480,7 +1500,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; - ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT); kfree_skb(skb); return 0; } @@ -1495,7 +1515,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) } }; - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (!dst) goto out_free; @@ -1538,9 +1558,10 @@ out_free: static int ip6mr_find_vif(struct net_device *dev) { + struct net *net = dev_net(dev); int ct; - for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) { - if (init_net.ipv6.vif6_table[ct].dev == dev) + for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) { + if (net->ipv6.vif6_table[ct].dev == dev) break; } return ct; @@ -1550,6 +1571,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) { int psend = -1; int vif, ct; + struct net *net = mfc6_net(cache); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; @@ -1558,30 +1580,30 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (init_net.ipv6.vif6_table[vif].dev != skb->dev) { + if (net->ipv6.vif6_table[vif].dev != skb->dev) { int true_vifi; cache->mfc_un.res.wrong_if++; true_vifi = ip6mr_find_vif(skb->dev); - if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert && + if (true_vifi >= 0 && net->ipv6.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (init_net.ipv6.mroute_do_pim || + (net->ipv6.mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; - ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF); + ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF); } goto dont_forward; } - init_net.ipv6.vif6_table[vif].pkt_in++; - init_net.ipv6.vif6_table[vif].bytes_in += skb->len; + net->ipv6.vif6_table[vif].pkt_in++; + net->ipv6.vif6_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1614,9 +1636,11 @@ dont_forward: int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; + struct net *net = dev_net(skb->dev); read_lock(&mrt_lock); - cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + cache = ip6mr_cache_find(net, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); /* * No usable cache entry @@ -1626,7 +1650,7 @@ int ip6_mr_input(struct sk_buff *skb) vif = ip6mr_find_vif(skb->dev); if (vif >= 0) { - int err = ip6mr_cache_unresolved(vif, skb); + int err = ip6mr_cache_unresolved(net, vif, skb); read_unlock(&mrt_lock); return err; @@ -1649,7 +1673,8 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev; + struct net *net = mfc6_net(c); + struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1665,7 +1690,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex; + nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1679,14 +1704,15 @@ rtattr_failure: return -EMSGSIZE; } -int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, + struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb->dst; read_lock(&mrt_lock); - cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr); + cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache) { struct sk_buff *skb2; @@ -1729,7 +1755,7 @@ int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); - err = ip6mr_cache_unresolved(vif, skb2); + err = ip6mr_cache_unresolved(net, vif, skb2); read_unlock(&mrt_lock); return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9da1ece466a..18c486cf498 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2194,7 +2194,7 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait); if (err <= 0) { if (!nowait) { if (err == 0) -- cgit v1.2.3 From 5ce1bbb97bf1e6707102d30499e7feaa1e6a2134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 14 Dec 2008 23:13:48 -0800 Subject: xfrm6_tunnel: join error paths using goto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv6/xfrm6_tunnel.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c2b27813860..80193db224d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -345,24 +345,23 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { static int __init xfrm6_tunnel_init(void) { if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) - return -EAGAIN; - - if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) { - xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return -EAGAIN; - } - if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) { - xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); - xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return -EAGAIN; - } - if (xfrm6_tunnel_spi_init() < 0) { - xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); - xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); - xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return -EAGAIN; - } + goto err; + if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) + goto unreg; + if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) + goto dereg6; + if (xfrm6_tunnel_spi_init() < 0) + goto dereg46; return 0; + +dereg46: + xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); +dereg6: + xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); +unreg: + xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); +err: + return -EAGAIN; } static void __exit xfrm6_tunnel_fini(void) -- cgit v1.2.3 From 448eb71f40120a8fd11ebd58153c271c63e6f862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 14 Dec 2008 23:15:21 -0800 Subject: ipv6/mcast: join error paths using goto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0f389603283..a51fb33e686 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -303,20 +303,23 @@ static struct inet6_dev *ip6_mc_find_dev(struct net *net, dev = dev_get_by_index(net, ifindex); if (!dev) - return NULL; + goto nodev; idev = in6_dev_get(dev); - if (!idev) { - dev_put(dev); - return NULL; - } + if (!idev) + goto release; read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); - return NULL; - } + if (idev->dead) + goto unlock_release; + return idev; + +unlock_release: + read_unlock_bh(&idev->lock); + in6_dev_put(idev); +release: + dev_put(dev); +nodev: + return NULL; } void ipv6_sock_mc_close(struct sock *sk) -- cgit v1.2.3 From 8da73b73ef29d537ba09f29393cd68707833e746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 14 Dec 2008 23:15:49 -0800 Subject: ip6mr: use goto to common label instead of opencoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9eed2422b3e..3c51b2d827f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1501,8 +1501,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT); - kfree_skb(skb); - return 0; + goto out_free; } #endif -- cgit v1.2.3 From b24a2516d10751d7ed5afb58420df25370c9dffb Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 16 Dec 2008 02:06:23 -0800 Subject: ipv6: Add IPV6_PKTINFO sticky option support to setsockopt() There are three reasons for me to add this support: 1.When no interface is specified in an IPV6_PKTINFO ancillary data item, the interface specified in an IPV6_PKTINFO sticky optionis is used. RFC3542: 6.7. Summary of Outgoing Interface Selection This document and [RFC-3493] specify various methods that affect the selection of the packet's outgoing interface. This subsection summarizes the ordering among those in order to ensure deterministic behavior. For a given outgoing packet on a given socket, the outgoing interface is determined in the following order: 1. if an interface is specified in an IPV6_PKTINFO ancillary data item, the interface is used. 2. otherwise, if an interface is specified in an IPV6_PKTINFO sticky option, the interface is used. 2.When no IPV6_PKTINFO ancillary data is received,getsockopt() should return the sticky option value which set with setsockopt(). RFC 3542: Issuing getsockopt() for the above options will return the sticky option value i.e., the value set with setsockopt(). If no sticky option value has been set getsockopt() will return the following values: 3.Make the setsockopt implementation POSIX compliant. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2aa294be0c7..0feaee38bc3 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -395,6 +395,28 @@ sticky_done: break; } + case IPV6_PKTINFO: + { + struct in6_pktinfo pkt; + + if (optlen == 0) + goto e_inval; + else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) + goto e_inval; + + if (copy_from_user(&pkt, optval, optlen)) { + retv = -EFAULT; + break; + } + if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if) + goto e_inval; + + np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; + ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr); + retv = 0; + break; + } + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; -- cgit v1.2.3 From f250dcdac111a8369220b8e192eae6c56dc1098a Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 16 Dec 2008 02:07:45 -0800 Subject: ipv6: fix the return interface index when get it while no message is received When get receiving interface index while no message is received, the the value seted with setsockopt() should be returned. RFC 3542: Issuing getsockopt() for the above options will return the sticky option value i.e., the value set with setsockopt(). If no sticky option value has been set getsockopt() will return the following values: - For the IPV6_PKTINFO option, it will return an in6_pktinfo structure with ipi6_addr being in6addr_any and ipi6_ifindex being zero. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0feaee38bc3..eeeaad2e8b5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -938,8 +938,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; - ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + np->sticky_pktinfo.ipi6_ifindex; + np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : + ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -948,8 +950,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; - ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + np->sticky_pktinfo.ipi6_ifindex; + np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : + ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { -- cgit v1.2.3 From 9f690db7ff4cb32493c0b0b13334cc4f5fd49a6b Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 16 Dec 2008 02:08:29 -0800 Subject: ipv6: fix the outgoing interface selection order in udpv6_sendmsg() 1.When no interface is specified in an IPV6_PKTINFO ancillary data item, the interface specified in an IPV6_PKTINFO sticky optionis is used. RFC3542: 6.7. Summary of Outgoing Interface Selection This document and [RFC-3493] specify various methods that affect the selection of the packet's outgoing interface. This subsection summarizes the ordering among those in order to ensure deterministic behavior. For a given outgoing packet on a given socket, the outgoing interface is determined in the following order: 1. if an interface is specified in an IPV6_PKTINFO ancillary data item, the interface is used. 2. otherwise, if an interface is specified in an IPV6_PKTINFO sticky option, the interface is used. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- net/ipv6/udp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 38390dd1963..84b1a296eec 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -793,6 +793,9 @@ do_udp_sendmsg: if (!fl.oif) fl.oif = sk->sk_bound_dev_if; + if (!fl.oif) + fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); -- cgit v1.2.3