From 643162258e57180a33e0ef7f08f0d986fbb5b4b9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 28 Jul 2006 18:12:09 +0900 Subject: [IPV6] ADDRCONF: Check payload length for IFA_LOCAL attribute in RTM_{ADD,DEL}MSG message Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2316a4315a1..81702b9ba5b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2853,7 +2853,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } @@ -2877,7 +2878,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } -- cgit v1.2.3 From 0778769d392b5b80410673f53e4f946574ebacf7 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 28 Jul 2006 18:12:10 +0900 Subject: [IPV6] ADDRCONF: Allow user-space to specify address lifetime Based on MIPL2 kernel patch. 
Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 81702b9ba5b..c0641887bde 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1869,15 +1869,21 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); + /* check the lifetime */ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; @@ -1889,10 +1895,29 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) scope = ipv6_addr_scope(pfx); - ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT); + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + if (!IS_ERR(ifp)) { + spin_lock(&ifp->lock); + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + spin_unlock(&ifp->lock); + addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); + addrconf_verify(0); return 0; } @@ -1945,7 +1970,8 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, + INFINITY_LIFE_TIME, 
INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2870,6 +2896,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; pfx = NULL; if (rta[IFA_ADDRESS-1]) { @@ -2886,7 +2913,18 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (pfx == NULL) return -EINVAL; - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + if (rta[IFA_CACHEINFO-1]) { + struct ifa_cacheinfo *ci; + if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) + return -EINVAL; + ci = RTA_DATA(rta[IFA_CACHEINFO-1]); + valid_lft = ci->ifa_valid; + prefered_lft = ci->ifa_prefered; + } + + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + prefered_lft, valid_lft); + } /* Maximum length of ifa_cacheinfo attributes */ -- cgit v1.2.3 From 8f27ebb9823b7f6b7a67ab325b515f75ba51bf4c Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 28 Jul 2006 18:12:11 +0900 Subject: [IPV6] ADDRCONF: Do not verify an address with infinity lifetime We also do not try regenerating a new temporary address corresponding to an address with infinite preferred lifetime.
Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c0641887bde..93a40a8ade8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2797,12 +2797,16 @@ restart: ifp->idev->nd_parms->retrans_time / HZ; #endif - if (age >= ifp->valid_lft) { + if (ifp->valid_lft != INFINITY_LIFE_TIME && + age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; + } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { + spin_unlock(&ifp->lock); + continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ int deprecate = 0; -- cgit v1.2.3 From 6c223828058bc45f070d35b63d4a819a8df0146d Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 28 Jul 2006 18:12:12 +0900 Subject: [IPV6] ADDRCONF: Support get operation of single address Based on MIPL2 kernel patch. 
Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93a40a8ade8..3ef3fe20283 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3165,6 +3165,62 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_getaddr(struct sk_buff *in_skb, + struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *addr = NULL; + struct net_device *dev = NULL; + struct inet6_ifaddr *ifa; + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int err; + + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) + return -EINVAL; + addr = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || + (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) + return -EINVAL; + addr = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (addr == NULL) + return -EINVAL; + + if (ifm->ifa_index) + dev = __dev_get_by_index(ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) + return -EADDRNOTAVAIL; + + if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto out; + } + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, RTM_NEWADDR, 0); + if (err < 0) { + err = -EMSGSIZE; + goto out_free; + } + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + in6_ifa_put(ifa); + return err; +out_free: + kfree_skb(skb); + goto out; +} + static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; @@ -3407,7 
+3463,8 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr, + .dumpit = inet6_dump_ifaddr, }, [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, -- cgit v1.2.3 From 081bba5b3ace5698eccf2f1a378cd4a9a4c98a85 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 28 Jul 2006 18:12:13 +0900 Subject: [IPV6] ADDRCONF: NLM_F_REPLACE support for RTM_NEWADDR Based on MIPL2 kernel patch. Signed-off-by: Noriaki YAKAMIYA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3ef3fe20283..8ea1e36bf8e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2894,6 +2894,55 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } +static int +inet6_addr_modify(int ifindex, struct in6_addr *pfx, + __u32 prefered_lft, __u32 valid_lft) +{ + struct inet6_ifaddr *ifp = NULL; + struct net_device *dev; + int ifa_flags = 0; + + if ((dev = __dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + + if (!valid_lft || (prefered_lft > valid_lft)) + return -EINVAL; + + ifp = ipv6_get_ifaddr(pfx, dev, 1); + if (ifp == NULL) + return -ENOENT; + + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags = IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags = 
IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + spin_lock_bh(&ifp->lock); + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; + + ifp->tstamp = jiffies; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (!(ifp->flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); + + addrconf_verify(0); + + return 0; +} + static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { @@ -2926,6 +2975,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) prefered_lft = ci->ifa_prefered; } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + int ret; + ret = inet6_addr_modify(ifm->ifa_index, pfx, + prefered_lft, valid_lft); + if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return ret; + } + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, prefered_lft, valid_lft); -- cgit v1.2.3 From 679e898a4742d4a4a47430b67fd68a789a73dcfd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 30 Jul 2006 20:19:11 -0700 Subject: [XFRM]: Fix protocol field value for outgoing IPv6 GSO packets Signed-off-by: Patrick McHardy Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0eea60ea9eb..c8c8b44a0f5 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -125,7 +125,7 @@ static int xfrm6_output_finish(struct sk_buff *skb) if (!skb_is_gso(skb)) return xfrm6_output_finish2(skb); - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (unlikely(IS_ERR(segs))) -- cgit v1.2.3 From 497c615abad7ee81994dd592194535aea2aad617 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 20:19:33 -0700 Subject: [IPV6]: Audit all ip6_dst_lookup/ip6_dst_store calls The current users of ip6_dst_lookup can be divided into two classes: 1) The caller holds no locks and is in user-context (UDP). 2) The caller does not want to look up the dst cache at all. The second class covers everyone except UDP because most people do the cache lookup directly before calling ip6_dst_lookup. This patch adds ip6_sk_dst_lookup for the first class. Similarly ip6_dst_store users can be divided into those that need to take the socket dst lock and those that don't. This patch adds __ip6_dst_store for those (everyone except UDP/datagram) that don't need an extra lock. Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- net/dccp/ipv6.c | 4 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 120 ++++++++++++++++++++++++++------------- net/ipv6/tcp_ipv6.c | 4 +- net/ipv6/udp.c | 2 +- 6 files changed, 88 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9f3d4d7cd0b..610c722ac27 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -230,7 +230,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -863,7 +863,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5a0ba58b86c..ac85e9c532c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -658,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } return 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5c950cc79d8..bf491077b82 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3bc74ce7880..5e74a37695f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -723,48 +723,51 @@ fail: return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +static struct dst_entry 
*ip6_sk_dst_check(struct sock *sk, + struct dst_entry *dst, + struct flowi *fl) { - int err = 0; + struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt = (struct rt6_info *)dst; - *dst = NULL; - if (sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - - *dst = sk_dst_check(sk, np->dst_cookie); - if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; - - /* Yes, checking route validity in not connected - * case is not very simple. Take into account, - * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) - * - * 1. If route was host route, check that - * cached destination is current. - * If it is network route, we still may - * check its validity using saved pointer - * to the last used address: daddr_cache. - * We do not want to save whole address now, - * (because main consumer of this service - * is tcp, which has not this problem), - * so that the last trick works only on connected - * sockets. - * 2. oif also should be the same. - */ - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, - &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, - np->daddr_cache))) - || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { - dst_release(*dst); - *dst = NULL; - } - } + if (!dst) + goto out; + + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, + * and MSG_DONTROUTE --ANK (980726) + * + * 1. If route was host route, check that + * cached destination is current. + * If it is network route, we still may + * check its validity using saved pointer + * to the last used address: daddr_cache. + * We do not want to save whole address now, + * (because main consumer of this service + * is tcp, which has not this problem), + * so that the last trick works only on connected + * sockets. + * 2. oif also should be the same. 
+ */ + if (((rt->rt6i_dst.plen != 128 || + !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; } +out: + return dst; +} + +static int ip6_dst_lookup_tail(struct sock *sk, + struct dst_entry **dst, struct flowi *fl) +{ + int err; + if (*dst == NULL) *dst = ip6_route_output(sk, fl); @@ -773,7 +776,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); - if (err) goto out_err_release; } @@ -786,8 +788,48 @@ out_err_release: return err; } +/** + * ip6_dst_lookup - perform route lookup on flow + * @sk: socket which provides route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + return ip6_dst_lookup_tail(sk, dst, fl); +} EXPORT_SYMBOL_GPL(ip6_dst_lookup); +/** + * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * @sk: socket which provides the dst cache and route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow with the + * possibility of using the cached route in the socket if it is valid. + * It will take the socket dst lock when operating on the dst cache. + * As a result, this function can only be used in process context. + * + * It returns zero on success, or a standard errno code on error. 
+ */ +int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + if (sk) { + *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + *dst = ip6_sk_dst_check(sk, *dst, fl); + } + + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 923989d0520..b76fd7fba5f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -270,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -947,7 +947,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccc57f434cd..3d54f246411 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,7 +782,7 @@ do_udp_sendmsg: connected = 0; } - err = ip6_dst_lookup(sk, &dst, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; if (final_p) -- cgit v1.2.3 From f4d26fb336f3c08066bffbe907d3104be4fb91a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 20:20:28 -0700 Subject: [NET]: Fix ___pskb_trim when entire frag_list needs dropping When the trim point is within the head and there is no paged data, ___pskb_trim fails to drop the first element in the frag_list. This patch fixes this by moving the len <= offset case out of the page data loop. This patch also adds a missing kfree_skb on the frag that we just cloned. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 476aa397850..d236f02c646 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -846,7 +846,11 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; - for (i = 0; i < nfrags; i++) { + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; if (end < len) { @@ -854,9 +858,9 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) continue; } - if (len > offset) - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_shinfo(skb)->frags[i++].size = len - offset; +drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) @@ -864,7 +868,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) if (skb_shinfo(skb)->frag_list) skb_drop_fraglist(skb); - break; + goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); @@ -879,6 +883,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) return -ENOMEM; nfrag->next = frag->next; + kfree_skb(frag); frag = nfrag; *fragp = frag; } @@ -897,6 +902,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) break; } +done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; -- cgit v1.2.3 From 118075b3cdc90e0815362365f3fc64d672ace0d6 Mon Sep 17 00:00:00 2001 From: James Morris Date: Sun, 30 Jul 2006 20:21:45 -0700 Subject: [TCP]: fix memory leak in net/ipv4/tcp_probe.c::tcpprobe_read() Based upon a patch by Jesper Juhl. Signed-off-by: James Morris Acked-by: Stephen Hemminger Acked-by: Jesper Juhl Signed-off-by: David S. 
Miller --- net/ipv4/tcp_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d7d517a3a23..b3435324b57 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - int error = 0, cnt; + int error = 0, cnt = 0; unsigned char *tbuf; if (!buf || len < 0) -- cgit v1.2.3 From 3687b1dc6fe83a500ba4d3235704594f6a111a2d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 30 Jul 2006 20:35:54 -0700 Subject: [TCP]: SNMPv2 tcpAttemptFails counter error According to RFC2012, tcpAttemptFails is defined as follows: tcpAttemptFails OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of times TCP connections have made a direct transition to the CLOSED state from either the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state." ::= { tcp 7 } When I looked into RFC793, I found that the state change should occur under the following conditions: 1. SYN-SENT -> CLOSED a) Received ACK,RST segment when SYN-SENT state. 2. SYN-RCVD -> CLOSED b) Received SYN segment when SYN-RCVD state(came from LISTEN). c) Received RST segment when SYN-RCVD state(came from SYN-SENT). d) Received SYN segment when SYN-RCVD state(came from SYN-SENT). 3. SYN-RCVD -> LISTEN e) Received RST segment when SYN-RCVD state(came from LISTEN). In my test, those direct state transitions cannot be counted in tcpAttemptFails. Signed-off-by: Wei Yongjun Signed-off-by: David S.
Miller --- net/ipv4/tcp_ipv4.c | 2 -- net/ipv4/tcp_minisocks.c | 4 +++- net/ipv6/tcp_ipv6.c | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6f39e81429..4b04c3edd4a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) It can f.e. if SYNs crossed. */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -874,7 +873,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ccb7cb22b1..624e2b2c7f5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -589,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; + } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b76fd7fba5f..b843a650be7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -427,7 +427,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. 
--ANK */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -831,7 +830,6 @@ drop: if (req) reqsk_free(req); - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; /* don't send reset */ } -- cgit v1.2.3 From 792d1932e319ff8ba01361e7d151b1794c55c31f Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sun, 30 Jul 2006 20:43:26 -0700 Subject: [NET]: Network Event Notifier Mechanism. This patch uses notifier blocks to implement a network event notifier mechanism. Clients register their callback function by calling register_netevent_notifier() like this: static struct notifier_block nb = { .notifier_call = my_callback_func }; ... register_netevent_notifier(&nb); Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- net/core/netevent.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 net/core/netevent.c (limited to 'net') diff --git a/net/core/netevent.c b/net/core/netevent.c new file mode 100644 index 00000000000..35d02c38554 --- /dev/null +++ b/net/core/netevent.c @@ -0,0 +1,69 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include + +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. 
+ */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); -- cgit v1.2.3 From 8d71740c56a9058acc4378504a356d543ff1308b Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sun, 30 Jul 2006 20:43:36 -0700 Subject: [NET]: Core net changes to generate netevents Generate netevents for: - neighbour changes - routing redirects - pmtu changes Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: David S. 
Miller --- net/core/Makefile | 2 +- net/core/neighbour.c | 14 ++++++++------ net/ipv4/route.c | 8 ++++++++ net/ipv6/route.c | 7 +++++++ 4 files changed, 24 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/Makefile b/net/core/Makefile index e9bd2467d5a..2645ba428d4 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7ad681f5e71..5130d2efdbb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -754,6 +755,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); + notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, @@ -762,6 +764,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); + notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { NEIGH_PRINTK2("neigh %p is probed.\n", neigh); @@ -819,6 +822,8 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) @@ -926,9 +931,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, { u8 old; int err; -#ifdef CONFIG_ARPD int notify = 0; -#endif struct net_device *dev; int update_isrouter = 0; @@ -948,9 +951,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_suspect(neigh); neigh->nud_state = new; err = 0; -#ifdef CONFIG_ARPD notify 
= old & NUD_VALID; -#endif goto out; } @@ -1022,9 +1023,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1); -#ifdef CONFIG_ARPD notify = 1; -#endif } if (new == old) goto out; @@ -1056,6 +1055,9 @@ out: (neigh->flags & ~NTF_ROUTER); } write_unlock_bh(&neigh->lock); + + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2dc6dbb2846..19bd49d69d9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -104,6 +104,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -1125,6 +1126,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, struct rtable *rth, **rthp; u32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + struct netevent_redirect netevent; if (!in_dev) return; @@ -1216,6 +1218,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, rt_drop(rt); goto do_next; } + + netevent.old = &rth->u.dst; + netevent.new = &rt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, + &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1452,6 +1459,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, ip_rt_mtu_expires); + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87c39c978cd..4b163711f3a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -742,6 +743,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; } dst->metrics[RTAX_MTU-1] = mtu; + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1155,6 +1157,7 @@ void rt6_redirect(struct in6_addr *dest, struct 
in6_addr *saddr, struct rt6_info *rt, *nrt = NULL; int strict; struct fib6_node *fn; + struct netevent_redirect netevent; /* * Get the "current" route for this destination and @@ -1252,6 +1255,10 @@ restart: if (ip6_ins_rt(nrt, NULL, NULL, NULL)) goto out; + netevent.old = &rt->u.dst; + netevent.new = &nrt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); + if (rt->rt6i_flags&RTF_CACHE) { ip6_del_rt(rt, NULL, NULL, NULL); return; -- cgit v1.2.3 From a280b89982f48e9a32c6410a37419b12ca88af6b Mon Sep 17 00:00:00 2001 From: James Morris Date: Sun, 30 Jul 2006 20:46:38 -0700 Subject: [SECURITY] secmark: nul-terminate secdata The patch below fixes a problem in the iptables SECMARK target, where the user-supplied 'selctx' string may not be nul-terminated. From initial analysis, it seems that the strlen() called from selinux_string_to_sid() could run until it arbitrarily finds a zero, and possibly cause a kernel oops before then. The impact of this appears limited because the operation requires CAP_NET_ADMIN, which is essentially always root. Also, the module is not yet in wide use. Signed-off-by: James Morris Signed-off-by: Stephen Smalley Signed-off-by: David S. Miller --- net/netfilter/xt_SECMARK.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c2ce9c4011c..de9537ad9a7 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -57,6 +57,8 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; + + sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; err = selinux_string_to_sid(sel->selctx, &sel->selsid); if (err) { -- cgit v1.2.3 From 52499afe40387524e9f46ef9ce4695efccdd2ed9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 31 Jul 2006 22:32:09 -0700 Subject: [TCP]: Process linger2 timeout consistently. Based upon guidance from Alexey Kuznetsov. 
When linger2 is active, we check to see if the fin_wait2 timeout is longer than the timewait. If it is, we schedule the keepalive timer for the difference between the timewait timeout and the fin_wait2 timeout. When this orphan socket is seen by tcp_keepalive_timer() it will try to transform this fin_wait2 socket into a fin_wait2 mini-socket, again if linger2 is active. Not all paths were setting this initial keepalive timer correctly. The tcp input path was doing it correctly, but tcp_close() wasn't, potentially making the socket linger longer than it really needs to. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f6a2d9223d0..7b621e44b12 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1659,7 +1659,8 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + inet_csk_reset_keepalive_timer(sk, + tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; -- cgit v1.2.3 From 8af2745645243b5e5b031504a643bf2158571dc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Jul 2006 22:35:23 -0700 Subject: [NET]: Add netdev_alloc_skb(). Add a dev_alloc_skb variant that takes a struct net_device * parameter. For now that parameter is unused, but I'll use it to allocate the skb from node-local memory in a follow-up patch. Also there have been some other plans mentioned on the list that can use it. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d236f02c646..71487b915d6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -256,6 +256,29 @@ nodata: goto out; } +/** + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. + */ +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + if (likely(skb)) + skb_reserve(skb, NET_SKB_PAD); + return skb; +} static void skb_drop_list(struct sk_buff **listp) { @@ -2048,6 +2071,7 @@ EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); EXPORT_SYMBOL(__alloc_skb); +EXPORT_SYMBOL(__netdev_alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); -- cgit v1.2.3 From b10866fd7dd9ae9b8dd03646d28702a76d624474 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 31 Jul 2006 23:46:18 -0700 Subject: [NETFILTER]: SIP helper: expect RTP streams in both directions Since we don't know in which direction the first packet will arrive, we need to create one expectation for each direction, which is currently prevented by max_expected being set to 1. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index fc87ce0da40..4f222d6be00 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -442,7 +442,7 @@ static int __init init(void) sip[i].tuple.src.u.udp.port = htons(ports[i]); sip[i].mask.src.u.udp.port = 0xFFFF; sip[i].mask.dst.protonum = 0xFF; - sip[i].max_expected = 1; + sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ sip[i].me = THIS_MODULE; sip[i].help = sip_help; -- cgit v1.2.3 From 3ab720881b6e36bd5190a3a11cee8d8d067c4ad7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 31 Jul 2006 23:47:31 -0700 Subject: [NETFILTER]: xt_hashlimit/xt_string: missing string validation The hashlimit table name and the textsearch algorithm need to be terminated, the textsearch pattern length must not exceed the maximum size. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_hashlimit.c | 3 +++ net/netfilter/xt_string.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 92980ab8ce4..6b662449e82 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -508,6 +508,9 @@ hashlimit_checkentry(const char *tablename, if (!r->cfg.expire) return 0; + if (r->name[sizeof(r->name) - 1] != '\0') + return 0; + /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before ip_tables.c grabs ipt_mutex. 
* We also cannot grab the hashtable spinlock, since htable_create will diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0ebb6ac2c8c..d8e3891b5f8 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -55,7 +55,10 @@ static int checkentry(const char *tablename, /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) return 0; - + if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') + return 0; + if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) + return 0; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) -- cgit v1.2.3 From b60dfc6c20bd5f19de0083362ce377c89b1e5a24 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Aug 2006 00:00:12 -0700 Subject: [NET]: Kill the WARN_ON() calls for checksum fixups. We have a more complete solution in the works, involving the separation of CHECKSUM_HW on input vs. output, and having netfilter properly do incremental checksums. But that is a very involved patch and is thus 2.6.19 material. What we have now is infinitely better than the past, wherein all TSO packets were dropped due to corrupt checksums as soon as the NAT module was loaded. At least now, the checksums do get fixed up, it just isn't the cleanest nor most optimal solution. Signed-off-by: David S. Miller --- net/core/dev.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4d2b5167d7f..5b630cece70 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1166,11 +1166,6 @@ int skb_checksum_help(struct sk_buff *skb, int inward) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - /* Let GSO fix up the checksum. 
*/ goto out_set_summed; } @@ -1220,11 +1215,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __skb_pull(skb, skb->mac_len); if (unlikely(skb->ip_summed != CHECKSUM_HW)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); -- cgit v1.2.3 From 32c524d1c48b62be49fa1b1dd93fed10792debc0 Mon Sep 17 00:00:00 2001 From: Wei Dong Date: Wed, 2 Aug 2006 13:39:57 -0700 Subject: [IPV6]: SNMPv2 "ipv6IfStatsInHdrErrors" counter error When I tested Linux kernel 2.6.17.7 about statistics "ipv6IfStatsInHdrErrors", found that this counter couldn't increase correctly. The criteria is RFC2465: ipv6IfStatsInHdrErrors OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of input datagrams discarded due to errors in their IPv6 headers, including version number mismatch, other format errors, hop count exceeded, errors discovered in processing their IPv6 options, etc." ::= { ipv6IfStatsEntry 2 } When I send TTL=0 and TTL=1 a packet to a router which need to be forwarded, router just sends an ICMPv6 message to tell the sender that TIME_EXCEED and HOPLIMITS, but no increments for this counter(in the function ip6_forward). Signed-off-by: Wei Dong Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5e74a37695f..70c9234b70e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -356,6 +356,7 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; -- cgit v1.2.3 From dafee490858f79e144c5e6cdd84ceb9efa20a3f1 Mon Sep 17 00:00:00 2001 From: Wei Dong Date: Wed, 2 Aug 2006 13:41:21 -0700 Subject: [IPV6]: SNMPv2 "ipv6IfStatsOutFragCreates" counter error When I tested linux kernel 2.6.17.7 about statistics "ipv6IfStatsOutFragCreates", and found that it couldn't increase correctly. The criteria is RFC 2465: ipv6IfStatsOutFragCreates OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of output datagram fragments that have been generated as a result of fragmentation at this output interface." ::= { ipv6IfStatsEntry 15 } I think there are two issues in Linux kernel. 1st: RFC2465 specifies the counter is "The number of output datagram fragments...". I think increasing this counter after output a fragment successfully is better. And it should not be increased even though a fragment is created but failed to output. 2nd: If we send a big ICMP/ICMPv6 echo request to a host, and receive ICMP/ICMPv6 echo reply consisted of some fragments. As we know that in Linux kernel first fragmentation occurs in ICMP layer(maybe saying transport layer is better), but this is not the "real" fragmentation, just do some "pre-fragment" -- allocate space for data, and form a frag_list, etc. The "real" fragmentation happens in IP layer -- set offset and MF flag and so on. So I think in "fast path" for ip_fragment/ip6_fragment, if we send a fragment which "pre-fragment" by upper layer we should also increase "ipv6IfStatsOutFragCreates". 
Signed-off-by: Wei Dong Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 7 ++++--- net/ipv6/ip6_output.c | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7c9f9a6421b..9bf307a2978 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -526,6 +526,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); + if (!err) + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -649,9 +651,6 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); - iph->tot_len = htons(len + hlen); ip_send_check(iph); @@ -659,6 +658,8 @@ slow_path: err = output(skb2); if (err) goto fail; + + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGOKS); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 70c9234b70e..69451af6abe 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -596,6 +596,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); + if(!err) + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + if (err || !frag) break; @@ -707,12 +710,11 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); - err = output(frag); if (err) goto fail; + + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); -- cgit v1.2.3 From 2b7e24b66d31d677d76b49918e711eb360c978b6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 2 Aug 2006 14:07:58 -0700 Subject: [NET]: skb_queue_lock_key() is no longer used. Signed-off-by: Adrian Bunk Acked-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 71487b915d6..022d8894c11 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -70,13 +70,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; -/* - * lockdep: lock class key used by skb_queue_head_init(): - */ -struct lock_class_key skb_queue_lock_key; - -EXPORT_SYMBOL(skb_queue_lock_key); - /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always -- cgit v1.2.3 From dc49c1f94e3469d94b952e8f5160dd4ccd791d79 Mon Sep 17 00:00:00 2001 From: Catherine Zhang Date: Wed, 2 Aug 2006 14:12:06 -0700 Subject: [AF_UNIX]: Kernel memory leak fix for af_unix datagram getpeersec patch From: Catherine Zhang This patch implements a cleaner fix for the memory leak problem of the original unix datagram getpeersec patch. Instead of creating a security context each time a unix datagram is sent, we only create the security context when the receiver requests it. This new design requires modification of the current unix_getsecpeer_dgram LSM hook and addition of two new hooks, namely, secid_to_secctx and release_secctx. The former retrieves the security context and the latter releases it. A hook is required for releasing the security context because it is up to the security module to decide how that's done. In the case of Selinux, it's a simple kfree operation. Acked-by: Stephen Smalley Signed-off-by: David S. 
Miller --- net/ipv4/ip_sockglue.c | 9 +++++++-- net/unix/af_unix.c | 17 +++++------------ 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 84f43a3c909..2d05c4133d3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -112,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; - u32 seclen; + u32 seclen, secid; int err; - err = security_socket_getpeersec_dgram(skb, &secdata, &seclen); + err = security_socket_getpeersec_dgram(NULL, skb, &secid); + if (err) + return; + + err = security_secid_to_secctx(secid, &secdata, &seclen); if (err) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); + security_release_secctx(secdata, seclen); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6f290927926..de6ec519272 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -128,23 +128,17 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK -static void unix_get_peersec_dgram(struct sk_buff *skb) +static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - int err; - - err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb), - UNIXSECLEN(skb)); - if (err) - *(UNIXSECDATA(skb)) = NULL; + memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - scm->secdata = *UNIXSECDATA(skb); - scm->seclen = *UNIXSECLEN(skb); + scm->secid = *UNIXSID(skb); } #else -static inline void unix_get_peersec_dgram(struct sk_buff *skb) +static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -1322,8 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb 
*kiocb, struct socket *sock, memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); if (siocb->scm->fp) unix_attach_fds(siocb->scm, skb); - - unix_get_peersec_dgram(skb); + unix_get_secdata(siocb->scm, skb); skb->h.raw = skb->data; err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); -- cgit v1.2.3 From 9bbf28a1ff7b9d4e7df57829c25638721984277b Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Wed, 2 Aug 2006 14:14:44 -0700 Subject: [DECNET]: Fix for routing bug This patch fixes a bug in the DECnet routing code where we were selecting a loopback device in preference to an outward facing device even when the destination was known non-local. This patch should fix the problem. Signed-off-by: Patrick Caulfield Signed-off-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1355614ec11..743e9fcf7c5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -925,8 +925,13 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { if (!dev_out->dn_ptr) continue; - if (dn_dev_islocal(dev_out, oldflp->fld_src)) - break; + if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + continue; + if ((dev_out->flags & IFF_LOOPBACK) && + oldflp->fld_dst && + !dn_dev_islocal(dev_out, oldflp->fld_dst)) + continue; + break; } read_unlock(&dev_base_lock); if (dev_out == NULL) -- cgit v1.2.3 From e6eb307d48c81d688804f8b39a0a3ddde3cd3458 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Wed, 2 Aug 2006 14:21:19 -0700 Subject: [I/OAT]: Remove CPU hotplug lock from net_dma_rebalance Remove the lock_cpu_hotplug()/unlock_cpu_hotplug() calls from net_dma_rebalance The lock_cpu_hotplug()/unlock_cpu_hotplug() sequence in net_dma_rebalance is both incorrect (as pointed out by David Miller) because lock_cpu_hotplug() may 
sleep while the net_dma_event_lock spinlock is held, and unnecessary (as pointed out by Andrew Morton) as spin_lock() disables preemption which protects from CPU hotplug events. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- net/core/dev.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5b630cece70..f25d7ecaf03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3419,12 +3419,9 @@ static void net_dma_rebalance(void) unsigned int cpu, i, n; struct dma_chan *chan; - lock_cpu_hotplug(); - if (net_dma_count == 0) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); - unlock_cpu_hotplug(); return; } @@ -3444,8 +3441,6 @@ static void net_dma_rebalance(void) i++; } rcu_read_unlock(); - - unlock_cpu_hotplug(); } /** -- cgit v1.2.3 From 29bbd72d6ee1dbf2d9f00d022f8e999aa528fb3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 2 Aug 2006 15:02:31 -0700 Subject: [NET]: Fix more per-cpu typos Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/ipv4/tcp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f25d7ecaf03..d95e2626d94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3421,7 +3421,7 @@ static void net_dma_rebalance(void) if (net_dma_count == 0) { for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; } @@ -3434,7 +3434,7 @@ static void net_dma_rebalance(void) + (i < (num_online_cpus() % net_dma_count) ? 
1 : 0)); while(n) { - per_cpu(softnet_data.net_dma, cpu) = chan; + per_cpu(softnet_data, cpu).net_dma = chan; cpu = next_cpu(cpu, cpu_online_map); n--; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7b621e44b12..934396bb137 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1132,7 +1132,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else -- cgit v1.2.3