From b43309578477ff3271945d4efb920f558a309b4a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 May 2005 14:23:13 -0700 Subject: [NETFILTER]: Missing owner-field initialization in iptable_raw Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/iptable_raw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 01b4a3c814d..47449ba83eb 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -103,13 +103,15 @@ static struct nf_hook_ops ipt_ops[] = { .hook = ipt_hook, .pf = PF_INET, .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_RAW + .priority = NF_IP_PRI_RAW, + .owner = THIS_MODULE, }, { .hook = ipt_hook, .pf = PF_INET, .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_RAW + .priority = NF_IP_PRI_RAW, + .owner = THIS_MODULE, }, }; -- cgit v1.2.3 From 31da185d8162ae0f30a13ed945f1f4d28d158133 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 May 2005 14:23:50 -0700 Subject: [NETFILTER]: Don't checksum CHECKSUM_UNNECESSARY skbs in TCP connection tracking Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 2b87c1974be..721ddbf522b 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -819,6 +819,7 @@ static int tcp_error(struct sk_buff *skb, */ /* FIXME: Source route IP option packets --RR */ if (hooknum == NF_IP_PRE_ROUTING + && skb->ip_summed != CHECKSUM_UNNECESSARY && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, skb->ip_summed == CHECKSUM_HW ? skb->csum : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { -- cgit v1.2.3 From 679a87382433cf12a28f07a7d5c240f30f0daa08 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:24:36 -0700 Subject: [IPV6]: Fix raw socket checksums with IPsec I made a mistake in my last patch to the raw socket checksum code. I used the value of inet->cork.length as the length of the payload. While this works with normal packets, it breaks down when IPsec is present since the cork length includes the extension header length. So here is a patch to fix the length calculations. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/raw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1352c1d9bf4..617645bc5ed 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -455,11 +455,11 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_sock *rp) { - struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; int err = 0; int offset; int len; + int total_len; u32 tmp_csum; u16 csum; @@ -470,7 +470,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, goto out; offset = rp->offset; - if (offset >= inet->cork.length - 1) { + total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data); + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); goto out; @@ -514,7 +515,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, tmp_csum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - inet->cork.length, fl->proto, tmp_csum); + total_len, fl->proto, tmp_csum); if (tmp_csum == 0) tmp_csum = -1; -- cgit v1.2.3 From e4553eddae592b948c9695c9a0002169b0cab6fc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:25:13 -0700 Subject: [IPV6]: Include ipv6.h for ipv6_addr_set This patch includes net/ipv6.h from addrconf.h since it needs ipv6_addr_set. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 7af9a13cb9b..f1e5af4be98 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -46,6 +46,7 @@ struct prefix_info { #include #include #include +#include #define IN6_ADDR_HSIZE 16 -- cgit v1.2.3 From 526bdb80a23b2e10ed4ccc3fcf309c9118d892d6 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 May 2005 14:26:01 -0700 Subject: [XFRM]: Prevent off-by-one access to xfrm_dispatch Makes the type > XFRM_MSG_MAX check behave correctly to protect access to xfrm_dispatch. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0df02ae68a..4d19b9e6531 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -140,8 +140,9 @@ enum { XFRM_MSG_FLUSHPOLICY, #define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY - XFRM_MSG_MAX + __XFRM_MSG_MAX }; +#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) struct xfrm_user_tmpl { struct xfrm_id id; -- cgit v1.2.3 From 492b558b3191319cbc859a9e025bc354d336c261 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 May 2005 14:26:40 -0700 Subject: [XFRM]: Cleanup xfrm_msg_min and xfrm_dispatch Converts xfrm_msg_min and xfrm_dispatch to use c99 designated initializers to make greping a little bit easier. Also replaces two hardcoded message type with meaningful names. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/xfrm.h | 2 ++ net/xfrm/xfrm_user.c | 73 ++++++++++++++++++++++++++-------------------------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4d19b9e6531..fd2ef742a9f 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -144,6 +144,8 @@ enum { }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) +#define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE) + struct xfrm_user_tmpl { struct xfrm_id id; __u16 family; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 63661b0fd73..52b5843937c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -855,47 +855,44 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x return 0; } -static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = { - NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)), /* NEW SA */ - NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* DEL SA */ - NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* GET SA */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* DEL POLICY */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* GET POLICY */ - NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)), /* ALLOC SPI */ - NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)), /* ACQUIRE */ - NLMSG_LENGTH(sizeof(struct xfrm_user_expire)), /* EXPIRE */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */ - NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)), /* UPD SA */ - NLMSG_LENGTH(sizeof(struct xfrm_user_polexpire)), /* POLEXPIRE */ - NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)), /* FLUSH SA */ - NLMSG_LENGTH(0), /* FLUSH POLICY */ +#define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) + +static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { + [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), + [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), + [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), + [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), + [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), + [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), + [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userspi_info), + [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire), + [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire), + [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), + [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), + [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), + [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), }; +#undef XMSGSIZE + static struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); int (*dump)(struct sk_buff *, struct netlink_callback *); -} xfrm_dispatch[] = { - { .doit = xfrm_add_sa, }, - { .doit = xfrm_del_sa, }, - { - .doit = xfrm_get_sa, - .dump = xfrm_dump_sa, - }, - { .doit = xfrm_add_policy }, - { .doit = xfrm_get_policy }, - { - .doit = xfrm_get_policy, - .dump = xfrm_dump_policy, - }, - { .doit = xfrm_alloc_userspi }, - {}, - {}, - { .doit = xfrm_add_policy }, - { .doit = xfrm_add_sa, }, - {}, - { .doit = xfrm_flush_sa }, - { .doit = xfrm_flush_policy }, +} xfrm_dispatch[XFRM_NR_MSGTYPES] = { + [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, + [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, + [XFRM_MSG_GETSA - XFRM_MSG_BASE] = { .doit = xfrm_get_sa, + .dump = xfrm_dump_sa }, + [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, + [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, + [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, + .dump = xfrm_dump_policy }, + [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, + [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, + [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, + [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = { .doit = xfrm_flush_sa }, + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, }; static int xfrm_done(struct netlink_callback *cb) @@ -931,7 +928,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err return -1; } - if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) { + if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || + type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && + (nlh->nlmsg_flags & NLM_F_DUMP)) { u32 rlen; if (link->dump == NULL) -- cgit v1.2.3 From d775fc09f16f4b88cd0373006b112c4772589778 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 May 2005 14:27:35 -0700 Subject: [RTNETLINK] Fix RTM_MAX to represent the maximum valid message type RTM_MAX is currently set to the maximum reserverd message type plus one thus being the cause of two bugs for new types being assigned a) given the new family registers only the NEW command in its reserved block the array size for per family entries is calculated one entry short and b) given the new family registers all commands RTM_MAX would point to the first entry of the block following this one and the rtnetlink receive path would accept a message type for a nonexisting family. This patch changes RTM_MAX to point to the maximum valid message type by aligning it to the start of the next block and subtracting one. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 32e52769a00..d607219af6a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,8 +89,8 @@ enum { RTM_GETANYCAST = 62, #define RTM_GETANYCAST RTM_GETANYCAST - RTM_MAX, -#define RTM_MAX RTM_MAX + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; /* -- cgit v1.2.3 From f90a0a74b864fdc46737614f03b8868f4f31e3bf Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 May 2005 14:29:00 -0700 Subject: [RTNETLINK] Fix & cleanup rtm_min/rtm_max Converts rtm_min and rtm_max arrays to use c99 designated initializers for easier insertion of new message families. RTM_GETMULTICAST and RTM_GETANYCAST did not have the minimal message size specified which means that the netlink message was parsed for routing attributes starting from the header. Adds the proper minimal message sizes for these messages (netlink header + common rtnetlink header) to fix this issue. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 39 +++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index d607219af6a..1ecaea74d55 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -93,6 +93,8 @@ enum { #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + /* Generic structure for encapsulation of optional route information. It is reminiscent of sockaddr, but with sa_family replaced diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d8c198e42f9..58a98180626 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -88,28 +88,31 @@ struct rtnetlink_link * rtnetlink_links[NPROTO]; static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] = { - NLMSG_LENGTH(sizeof(struct ifinfomsg)), - NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - NLMSG_LENGTH(sizeof(struct rtmsg)), - NLMSG_LENGTH(sizeof(struct ndmsg)), - NLMSG_LENGTH(sizeof(struct rtmsg)), - NLMSG_LENGTH(sizeof(struct tcmsg)), - NLMSG_LENGTH(sizeof(struct tcmsg)), - NLMSG_LENGTH(sizeof(struct tcmsg)), - NLMSG_LENGTH(sizeof(struct tcamsg)) + [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), + [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), + [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), + [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), + [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), + [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), + [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), + [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), }; static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] = { - IFLA_MAX, - IFA_MAX, - RTA_MAX, - NDA_MAX, - RTA_MAX, - TCA_MAX, - TCA_MAX, - TCA_MAX, - TCAA_MAX + [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, + [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, + [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, + [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, + [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, + [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, + [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, + [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, + [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, }; void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) -- cgit v1.2.3 From db46edc6d3b66bf708a8f23a9aa89f63a49ebe33 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 May 2005 14:29:39 -0700 Subject: [RTNETLINK] Cleanup rtnetlink_link tables Converts remaining rtnetlink_link tables to use c99 designated initializers to make greping a little bit easier. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 6 +++--- net/decnet/dn_dev.c | 25 +++++++++++++------------ net/ipv4/devinet.c | 21 +++++++++++---------- net/ipv6/addrconf.c | 2 +- 5 files changed, 30 insertions(+), 26 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1ecaea74d55..91ac97c2077 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -93,6 +93,8 @@ enum { #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) #define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 58a98180626..5fb70cfa108 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -86,7 +86,7 @@ struct sock *rtnl; struct rtnetlink_link * rtnetlink_links[NPROTO]; -static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] = +static const int rtm_min[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), @@ -102,7 +102,7 @@ static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] = [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), }; -static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] = +static const int rta_max[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, @@ -641,7 +641,7 @@ static void rtnetlink_rcv(struct sock *sk, int len) } while (rtnl && rtnl->sk_receive_queue.qlen); } -static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo }, [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink }, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c2a0346f423..e6e23eb1442 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1411,21 +1411,22 @@ static struct file_operations dn_dev_seq_fops = { #endif /* CONFIG_PROC_FS */ -static struct rtnetlink_link dnet_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = { - [4] = { .doit = dn_dev_rtm_newaddr, }, - [5] = { .doit = dn_dev_rtm_deladdr, }, - [6] = { .dumpit = dn_dev_dump_ifaddr, }, - + [RTM_NEWADDR - RTM_BASE] = { .doit = dn_dev_rtm_newaddr, }, + [RTM_DELADDR - RTM_BASE] = { .doit = dn_dev_rtm_deladdr, }, + [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_dev_dump_ifaddr, }, #ifdef CONFIG_DECNET_ROUTER - [8] = { .doit = dn_fib_rtm_newroute, }, - [9] = { .doit = dn_fib_rtm_delroute, }, - [10] = { .doit = dn_cache_getroute, .dumpit = dn_fib_dump, }, - [16] = { .doit = dn_fib_rtm_newrule, }, - [17] = { .doit = dn_fib_rtm_delrule, }, - [18] = { .dumpit = dn_fib_dump_rules, }, + [RTM_NEWROUTE - RTM_BASE] = { .doit = dn_fib_rtm_newroute, }, + [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, + [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, + .dumpit = dn_fib_dump, }, + [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, + [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else - [10] = { .doit = dn_cache_getroute, .dumpit = dn_cache_dump, }, + [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, + .dumpit = dn_cache_dump, #endif }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index eea7ef01077..abbc6d5c183 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1107,17 +1107,18 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) } } -static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = { - [4] = { .doit = inet_rtm_newaddr, }, - [5] = { .doit = inet_rtm_deladdr, }, - [6] = { .dumpit = inet_dump_ifaddr, }, - [8] = { .doit = inet_rtm_newroute, }, - [9] = { .doit = inet_rtm_delroute, }, - [10] = { .doit = inet_rtm_getroute, .dumpit = inet_dump_fib, }, +static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { + [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, }, + [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, }, + [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, }, + [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, }, + [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, }, + [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, + .dumpit = inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - [16] = { .doit = inet_rtm_newrule, }, - [17] = { .doit = inet_rtm_delrule, }, - [18] = { .dumpit = inet_dump_rules, }, + [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, + [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, #endif }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7196ac2f2d1..7744a259269 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3076,7 +3076,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); } -static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = { +static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, -- cgit v1.2.3 From 6a5d362120a61a719095443194cc2d5e9a7027dd Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 3 May 2005 14:33:27 -0700 Subject: [WAN]: kfree of NULL pointer is valid kfree(0) is perfectly valid, checking pointers for NULL before calling kfree() on them is redundant. The patch below cleans away a few such redundant checks (and while I was around some of those bits I couldn't stop myself from making a few tiny whitespace changes as well). Signed-off-by: Jesper Juhl Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/net/wan/cycx_x25.c | 8 ++------ drivers/net/wan/pc300_tty.c | 27 +++++++++++---------------- drivers/net/wan/sdla_chdlc.c | 13 ++++--------- drivers/net/wan/x25_asy.c | 20 ++++++-------------- 4 files changed, 23 insertions(+), 45 deletions(-) diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 5b48cd8568f..02d57c0b424 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -436,9 +436,7 @@ static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, } if (err) { - if (chan->local_addr) - kfree(chan->local_addr); - + kfree(chan->local_addr); kfree(chan); return err; } @@ -458,9 +456,7 @@ static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev) struct cycx_x25_channel *chan = dev->priv; if (chan->svc) { - if (chan->local_addr) - kfree(chan->local_addr); - + kfree(chan->local_addr); if (chan->state == WAN_CONNECTED) del_timer(&chan->timer); } diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c index 29f84ad0873..8454bf6caaa 100644 --- a/drivers/net/wan/pc300_tty.c +++ b/drivers/net/wan/pc300_tty.c @@ -400,10 +400,8 @@ static void cpc_tty_close(struct tty_struct *tty, struct file *flip) cpc_tty->buf_rx.last = NULL; } - if (cpc_tty->buf_tx) { - kfree(cpc_tty->buf_tx); - cpc_tty->buf_tx = NULL; - } + kfree(cpc_tty->buf_tx); + cpc_tty->buf_tx = NULL; CPC_TTY_DBG("%s: TTY closed\n",cpc_tty->name); @@ -666,7 +664,7 @@ static void cpc_tty_rx_work(void * data) unsigned long port; int i, j; st_cpc_tty_area *cpc_tty; - volatile st_cpc_rx_buf * buf; + volatile st_cpc_rx_buf *buf; char flags=0,flg_rx=1; struct tty_ldisc *ld; @@ -680,9 +678,9 @@ static void cpc_tty_rx_work(void * data) cpc_tty = &cpc_tty_area[port]; if ((buf=cpc_tty->buf_rx.first) != 0) { - if(cpc_tty->tty) { + if (cpc_tty->tty) { ld = tty_ldisc_ref(cpc_tty->tty); - if(ld) { + if (ld) { if (ld->receive_buf) { CPC_TTY_DBG("%s: call line disc. receive_buf\n",cpc_tty->name); ld->receive_buf(cpc_tty->tty, (char *)(buf->data), &flags, buf->size); @@ -691,7 +689,7 @@ static void cpc_tty_rx_work(void * data) } } cpc_tty->buf_rx.first = cpc_tty->buf_rx.first->next; - kfree((unsigned char *)buf); + kfree(buf); buf = cpc_tty->buf_rx.first; flg_rx = 1; } @@ -733,7 +731,7 @@ static void cpc_tty_rx_disc_frame(pc300ch_t *pc300chan) void cpc_tty_receive(pc300dev_t *pc300dev) { - st_cpc_tty_area *cpc_tty; + st_cpc_tty_area *cpc_tty; pc300ch_t *pc300chan = (pc300ch_t *)pc300dev->chan; pc300_t *card = (pc300_t *)pc300chan->card; int ch = pc300chan->channel; @@ -742,7 +740,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev) int rx_len, rx_aux; volatile unsigned char status; unsigned short first_bd = pc300chan->rx_first_bd; - st_cpc_rx_buf *new=NULL; + st_cpc_rx_buf *new = NULL; unsigned char dsr_rx; if (pc300dev->cpc_tty == NULL) { @@ -762,7 +760,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev) if (status & DST_EOM) { break; } - ptdescr=(pcsca_bd_t __iomem *)(card->hw.rambase+cpc_readl(&ptdescr->next)); + ptdescr = (pcsca_bd_t __iomem *)(card->hw.rambase+cpc_readl(&ptdescr->next)); } if (!rx_len) { @@ -771,10 +769,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev) cpc_writel(card->hw.scabase + DRX_REG(EDAL, ch), RX_BD_ADDR(ch, pc300chan->rx_last_bd)); } - if (new) { - kfree(new); - new = NULL; - } + kfree(new); return; } @@ -787,7 +782,7 @@ void cpc_tty_receive(pc300dev_t *pc300dev) continue; } - new = (st_cpc_rx_buf *) kmalloc(rx_len + sizeof(st_cpc_rx_buf), GFP_ATOMIC); + new = (st_cpc_rx_buf *)kmalloc(rx_len + sizeof(st_cpc_rx_buf), GFP_ATOMIC); if (new == 0) { cpc_tty_rx_disc_frame(pc300chan); continue; diff --git a/drivers/net/wan/sdla_chdlc.c b/drivers/net/wan/sdla_chdlc.c index afbe0024e3e..496d29237e9 100644 --- a/drivers/net/wan/sdla_chdlc.c +++ b/drivers/net/wan/sdla_chdlc.c @@ -3664,15 +3664,10 @@ static void wanpipe_tty_close(struct tty_struct *tty, struct file * filp) chdlc_disable_comm_shutdown(card); unlock_adapter_irq(&card->wandev.lock,&smp_flags); - if (card->tty_buf){ - kfree(card->tty_buf); - card->tty_buf=NULL; - } - - if (card->tty_rx){ - kfree(card->tty_rx); - card->tty_rx=NULL; - } + kfree(card->tty_buf); + card->tty_buf = NULL; + kfree(card->tty_rx); + card->tty_rx = NULL; } return; } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 8c5cfcb5582..1c540d82555 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -107,13 +107,9 @@ static struct x25_asy *x25_asy_alloc(void) static void x25_asy_free(struct x25_asy *sl) { /* Free all X.25 frame buffers. */ - if (sl->rbuff) { - kfree(sl->rbuff); - } + kfree(sl->rbuff); sl->rbuff = NULL; - if (sl->xbuff) { - kfree(sl->xbuff); - } + kfree(sl->xbuff); sl->xbuff = NULL; if (!test_and_clear_bit(SLF_INUSE, &sl->flags)) { @@ -134,10 +130,8 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu) { printk("%s: unable to grow X.25 buffers, MTU change cancelled.\n", dev->name); - if (xbuff != NULL) - kfree(xbuff); - if (rbuff != NULL) - kfree(rbuff); + kfree(xbuff); + kfree(rbuff); return -ENOMEM; } @@ -169,10 +163,8 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu) spin_unlock_bh(&sl->lock); - if (xbuff != NULL) - kfree(xbuff); - if (rbuff != NULL) - kfree(rbuff); + kfree(xbuff); + kfree(rbuff); return 0; } -- cgit v1.2.3 From 20cc6befa23bb993cf4a4c58becb1dd99e7fc927 Mon Sep 17 00:00:00 2001 From: Lucas Correia Villa Real Date: Tue, 3 May 2005 14:34:20 -0700 Subject: [PKT_SCHED]: fix typo on Kconfig This is a trivial fix for a typo on Kconfig, where the Generic Random Early Detection algorithm is abbreviated as RED instead of GRED. Signed-off-by: Lucas Correia Villa Real Signed-off-by: David S. Miller --- net/sched/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9c118baed9d..b0941186f86 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -185,7 +185,7 @@ config NET_SCH_GRED depends on NET_SCHED help Say Y here if you want to use the Generic Random Early Detection - (RED) packet scheduling algorithm for some of your network devices + (GRED) packet scheduling algorithm for some of your network devices (see the top of for details and references about the algorithm). -- cgit v1.2.3 From 0b2531bdc54e19717de5cb161d57e5ee0a7725ff Mon Sep 17 00:00:00 2001 From: Folkert van Heusden Date: Tue, 3 May 2005 14:36:08 -0700 Subject: [TCP]: Optimize check in port-allocation code. Signed-off-by: Folkert van Heusden Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3ac6659869c..dad98e4a504 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -222,10 +222,13 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int rover; spin_lock(&tcp_portalloc_lock); - rover = tcp_port_rover; + if (tcp_port_rover < low) + rover = low; + else + rover = tcp_port_rover; do { rover++; - if (rover < low || rover > high) + if (rover > high) rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; spin_lock(&head->lock); -- cgit v1.2.3 From c3924c70dd3bddc28b99ccd1688bd281bad1a9be Mon Sep 17 00:00:00 2001 From: Folkert van Heusden Date: Tue, 3 May 2005 14:36:45 -0700 Subject: [TCP]: Optimize check in port-allocation code, v6 version. Signed-off-by: Folkert van Heusden Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4760c85e19d..0f69e800a0a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -139,9 +139,12 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) int rover; spin_lock(&tcp_portalloc_lock); - rover = tcp_port_rover; + if (tcp_port_rover < low) + rover = low; + else + rover = tcp_port_rover; do { rover++; - if ((rover < low) || (rover > high)) + if (rover > high) rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; spin_lock(&head->lock); -- cgit v1.2.3 From 96edf83c4e284c08584f97623f7c7f029759459e Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 3 May 2005 14:38:09 -0700 Subject: [PPP]: remove redundant NULL pointer checks before kfree & vfree kfree() and vfree() can both deal with NULL pointers. This patch removes redundant NULL pointer checks from the ppp code in drivers/net/ Signed-off-by: Jesper Juhl Signed-off-by: David S. Miller --- drivers/net/ppp_deflate.c | 6 ++---- drivers/net/ppp_generic.c | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ppp_deflate.c b/drivers/net/ppp_deflate.c index 507d6328d4e..3872088fdd1 100644 --- a/drivers/net/ppp_deflate.c +++ b/drivers/net/ppp_deflate.c @@ -87,8 +87,7 @@ static void z_comp_free(void *arg) if (state) { zlib_deflateEnd(&state->strm); - if (state->strm.workspace) - vfree(state->strm.workspace); + vfree(state->strm.workspace); kfree(state); } } @@ -308,8 +307,7 @@ static void z_decomp_free(void *arg) if (state) { zlib_inflateEnd(&state->strm); - if (state->strm.workspace) - kfree(state->strm.workspace); + kfree(state->strm.workspace); kfree(state); } } diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index c456dc81b87..3b377f6cd4a 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -2467,14 +2467,10 @@ static void ppp_destroy_interface(struct ppp *ppp) skb_queue_purge(&ppp->mrq); #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER - if (ppp->pass_filter) { - kfree(ppp->pass_filter); - ppp->pass_filter = NULL; - } - if (ppp->active_filter) { - kfree(ppp->active_filter); - ppp->active_filter = NULL; - } + kfree(ppp->pass_filter); + ppp->pass_filter = NULL; + kfree(ppp->active_filter); + ppp->active_filter = NULL; #endif /* CONFIG_PPP_FILTER */ kfree(ppp); -- cgit v1.2.3 From 033d899904792d3501b7dd469495ca9138424ec3 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 3 May 2005 14:39:33 -0700 Subject: [PKT_SCHED]: HTB: Drop packet when direct queue is full htb_enqueue(): Free skb and return NET_XMIT_DROP if a packet is destined for the direct_queue but the direct_queue is full. (Before this: erroneously returned NET_XMIT_SUCCESS even though the packet was not enqueued) Signed-off-by: Asim Shankar Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index a85935e7d53..558cc087e60 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -717,6 +717,10 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->direct_queue.qlen < q->direct_qlen) { __skb_queue_tail(&q->direct_queue, skb); q->direct_pkts++; + } else { + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_DROP; } #ifdef CONFIG_NET_CLS_ACT } else if (!cl) { -- cgit v1.2.3 From 9dfa277f88388a94993b121db46b80df66f48d9e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 May 2005 14:41:18 -0700 Subject: [PKT_SCHED]: Fix range in PSCHED_TDIFF_SAFE to 0..bound Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7352e455053..fcb05a387db 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -157,7 +157,8 @@ psched_tod_diff(int delta_sec, int bound) case 1: \ __delta += 1000000; \ case 0: \ - __delta = abs(__delta); \ + if (__delta > bound || __delta < 0) \ + __delta = bound; \ } \ __delta; \ }) -- cgit v1.2.3 From 96c36023434b7b6824b1da72a6b7b1ca61d7310c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:43:27 -0700 Subject: [NETLINK]: cb_lock does not needs ref count on sk Here is a little optimisation for the cb_lock used by netlink_dump. While fixing that race earlier, I noticed that the reference count held by cb_lock is completely useless. The reason is that in order to obtain the protection of the reference count, you have to take the cb_lock. But the only way to take the cb_lock is through dereferencing the socket. That is, you must already possess a reference count on the socket before you can take advantage of the reference count held by cb_lock. As a corollary, we can remve the reference count held by the cb_lock. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 29a5fd231ea..4ee39206614 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -373,7 +373,6 @@ static int netlink_release(struct socket *sock) nlk->cb->done(nlk->cb); netlink_destroy_callback(nlk->cb); nlk->cb = NULL; - __sock_put(sk); } spin_unlock(&nlk->cb_lock); @@ -1099,7 +1098,6 @@ static int netlink_dump(struct sock *sk) spin_unlock(&nlk->cb_lock); netlink_destroy_callback(cb); - __sock_put(sk); return 0; } @@ -1138,7 +1136,6 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, return -EBUSY; } nlk->cb = cb; - sock_hold(sk); spin_unlock(&nlk->cb_lock); netlink_dump(sk); -- cgit v1.2.3 From 2a0a6ebee1d68552152ae8d4aeda91d806995dec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:55:09 -0700 Subject: [NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- kernel/audit.c | 19 ++++++++----------- net/core/rtnetlink.c | 23 ++++++++++++++--------- net/decnet/netfilter/dn_rtmsg.c | 3 ++- net/ipv4/netfilter/ip_queue.c | 20 +++++++++----------- net/ipv4/tcp_diag.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 20 +++++++++----------- net/xfrm/xfrm_user.c | 15 +++++++++++---- 7 files changed, 55 insertions(+), 48 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 0f84dd7af2c..ac26d4d960d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -427,7 +427,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* Get message from skb (based on rtnetlink_rcv_skb). Each message is * processed by audit_receive_msg. Malformed skbs with wrong length are * discarded silently. */ -static int audit_receive_skb(struct sk_buff *skb) +static void audit_receive_skb(struct sk_buff *skb) { int err; struct nlmsghdr *nlh; @@ -436,7 +436,7 @@ static int audit_receive_skb(struct sk_buff *skb) while (skb->len >= NLMSG_SPACE(0)) { nlh = (struct nlmsghdr *)skb->data; if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) - return 0; + return; rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) rlen = skb->len; @@ -446,23 +446,20 @@ static int audit_receive_skb(struct sk_buff *skb) netlink_ack(skb, nlh, 0); skb_pull(skb, rlen); } - return 0; } /* Receive messages from netlink socket. */ static void audit_receive(struct sock *sk, int length) { struct sk_buff *skb; + unsigned int qlen; - if (down_trylock(&audit_netlink_sem)) - return; + down(&audit_netlink_sem); - /* FIXME: this must not cause starvation */ - while ((skb = skb_dequeue(&sk->sk_receive_queue))) { - if (audit_receive_skb(skb) && skb->len) - skb_queue_head(&sk->sk_receive_queue, skb); - else - kfree_skb(skb); + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); + audit_receive_skb(skb); + kfree_skb(skb); } up(&audit_netlink_sem); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5fb70cfa108..6e1ab1e34b2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -609,26 +609,31 @@ static inline int rtnetlink_rcv_skb(struct sk_buff *skb) /* * rtnetlink input queue processing routine: - * - try to acquire shared lock. If it is failed, defer processing. + * - process as much as there was in the queue upon entry. * - feed skbs to rtnetlink_rcv_skb, until it refuse a message, - * that will occur, when a dump started and/or acquisition of - * exclusive lock failed. + * that will occur, when a dump started. */ static void rtnetlink_rcv(struct sock *sk, int len) { + unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); + do { struct sk_buff *skb; - if (rtnl_shlock_nowait()) - return; + rtnl_lock(); + + if (qlen > skb_queue_len(&sk->sk_receive_queue)) + qlen = skb_queue_len(&sk->sk_receive_queue); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + while (qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); if (rtnetlink_rcv_skb(skb)) { - if (skb->len) + if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); - else + qlen++; + } else kfree_skb(skb); break; } @@ -638,7 +643,7 @@ static void rtnetlink_rcv(struct sock *sk, int len) up(&rtnl_sem); netdev_run_todo(); - } while (rtnl && rtnl->sk_receive_queue.qlen); + } while (qlen); } static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index f86a6259fd1..101ddef9ba9 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -119,8 +119,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) static void dnrmg_receive_user_sk(struct sock *sk, int len) { struct sk_buff *skb; + unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - while((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { dnrmg_receive_user_skb(skb); kfree_skb(skb); } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 9e40dffc204..e5746b67441 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -546,20 +546,18 @@ ipq_rcv_skb(struct sk_buff *skb) static void ipq_rcv_sk(struct sock *sk, int len) { - do { - struct sk_buff *skb; + struct sk_buff *skb; + unsigned int qlen; - if (down_trylock(&ipqnl_sem)) - return; + down(&ipqnl_sem); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - ipq_rcv_skb(skb); - kfree_skb(skb); - } + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); + ipq_rcv_skb(skb); + kfree_skb(skb); + } - up(&ipqnl_sem); - - } while (ipqnl && ipqnl->sk_receive_queue.qlen); + up(&ipqnl_sem); } static int diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 313c1408da3..8faa8948f75 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -777,8 +777,9 @@ static inline void tcpdiag_rcv_skb(struct sk_buff *skb) static void tcpdiag_rcv(struct sock *sk, int len) { struct sk_buff *skb; + unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { tcpdiag_rcv_skb(skb); kfree_skb(skb); } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c54830b8959..750943e2d34 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -549,20 +549,18 @@ ipq_rcv_skb(struct sk_buff *skb) static void ipq_rcv_sk(struct sock *sk, int len) { - do { - struct sk_buff *skb; + struct sk_buff *skb; + unsigned int qlen; - if (down_trylock(&ipqnl_sem)) - return; + down(&ipqnl_sem); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - ipq_rcv_skb(skb); - kfree_skb(skb); - } + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); + ipq_rcv_skb(skb); + kfree_skb(skb); + } - up(&ipqnl_sem); - - } while (ipqnl && ipqnl->sk_receive_queue.qlen); + up(&ipqnl_sem); } static int diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 52b5843937c..dab112f1dd8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1008,17 +1008,24 @@ static int xfrm_user_rcv_skb(struct sk_buff *skb) static void xfrm_netlink_rcv(struct sock *sk, int len) { + unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); + do { struct sk_buff *skb; down(&xfrm_cfg_sem); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (qlen > skb_queue_len(&sk->sk_receive_queue)) + qlen = skb_queue_len(&sk->sk_receive_queue); + + while (qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); if (xfrm_user_rcv_skb(skb)) { - if (skb->len) + if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); - else + qlen++; + } else kfree_skb(skb); break; } @@ -1027,7 +1034,7 @@ static void xfrm_netlink_rcv(struct sock *sk, int len) up(&xfrm_cfg_sem); - } while (xfrm_nl && xfrm_nl->sk_receive_queue.qlen); + } while (qlen); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard) -- cgit v1.2.3 From 09e14305982efc2f3b509d3c50ef5dcbff64a998 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 May 2005 15:30:05 -0700 Subject: [NETLINK]: Fix infinite loops in synchronous netlink changes. The qlen should continue to decrement, even if we pop partially processed SKBs back onto the receive queue. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 7 +++---- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/xfrm/xfrm_user.c | 7 +++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6e1ab1e34b2..75b6d33b529 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -626,14 +626,13 @@ static void rtnetlink_rcv(struct sock *sk, int len) if (qlen > skb_queue_len(&sk->sk_receive_queue)) qlen = skb_queue_len(&sk->sk_receive_queue); - while (qlen--) { + for (; qlen; qlen--) { skb = skb_dequeue(&sk->sk_receive_queue); if (rtnetlink_rcv_skb(skb)) { - if (skb->len) { + if (skb->len) skb_queue_head(&sk->sk_receive_queue, skb); - qlen++; - } else + else kfree_skb(skb); break; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 101ddef9ba9..284a9998e53 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -121,7 +121,7 @@ static void dnrmg_receive_user_sk(struct sock *sk, int len) struct sk_buff *skb; unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { + for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) { dnrmg_receive_user_skb(skb); kfree_skb(skb); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dab112f1dd8..e8740a4a1d7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1018,14 +1018,13 @@ static void xfrm_netlink_rcv(struct sock *sk, int len) if (qlen > skb_queue_len(&sk->sk_receive_queue)) qlen = skb_queue_len(&sk->sk_receive_queue); - while (qlen--) { + for (; qlen; qlen--) { skb = skb_dequeue(&sk->sk_receive_queue); if (xfrm_user_rcv_skb(skb)) { - if (skb->len) { + if (skb->len) skb_queue_head(&sk->sk_receive_queue, skb); - qlen++; - } else + else kfree_skb(skb); break; } -- cgit v1.2.3 From 0f4821e7b93fe72e89b8ff393bd8e705bd178aa5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 May 2005 16:15:59 -0700 Subject: [XFRM/RTNETLINK]: Decrement qlen properly in {xfrm_,rt}netlink_rcv(). If we free up a partially processed packet because it's skb->len dropped to zero, we need to decrement qlen because we are dropping out of the top-level loop so it will do the decrement for us. Spotted by Herbert Xu. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- net/xfrm/xfrm_user.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 75b6d33b529..00caf4b318b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -632,8 +632,10 @@ static void rtnetlink_rcv(struct sock *sk, int len) if (skb->len) skb_queue_head(&sk->sk_receive_queue, skb); - else + else { kfree_skb(skb); + qlen--; + } break; } kfree_skb(skb); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e8740a4a1d7..5ddda2c98af 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1024,8 +1024,10 @@ static void xfrm_netlink_rcv(struct sock *sk, int len) if (skb->len) skb_queue_head(&sk->sk_receive_queue, skb); - else + else { kfree_skb(skb); + qlen--; + } break; } kfree_skb(skb); -- cgit v1.2.3 From cacaddf57ed4d5ca994e9a7e2bd5558061f5d89d Mon Sep 17 00:00:00 2001 From: "Tommy S. Christensen" Date: Tue, 3 May 2005 16:18:52 -0700 Subject: [NET]: Disable queueing when carrier is lost. Some network drivers call netif_stop_queue() when detecting loss of carrier. This leads to packets being queued up at the qdisc level for an unbound period of time. In order to prevent this effect, the core networking stack will now cease to queue packets for any device, that is operationally down (i.e. the queue is flushed and disabled). Signed-off-by: Tommy S. Christensen Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/link_watch.c | 7 +++++++ net/sched/sch_generic.c | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 4859b7446c6..d43d1201275 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,12 @@ void linkwatch_run_queue(void) clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); if (dev->flags & IFF_UP) { + if (netif_carrier_ok(dev)) { + WARN_ON(dev->qdisc_sleeping == &noop_qdisc); + dev_activate(dev); + } else + dev_deactivate(dev); + netdev_state_change(dev); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8c01e023f02..9a2f8e41a26 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -539,6 +539,10 @@ void dev_activate(struct net_device *dev) write_unlock_bh(&qdisc_tree_lock); } + if (!netif_carrier_ok(dev)) + /* Delay activation until next carrier-on event */ + return; + spin_lock_bh(&dev->queue_lock); rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); if (dev->qdisc != &noqueue_qdisc) { -- cgit v1.2.3 From e4f8ab00cf3599ecb8110c0a838cd15d013b79e5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 May 2005 16:20:39 -0700 Subject: [NETFILTER]: Fix nf_debug_ip_local_deliver() Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/netfilter.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/net/core/netfilter.c b/net/core/netfilter.c index e51cfa46950..92c51824797 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -217,21 +217,10 @@ void nf_debug_ip_local_deliver(struct sk_buff *skb) * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */ if (!skb->dev) { printk("ip_local_deliver: skb->dev is NULL.\n"); - } - else if (strcmp(skb->dev->name, "lo") == 0) { - if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) - | (1 << NF_IP_POST_ROUTING) - | (1 << NF_IP_PRE_ROUTING) - | (1 << NF_IP_LOCAL_IN))) { - printk("ip_local_deliver: bad loopback skb: "); - debug_print_hooks_ip(skb->nf_debug); - nf_dump_skb(PF_INET, skb); - } - } - else { + } else { if (skb->nf_debug != ((1<nf_debug); nf_dump_skb(PF_INET, skb); } -- cgit v1.2.3 From bd96535b81ad09d7593cc75093534acb984d3dc9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 3 May 2005 16:21:37 -0700 Subject: [NETFILTER]: Drop conntrack reference in ip_dev_loopback_xmit() Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/netfilter.c | 2 -- net/ipv4/ip_output.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 92c51824797..22a8f127c4a 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -236,8 +236,6 @@ void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) debug_print_hooks_ip(newskb->nf_debug); nf_dump_skb(PF_INET, newskb); } - /* Clear to avoid confusing input check */ - newskb->nf_debug = 0; } void nf_debug_ip_finish_output2(struct sk_buff *skb) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 38f69532a02..24fe3e00b42 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -111,6 +111,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) #ifdef CONFIG_NETFILTER_DEBUG nf_debug_ip_loopback_xmit(newskb); #endif + nf_reset(newskb); netif_rx(newskb); return 0; } -- cgit v1.2.3 From 8cbe1d46d69f9e2c49f284fe0e9aee3387bd2c71 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 May 2005 16:24:03 -0700 Subject: [PKT_SCHED]: netetm: trap infinite loop hange on qlen underflow Due to bugs in netem (fixed by later patches), it is possible to get qdisc qlen to go negative. If this happens the CPU ends up spinning forever in qdisc_run(). So add a BUG_ON() to trap it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9a2f8e41a26..87e48a4e105 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -179,6 +179,7 @@ requeue: netif_schedule(dev); return 1; } + BUG_ON((int) q->q.qlen < 0); return q->q.qlen; } -- cgit v1.2.3 From 771018e76aaa6474be20a53c20458bcae8b00485 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 May 2005 16:24:32 -0700 Subject: [PKT_SCHED]: netetm: make qdisc friendly to outer disciplines Netem currently dumps packets into the queue when timer expires. This patch makes work by self-clocking (more like TBF). It fixes a bug when 0 delay is requested (only doing loss or duplication). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 113 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 46 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 31c29deb139..864b8d353ff 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -138,38 +138,78 @@ static long tabledist(unsigned long mu, long sigma, } /* Put skb in the private delayed queue. */ -static int delay_skb(struct Qdisc *sch, struct sk_buff *skb) +static int netem_delay(struct Qdisc *sch, struct sk_buff *skb) { struct netem_sched_data *q = qdisc_priv(sch); - struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; psched_tdiff_t td; psched_time_t now; PSCHED_GET_TIME(now); td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - PSCHED_TADD2(now, td, cb->time_to_send); /* Always queue at tail to keep packets in order */ if (likely(q->delayed.qlen < q->limit)) { + struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; + + PSCHED_TADD2(now, td, cb->time_to_send); + + pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb, + now, cb->time_to_send); + __skb_queue_tail(&q->delayed, skb); - if (!timer_pending(&q->timer)) { - q->timer.expires = jiffies + PSCHED_US2JIFFIE(td); - add_timer(&q->timer); - } return NET_XMIT_SUCCESS; } + pr_debug("netem_delay: queue over limit %d\n", q->limit); + sch->qstats.overlimits++; kfree_skb(skb); return NET_XMIT_DROP; } +/* + * Move a packet that is ready to send from the delay holding + * list to the underlying qdisc. + */ +static int netem_run(struct Qdisc *sch) +{ + struct netem_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + psched_time_t now; + + PSCHED_GET_TIME(now); + + skb = skb_peek(&q->delayed); + if (skb) { + const struct netem_skb_cb *cb + = (const struct netem_skb_cb *)skb->cb; + long delay + = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); + pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); + + /* if more time remaining? */ + if (delay > 0) { + mod_timer(&q->timer, jiffies + delay); + return 1; + } + + __skb_unlink(skb, &q->delayed); + + if (q->qdisc->enqueue(skb, q->qdisc)) { + sch->q.qlen--; + sch->qstats.drops++; + } + } + + return 0; +} + static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb2; int ret; - pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies); + pr_debug("netem_enqueue skb=%p\n", skb); /* Random packet drop 0 => none, ~0 => all */ if (q->loss && q->loss >= get_crandom(&q->loss_cor)) { @@ -184,7 +224,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { pr_debug("netem_enqueue: dup %p\n", skb2); - if (delay_skb(sch, skb2)) { + if (netem_delay(sch, skb2)) { sch->q.qlen++; sch->bstats.bytes += skb2->len; sch->bstats.packets++; @@ -202,7 +242,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = q->qdisc->enqueue(skb, q->qdisc); } else { q->counter = 0; - ret = delay_skb(sch, skb); + ret = netem_delay(sch, skb); + netem_run(sch); } if (likely(ret == NET_XMIT_SUCCESS)) { @@ -241,56 +282,35 @@ static unsigned int netem_drop(struct Qdisc* sch) return len; } -/* Dequeue packet. - * Move all packets that are ready to send from the delay holding - * list to the underlying qdisc, then just call dequeue - */ static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; + int pending; + + pending = netem_run(sch); skb = q->qdisc->dequeue(q->qdisc); - if (skb) + if (skb) { + pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; + sch->flags &= ~TCQ_F_THROTTLED; + } + else if (pending) { + pr_debug("netem_dequeue: throttling\n"); + sch->flags |= TCQ_F_THROTTLED; + } + return skb; } static void netem_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc *)arg; - struct netem_sched_data *q = qdisc_priv(sch); - struct net_device *dev = sch->dev; - struct sk_buff *skb; - psched_time_t now; - pr_debug("netem_watchdog: fired @%lu\n", jiffies); - - spin_lock_bh(&dev->queue_lock); - PSCHED_GET_TIME(now); - - while ((skb = skb_peek(&q->delayed)) != NULL) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; - long delay - = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); - pr_debug("netem_watchdog: skb %p@%lu %ld\n", - skb, jiffies, delay); - - /* if more time remaining? */ - if (delay > 0) { - mod_timer(&q->timer, jiffies + delay); - break; - } - __skb_unlink(skb, &q->delayed); - - if (q->qdisc->enqueue(skb, q->qdisc)) { - sch->q.qlen--; - sch->qstats.drops++; - } - } - qdisc_run(dev); - spin_unlock_bh(&dev->queue_lock); + pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen); + sch->flags &= ~TCQ_F_THROTTLED; + netif_schedule(sch->dev); } static void netem_reset(struct Qdisc *sch) @@ -301,6 +321,7 @@ static void netem_reset(struct Qdisc *sch) skb_queue_purge(&q->delayed); sch->q.qlen = 0; + sch->flags &= ~TCQ_F_THROTTLED; del_timer_sync(&q->timer); } -- cgit v1.2.3 From d5d75cd6b10ddad2f375b61092754474ad78aec7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 May 2005 16:24:57 -0700 Subject: [PKT_SCHED]: netetm: adjust parent qlen when duplicating Fix qlen underrun when doing duplication with netem. If netem is used as leaf discipline, then the parent needs to be tweaked when packets are duplicated. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_api.c | 1 + net/sched/sch_netem.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4323a74eea3..07977f8f267 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1289,6 +1289,7 @@ static int __init pktsched_init(void) subsys_initcall(pktsched_init); +EXPORT_SYMBOL(qdisc_lookup); EXPORT_SYMBOL(qdisc_get_rtab); EXPORT_SYMBOL(qdisc_put_rtab); EXPORT_SYMBOL(register_qdisc); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 864b8d353ff..e0c9fbe73b1 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -206,7 +206,6 @@ static int netem_run(struct Qdisc *sch) static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb2; int ret; pr_debug("netem_enqueue skb=%p\n", skb); @@ -220,11 +219,21 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor) - && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - pr_debug("netem_enqueue: dup %p\n", skb2); + if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) { + struct sk_buff *skb2; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) { + struct Qdisc *qp; + + /* Since one packet can generate two packets in the + * queue, the parent's qlen accounting gets confused, + * so fix it. + */ + qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent)); + if (qp) + qp->q.qlen++; - if (netem_delay(sch, skb2)) { sch->q.qlen++; sch->bstats.bytes += skb2->len; sch->bstats.packets++; @@ -253,6 +262,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } else sch->qstats.drops++; + pr_debug("netem: enqueue ret %d\n", ret); return ret; } -- cgit v1.2.3 From aabc9761b69f1bfa30a78f7005be95cc9cc06175 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 16:27:10 -0700 Subject: [IPSEC]: Store idev entries I found a bug that stopped IPsec/IPv6 from working. About a month ago IPv6 started using rt6i_idev->dev on the cached socket dst entries. If the cached socket dst entry is IPsec, then rt6i_idev will be NULL. Since we want to look at the rt6i_idev of the original route in this case, the easiest fix is to store rt6i_idev in the IPsec dst entry just as we do for a number of other IPv6 route attributes. Unfortunately this means that we need some new code to handle the references to rt6i_idev. That's why this patch is bigger than it would otherwise be. I've also done the same thing for IPv4 since it is conceivable that once these idev attributes start getting used for accounting, we probably need to dereference them for IPv4 IPsec entries too. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 ++++++++++ net/ipv4/xfrm4_policy.c | 42 ++++++++++++++++++++++++++++++++++++++++++ net/ipv6/xfrm6_policy.c | 43 +++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 25 ++----------------------- 4 files changed, 97 insertions(+), 23 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 73e9a8ca3d3..e142a256d5d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1,6 +1,7 @@ #ifndef _NET_XFRM_H #define _NET_XFRM_H +#include #include #include #include @@ -516,6 +517,15 @@ struct xfrm_dst u32 child_mtu_cached; }; +static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) +{ + dst_release(xdst->route); + if (likely(xdst->u.dst.xfrm)) + xfrm_state_put(xdst->u.dst.xfrm); +} + +extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); + /* Decapsulation state, used by the input to store data during * decapsulation procedure, to be used later (during the policy * check diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7fe2afd2e66..b2b60f3e9cd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -8,7 +8,10 @@ * */ +#include +#include #include +#include #include #include @@ -152,6 +155,8 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int x->u.rt.rt_dst = rt0->rt_dst; x->u.rt.rt_gateway = rt->rt_gateway; x->u.rt.rt_spec_dst = rt0->rt_spec_dst; + x->u.rt.idev = rt0->idev; + in_dev_hold(rt0->idev); header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } @@ -243,11 +248,48 @@ static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm4_dst_destroy(struct dst_entry *dst) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + if (likely(xdst->u.rt.idev)) + in_dev_put(xdst->u.rt.idev); + xfrm_dst_destroy(xdst); +} + +static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int unregister) +{ + struct xfrm_dst *xdst; + + if (!unregister) + return; + + xdst = (struct xfrm_dst *)dst; + if (xdst->u.rt.idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(&loopback_dev); + BUG_ON(!loopback_idev); + + do { + in_dev_put(xdst->u.rt.idev); + xdst->u.rt.idev = loopback_idev; + in_dev_hold(loopback_idev); + xdst = (struct xfrm_dst *)xdst->u.dst.child; + } while (xdst->u.dst.xfrm); + + __in_dev_put(loopback_idev); + } + + xfrm_dst_ifdown(dst, dev); +} + static struct dst_ops xfrm4_dst_ops = { .family = AF_INET, .protocol = __constant_htons(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, + .destroy = xfrm4_dst_destroy, + .ifdown = xfrm4_dst_ifdown, .gc_thresh = 1024, .entry_size = sizeof(struct xfrm_dst), }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8a4f37de4d2..4429b1a1fe5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -11,7 +11,11 @@ * */ +#include +#include #include +#include +#include #include #include #include @@ -166,6 +170,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); x->u.rt6.rt6i_dst = rt0->rt6i_dst; x->u.rt6.rt6i_src = rt0->rt6i_src; + x->u.rt6.rt6i_idev = rt0->rt6i_idev; + in6_dev_hold(rt0->rt6i_idev); header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } @@ -251,11 +257,48 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm6_dst_destroy(struct dst_entry *dst) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); + xfrm_dst_destroy(xdst); +} + +static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int unregister) +{ + struct xfrm_dst *xdst; + + if (!unregister) + return; + + xdst = (struct xfrm_dst *)dst; + if (xdst->u.rt6.rt6i_idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + BUG_ON(!loopback_idev); + + do { + in6_dev_put(xdst->u.rt6.rt6i_idev); + xdst->u.rt6.rt6i_idev = loopback_idev; + in6_dev_hold(loopback_idev); + xdst = (struct xfrm_dst *)xdst->u.dst.child; + } while (xdst->u.dst.xfrm); + + __in6_dev_put(loopback_idev); + } + + xfrm_dst_ifdown(dst, dev); +} + static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, .protocol = __constant_htons(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .destroy = xfrm6_dst_destroy, + .ifdown = xfrm6_dst_ifdown, .gc_thresh = 1024, .entry_size = sizeof(struct xfrm_dst), }; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 80828078733..55ed979db14 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1028,30 +1028,15 @@ static int stale_bundle(struct dst_entry *dst) return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); } -static void xfrm_dst_destroy(struct dst_entry *dst) +void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - - dst_release(xdst->route); - - if (!dst->xfrm) - return; - xfrm_state_put(dst->xfrm); - dst->xfrm = NULL; -} - -static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev, - int unregister) -{ - if (!unregister) - return; - while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { dst->dev = &loopback_dev; dev_hold(&loopback_dev); dev_put(dev); } } +EXPORT_SYMBOL(xfrm_dst_ifdown); static void xfrm_link_failure(struct sk_buff *skb) { @@ -1262,10 +1247,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; - if (likely(dst_ops->destroy == NULL)) - dst_ops->destroy = xfrm_dst_destroy; - if (likely(dst_ops->ifdown == NULL)) - dst_ops->ifdown = xfrm_dst_ifdown; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) @@ -1297,8 +1278,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = NULL; dst_ops->kmem_cachep = NULL; dst_ops->check = NULL; - dst_ops->destroy = NULL; - dst_ops->ifdown = NULL; dst_ops->negative_advice = NULL; dst_ops->link_failure = NULL; dst_ops->get_mss = NULL; -- cgit v1.2.3 From 14d50e78f947d340066ee0465dd892ad1d9162c0 Mon Sep 17 00:00:00 2001 From: J Hadi Salim Date: Tue, 3 May 2005 16:29:13 -0700 Subject: [PKT_SCHED]: Action repeat Long standing bug. Policy to repeat an action never worked. Signed-off-by: J Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5e6cc371b39..cafcb084098 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -171,10 +171,10 @@ repeat: skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); } - if (ret != TC_ACT_PIPE) - goto exec_done; if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ + if (ret != TC_ACT_PIPE) + goto exec_done; } act = a->next; } -- cgit v1.2.3