From 93821778def10ec1e69aa3ac10adee975dad4ff3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Sep 2008 11:48:46 -0700 Subject: udp: Fix rcv socket locking The previous patch in response to the recursive locking on IPsec reception is broken as it tries to drop the BH socket lock while in user context. This patch fixes it by shrinking the section protected by the socket lock to sock_queue_rcv_skb only. The only reason we added the lock is for the accounting which happens in that function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 62 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8e42fbbd576..57e26fa6618 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -989,9 +1010,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) up->encap_rcv != NULL) { int ret; - bh_unlock_sock(sk); ret = (*up->encap_rcv)(sk, skb); - bh_lock_sock(sk); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, @@ -1044,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1092,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1195,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1494,7 +1498,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, -- cgit v1.2.3 From a3028b8ed1e1e9930bfa70ce4555fb7f9fad3dcc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 02:48:25 -0700 Subject: sctp: set the skb->ip_summed correctly when sending over loopback. Loopback used to clobber the ip_summed filed which sctp then used to figure out if it needed to do checksumming or not. Now that loopback doesn't do that any more, sctp needs to set the ip_summed field correctly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 0dc4a7dfb23..225c7123c41 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -533,7 +533,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!(dst->dev->features & NETIF_F_NO_CSUM)) { crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); - } + } else + nskb->ip_summed = CHECKSUM_UNNECESSARY; /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. -- cgit v1.2.3 From 0ef46e285c062cbe35d60c0adbff96f530d31c86 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 16:27:38 -0700 Subject: sctp: do not enable peer features if we can't do them. Do not enable peer features like addip and auth, if they are administratively disabled localy. If the peer resports that he supports something that we don't, neither end can use it so enabling it is pointless. This solves a problem when talking to a peer that has auth and addip enabled while we do not. Found by Andrei Pelinescu-Onciul . Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e8ca4e54981..fe94f42fa06 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1886,11 +1886,13 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - asoc->peer.auth_capable = 1; + if (sctp_auth_enable) + asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - asoc->peer.asconf_capable = 1; + if (sctp_addip_enable) + asoc->peer.asconf_capable = 1; break; default: break; @@ -2460,6 +2462,9 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: + if (!sctp_addip_enable) + goto fall_through; + addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); -- cgit v1.2.3 From add52379dde2e5300e2d574b172e62c6cf43b3d3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 16:28:27 -0700 Subject: sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH If INIT-ACK is received with SupportedExtensions parameter which indicates that the peer does not support AUTH, the packet will be silently ignore, and sctp_process_init() do cleanup all of the transports in the association. When T1-Init timer is expires, OOPS happen while we try to choose a different init transport. The solution is to only clean up the non-active transports, i.e the ones that the peer added. However, that introduces a problem with sctp_connectx(), because we don't mark the proper state for the transports provided by the user. So, we'll simply mark user-provided transports as ACTIVE. That will allow INIT retransmissions to work properly in the sctp_connectx() context and prevent the crash. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 9 +++++---- net/sctp/sm_make_chunk.c | 6 ++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8472b8b349c..abd51cef241 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -599,11 +599,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); if (peer) { + /* An UNKNOWN state is only set on transports added by + * user in sctp_connectx() call. Such transports should be + * considered CONFIRMED per RFC 4960, Section 5.4. + */ if (peer->state == SCTP_UNKNOWN) { - if (peer_state == SCTP_ACTIVE) - peer->state = SCTP_ACTIVE; - if (peer_state == SCTP_UNCONFIRMED) - peer->state = SCTP_UNCONFIRMED; + peer->state = SCTP_ACTIVE; } return peer; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fe94f42fa06..b599cbba4fb 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2321,12 +2321,10 @@ clean_up: /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); - list_del_init(pos); - sctp_transport_free(transport); + if (transport->state != SCTP_ACTIVE) + sctp_assoc_rm_peer(asoc, transport); } - asoc->peer.transport_count = 0; - nomem: return 0; } -- cgit v1.2.3 From ad55dcaff0e34269f86975ce2ea0da22e9eb74a1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 20 Sep 2008 22:05:50 -0700 Subject: netdev: simple_tx_hash shouldn't hash inside fragments Currently simple_tx_hash is hashing inside of udp fragments. As a result packets are getting getting sent to all queues when they shouldn't be. This causes a serious performance regression which can be seen by sending UDP frames larger than mtu on multiqueue devices. This change will make it so that fragments are hashed only as IP datagrams w/o any protocol information. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e719ed29310..e8eb2b47834 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -1667,7 +1668,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) { u32 addr1, addr2, ports; u32 hash, ihl; - u8 ip_proto; + u8 ip_proto = 0; if (unlikely(!simple_tx_hashrnd_initialized)) { get_random_bytes(&simple_tx_hashrnd, 4); @@ -1676,7 +1677,8 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) switch (skb->protocol) { case __constant_htons(ETH_P_IP): - ip_proto = ip_hdr(skb)->protocol; + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; addr1 = ip_hdr(skb)->saddr; addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; -- cgit v1.2.3 From 2d4c8266774188cda7f7e612e6dfb8ad12c579d5 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 22 Sep 2008 13:57:49 -0700 Subject: sys_paccept: disable paccept() until API design is resolved The reasons for disabling paccept() are as follows: * The API is more complex than needed. There is AFAICS no demonstrated use case that the sigset argument of this syscall serves that couldn't equally be served by the use of pselect/ppoll/epoll_pwait + traditional accept(). Roland seems to concur with this opinion (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255). I have (more than once) asked Ulrich to explain otherwise (http://thread.gmane.org/gmane.linux.kernel/723952/focus=731018), but he does not respond, so one is left to assume that he doesn't know of such a case. * The use of a sigset argument is not consistent with other I/O APIs that can block on a single file descriptor (e.g., read(), recv(), connect()). * The behavior of paccept() when interrupted by a signal is IMO strange: the kernel restarts the system call if SA_RESTART was set for the handler. I think that it should not do this -- that it should behave consistently with paccept()/ppoll()/epoll_pwait(), which never restart, regardless of SA_RESTART. The reasoning here is that the very purpose of paccept() is to wait for a connection or a signal, and that restarting in the latter case is probably never useful. (Note: Roland disagrees on this point, believing that rather paccept() should be consistent with accept() in its behavior wrt EINTR (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255).) I believe that instead, a simpler API, consistent with Ulrich's other recent additions, is preferable: accept4(int fd, struct sockaddr *sa, socklen_t *salen, ind flags); (This simpler API was originally proposed by Ulrich: http://thread.gmane.org/gmane.linux.network/92072) If this simpler API is added, then if we later decide that the sigset argument really is required, then a suitable bit in 'flags' could be added to indicate the presence of the sigset argument. At this point, I am hoping we either will get a counter-argument from Ulrich about why we really do need paccept()'s sigset argument, or that he will resubmit the original accept4() patch. Signed-off-by: Michael Kerrisk Cc: David Miller Cc: Davide Libenzi Cc: Alan Cox Cc: Ulrich Drepper Cc: Jakub Jelinek Cc: Roland McGrath Cc: Oleg Nesterov Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 8ef8ba81b9e..3e8d4e35c08 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1511,6 +1511,7 @@ out_fd: goto out_put; } +#if 0 #ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, @@ -1564,6 +1565,7 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); } #endif +#endif asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) -- cgit v1.2.3 From 72029fe85d8d060b3f966f2dbc36b3c75b5a6532 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p: implement proper trans module refcounting and unregistration 9p trans modules aren't refcounted nor were they unregistered properly. Fix it. * Add 9p_trans_module->owner and reference the module on each trans instance creation and put it on destruction. * Protect v9fs_trans_list with a spinlock. This isn't strictly necessary as the list is manipulated only during module loading / unloading but it's a good idea to make the API safe. * Unregister trans modules when the corresponding module is being unloaded. * While at it, kill unnecessary EXPORT_SYMBOL on p9_trans_fd_init(). Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 10 ++++-- net/9p/mod.c | 92 +++++++++++++++++++++++++++++++++++++-------------- net/9p/trans_fd.c | 11 +++++- net/9p/trans_virtio.c | 2 ++ 4 files changed, 87 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 2ffe40cf2f0..10e320307ec 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; @@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_match_trans(&args[0]); + clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt) continue; } } + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + kfree(options); return ret; } @@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans_mod = NULL; clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); @@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt) clnt->trans = NULL; } + v9fs_put_trans(clnt->trans_mod); + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) p9_fid_destroy(fid); diff --git a/net/9p/mod.c b/net/9p/mod.c index bdee1fb7cc6..1084feb24cb 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level"); * */ +static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); -static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p @@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport; */ void v9fs_register_trans(struct p9_trans_module *m) { + spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); - if (m->def) - v9fs_default_transport = m; + spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** - * v9fs_match_trans - match transport versus registered transports + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove + * + */ +void v9fs_unregister_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_unregister_trans); + +/** + * v9fs_get_trans_by_name - get transport with the matching name * @name: string identifying transport * */ -struct p9_trans_module *v9fs_match_trans(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) { - struct list_head *p; - struct p9_trans_module *t = NULL; - - list_for_each(p, &v9fs_trans_list) { - t = list_entry(p, struct p9_trans_module, list); - if (strncmp(t->name, name->from, name->to-name->from) == 0) - return t; - } - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strncmp(t->name, name->from, name->to-name->from) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_match_trans); +EXPORT_SYMBOL(v9fs_get_trans_by_name); /** - * v9fs_default_trans - returns pointer to default transport + * v9fs_get_default_trans - get the default transport * */ -struct p9_trans_module *v9fs_default_trans(void) +struct p9_trans_module *v9fs_get_default_trans(void) { - if (v9fs_default_transport) - return v9fs_default_transport; - else if (!list_empty(&v9fs_trans_list)) - return list_first_entry(&v9fs_trans_list, - struct p9_trans_module, list); - else - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_default_trans); +EXPORT_SYMBOL(v9fs_get_default_trans); +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} /** * v9fs_init - Initialize module @@ -120,6 +160,8 @@ static int __init init_p9(void) static void __exit exit_p9(void) { printk(KERN_INFO "Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); } module_init(init_p9) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index cdf137af7ad..6a32ffdb942 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1629,6 +1629,7 @@ static struct p9_trans_module p9_tcp_trans = { .maxsize = MAX_SOCK_BUF, .def = 1, .create = p9_trans_create_tcp, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { @@ -1636,6 +1637,7 @@ static struct p9_trans_module p9_unix_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_unix, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { @@ -1643,6 +1645,7 @@ static struct p9_trans_module p9_fd_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_fd, + .owner = THIS_MODULE, }; int p9_trans_fd_init(void) @@ -1659,4 +1662,10 @@ int p9_trans_fd_init(void) return 0; } -EXPORT_SYMBOL(p9_trans_fd_init); + +void p9_trans_fd_exit(void) +{ + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); +} diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 42adc052b14..94912e077a5 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .maxsize = PAGE_SIZE*16, .def = 0, + .owner = THIS_MODULE, }; /* The standard init function */ @@ -545,6 +546,7 @@ static int __init p9_virtio_init(void) static void __exit p9_virtio_cleanup(void) { unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); } module_init(p9_virtio_init); -- cgit v1.2.3 From 7dc5d24be06a5ed874af035d52a083a7b61ef1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: fix trans_fd::p9_conn_destroy() p9_conn_destroy() first kills all current requests by calling p9_conn_cancel(), then waits for the request list to be cleared by waiting on p9_conn->equeue. After that, polling is stopped and the trans is destroyed. This sequence has a few problems. * Read and write works were never cancelled and the p9_conn can be destroyed while the works are running as r/w works remove requests from the list and dereference the p9_conn from them. * The list emptiness wait using p9_conn->equeue wouldn't trigger because p9_conn_cancel() always clears all the lists and the only way the wait can be triggered is to have another task to issue a request between the slim window between p9_conn_cancel() and the wait, which isn't safe under the current implementation with or without the wait. This patch fixes the problem by first stopping poll, which can schedule r/w works, first and cancle r/w works which guarantees that r/w works are not and will not run from that point and then calling p9_conn_cancel() and do the rest of destruction. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6a32ffdb942..ee0d151da31 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -151,7 +151,6 @@ struct p9_mux_poll_task { * @trans: reference to transport instance for this connection * @tagpool: id accounting for transactions * @err: error state - * @equeue: event wait_q (?) * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rcall: current response &p9_fcall structure @@ -178,7 +177,6 @@ struct p9_conn { struct p9_trans *trans; struct p9_idpool *tagpool; int err; - wait_queue_head_t equeue; struct list_head req_list; struct list_head unsent_req_list; struct p9_fcall *rcall; @@ -430,7 +428,6 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) } m->err = 0; - init_waitqueue_head(&m->equeue); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); m->rcall = NULL; @@ -483,18 +480,13 @@ static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); - p9_conn_cancel(m, -ECONNRESET); - - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p waiting for empty request queue\n", m); - wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); + + p9_conn_cancel(m, -ECONNRESET); + m->trans = NULL; p9_idpool_destroy(m->tagpool); kfree(m); @@ -840,8 +832,6 @@ static void p9_read_work(struct work_struct *work) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } } else { if (err >= 0 && rcall->id != P9_RFLUSH) @@ -984,8 +974,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } kfree(freq->tcall); @@ -1191,8 +1179,6 @@ void p9_conn_cancel(struct p9_conn *m, int err) else kfree(req->rcall); } - - wake_up(&m->equeue); } /** -- cgit v1.2.3 From 571ffeafffbfdd0b8f2f9d3b991028797ec87e42 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: clean up p9_conn_create() * Use kzalloc() to allocate p9_conn and remove 0/NULL initializations. * Clean up error return paths. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index ee0d151da31..6c88e898375 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -407,11 +407,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; - struct p9_conn *m, *mtmp; + struct p9_conn *m; P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, trans->msize); - m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -422,24 +422,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) m->trans = trans; m->tagpool = p9_idpool_create(); if (IS_ERR(m->tagpool)) { - mtmp = ERR_PTR(-ENOMEM); kfree(m); - return mtmp; + return ERR_PTR(-ENOMEM); } - m->err = 0; INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; n = p9_mux_poll_start(m); if (n) { kfree(m); @@ -460,10 +450,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { if (IS_ERR(m->poll_waddr[i])) { p9_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ kfree(m); - m = mtmp; - break; + return (void *)m->poll_waddr; /* the error code */ } } -- cgit v1.2.3 From ec3c68f232f6d98b4596c05c1c7551b44c617c5f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: don't do fs segment mangling in p9_fd_poll() p9_fd_poll() is never called with user pointers and f_op->poll() doesn't expect its arguments to be from userland. There's no need to set kernel ds before calling f_op->poll() from p9_fd_poll(). Remove it. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6c88e898375..f6d4af16cb1 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1344,7 +1344,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) { int ret, n; struct p9_trans_fd *ts = NULL; - mm_segment_t oldfs; if (trans && trans->status == Connected) ts = trans->priv; @@ -1358,24 +1357,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) if (!ts->wr->f_op || !ts->wr->f_op->poll) return -EIO; - oldfs = get_fs(); - set_fs(get_ds()); - ret = ts->rd->f_op->poll(ts->rd, pt); if (ret < 0) - goto end; + return ret; if (ts->rd != ts->wr) { n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } + if (n < 0) + return n; ret = (ret & ~POLLOUT) | (n & ~POLLIN); } -end: - set_fs(oldfs); return ret; } -- cgit v1.2.3 From 206ca50de77033c6cc17d0e14fbb12d119a67b01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: fix and clean up module init/exit paths trans_fd leaked p9_mux_wq on module unload. Fix it. While at it, collapse p9_mux_global_init() into p9_trans_fd_init(). It's easier to follow this way and the global poll_tasks array is about to removed anyway. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f6d4af16cb1..0b4eb5f7835 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -238,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, static void p9_conn_cancel(struct p9_conn *m, int err); -static int p9_mux_global_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; - - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - return 0; -} - static u16 p9_mux_get_tag(struct p9_conn *m) { int tag; @@ -1616,10 +1600,15 @@ static struct p9_trans_module p9_fd_trans = { int p9_trans_fd_init(void) { - int ret = p9_mux_global_init(); - if (ret) { - printk(KERN_WARNING "9p: starting mux failed\n"); - return ret; + int i; + + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) + p9_mux_poll_tasks[i].task = NULL; + + p9_mux_wq = create_workqueue("v9fs"); + if (!p9_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); + return -ENOMEM; } v9fs_register_trans(&p9_tcp_trans); @@ -1634,4 +1623,6 @@ void p9_trans_fd_exit(void) v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); + + destroy_workqueue(p9_mux_wq); } -- cgit v1.2.3 From 620678244bc7b83287e2e283ed4fe6b959e94b7d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 24 Sep 2008 16:22:22 -0500 Subject: 9p: introduce missing kfree Error handling code following a kmalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton --- net/9p/trans_fd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 0b4eb5f7835..d652baf5ff9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -870,8 +870,10 @@ static struct p9_req *p9_send_request(struct p9_conn *m, else n = p9_mux_get_tag(m); - if (n < 0) + if (n < 0) { + kfree(req); return ERR_PTR(-ENOMEM); + } p9_set_tag(tc, n); -- cgit v1.2.3 From 16ec4700127d479143c77fd9128dfa17ab572963 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Wed, 24 Sep 2008 16:22:22 -0500 Subject: 9p: fix put_data error handling Abhishek Kulkarni pointed out an inconsistency in the way errors are returned from p9_put_data. On deeper exploration it seems the error handling for this path was completely wrong. This patch adds checks for allocation problems and propagates errors correctly. Signed-off-by: Eric Van Hensbergen --- net/9p/conv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/conv.c b/net/9p/conv.c index 44547201f5b..5ad3a3bd73b 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -451,8 +451,10 @@ p9_put_data(struct cbuf *bufp, const char *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; memmove(*pdata, data, count); - return count; + return 0; } static int @@ -460,6 +462,8 @@ p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; return copy_from_user(*pdata, data, count); } -- cgit v1.2.3 From 8ca31ce52a5cfd03b960fd81a49197ae85d25347 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 24 Sep 2008 15:53:39 -0700 Subject: netfilter: ip6t_{hbh,dst}: Rejects not-strict mode on rule insertion The current code ignores rules for internal options in HBH/DST options header in packet processing if 'Not strict' mode is specified (which is not implemented). Clearly it is not expected by user. Kernel should reject HBH/DST rule insertion with 'Not strict' mode in the first place. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_hbh.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 62e39ace058..26654b26d7f 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -97,8 +97,6 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, hdrlen -= 2; if (!(optinfo->flags & IP6T_OPTS_OPTS)) { return ret; - } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - pr_debug("Not strict - not implemented"); } else { pr_debug("Strict "); pr_debug("#%d ", optinfo->optsnr); @@ -177,6 +175,12 @@ hbh_mt6_check(const char *tablename, const void *entry, pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); return false; } + + if (optsinfo->flags & IP6T_OPTS_NSTRICT) { + pr_debug("ip6t_opts: Not strict - not implemented"); + return false; + } + return true; } -- cgit v1.2.3 From d01dbeb6af7a0848063033f73c3d146fec7451f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 30 Sep 2008 02:03:19 -0700 Subject: ipsec: Fix pskb_expand_head corruption in xfrm_state_check_space We're never supposed to shrink the headroom or tailroom. In fact, shrinking the headroom is a fatal action. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ac25b4c0e98..dc50f1e71f7 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -27,10 +27,14 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) - skb_headroom(skb); int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0 || ntail > 0) - return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - - return 0; + if (nhead <= 0) { + if (ntail <= 0) + return 0; + nhead = 0; + } else if (ntail < 0) + ntail = 0; + + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } static int xfrm_output_one(struct sk_buff *skb, int err) -- cgit v1.2.3 From 8b122efd13a227d35d5ca242561770db1b5e3658 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 30 Sep 2008 03:03:35 -0700 Subject: iucv: Fix mismerge again. fb65a7c091529bfffb1262515252c0d0f6241c5c ("iucv: Fix bad merging.") fixed a merge error, but in a wrong way. We now end up with the bug below. This patch corrects the mismerge like it was intended. BUG: scheduling while atomic: swapper/1/0x00000000 Modules linked in: CPU: 1 Not tainted 2.6.27-rc7-00094-gc0f4d6d #9 Process swapper (pid: 1, task: 000000003fe7d988, ksp: 000000003fe838c0) 0000000000000000 000000003fe839b8 0000000000000002 0000000000000000 000000003fe83a58 000000003fe839d0 000000003fe839d0 0000000000390de6 000000000058acd8 00000000000000d0 000000003fe7dcd8 0000000000000000 000000000000000c 000000000000000d 0000000000000000 000000003fe83a28 000000000039c5b8 0000000000015e5e 000000003fe839b8 000000003fe83a00 Call Trace: ([<0000000000015d6a>] show_trace+0xe6/0x134) [<0000000000039656>] __schedule_bug+0xa2/0xa8 [<0000000000391744>] schedule+0x49c/0x910 [<0000000000391f64>] schedule_timeout+0xc4/0x114 [<00000000003910d4>] wait_for_common+0xe8/0x1b4 [<00000000000549ae>] call_usermodehelper_exec+0xa6/0xec [<00000000001af7b8>] kobject_uevent_env+0x418/0x438 [<00000000001d08fc>] bus_add_driver+0x1e4/0x298 [<00000000001d1ee4>] driver_register+0x90/0x18c [<0000000000566848>] netiucv_init+0x168/0x2c8 [<00000000000120be>] do_one_initcall+0x3e/0x17c [<000000000054a31a>] kernel_init+0x1ce/0x248 [<000000000001a97a>] kernel_thread_starter+0x6/0xc [<000000000001a974>] kernel_thread_starter+0x0/0xc iucv: NETIUCV driver initialized initcall netiucv_init+0x0/0x2c8 returned with preemption imbalance Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- net/iucv/iucv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 705959b31e2..d7b54b5bfa6 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -524,7 +524,6 @@ static int iucv_enable(void) get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); - preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; @@ -547,7 +546,9 @@ out: */ static void iucv_disable(void) { + get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); + put_online_cpus(); kfree(iucv_path_table); } -- cgit v1.2.3 From ba0166708ef4da7eeb61dd92bbba4d5a749d6561 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Sep 2008 05:32:24 -0700 Subject: sctp: Fix kernel panic while process protocol violation parameter Since call to function sctp_sf_abort_violation() need paramter 'arg' with 'struct sctp_chunk' type, it will read the chunk type and chunk length from the chunk_hdr member of chunk. But call to sctp_sf_violation_paramlen() always with 'struct sctp_paramhdr' type's parameter, it will be passed to sctp_sf_abort_violation(). This may cause kernel panic. sctp_sf_violation_paramlen() |-- sctp_sf_abort_violation() |-- sctp_make_abort_violation() This patch fixed this problem. This patch also fix two place which called sctp_sf_violation_paramlen() with wrong paramter type. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++------------- net/sctp/sm_statefuns.c | 48 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b599cbba4fb..d68869f966c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1012,6 +1012,29 @@ end: return retval; } +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param) +{ + struct sctp_chunk *retval; + static const char error[] = "The following parameter had invalid length:"; + size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + sizeof(sctp_paramhdr_t); + + retval = sctp_make_abort(asoc, chunk, payload_len); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, + sizeof(error) + sizeof(sctp_paramhdr_t)); + sctp_addto_chunk(retval, sizeof(error), error); + sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, @@ -1782,11 +1805,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_chunk **errp) { - static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = WORD_ROUND(sizeof(error)) + - sizeof(sctp_paramhdr_t); - - /* This is a fatal error. Any accumulated non-fatal errors are * not reported. */ @@ -1794,14 +1812,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, sctp_chunk_free(*errp); /* Create an error chunk and fill it in with our payload. */ - *errp = sctp_make_op_error_space(asoc, chunk, payload_len); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); - sctp_addto_chunk(*errp, sizeof(error), error); - sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param); - } + *errp = sctp_make_violation_paramlen(asoc, chunk, param); return 0; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8848d329aa2..7c622af2ce5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, + void *arg, void *ext, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( @@ -3425,7 +3425,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, + return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3433,8 +3433,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value * the endpoint stored in a new association variable @@ -3542,8 +3542,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); if (last_asconf) { addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; @@ -4240,12 +4240,38 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) { - static const char err_str[] = "The following parameter had invalid length:"; + void *arg, void *ext, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + struct sctp_paramhdr *param = ext; + struct sctp_chunk *abort = NULL; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, - sizeof(err_str)); + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + + /* Make the abort chunk. */ + abort = sctp_make_violation_paramlen(asoc, chunk, param); + if (!abort) + goto nomem; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + + return SCTP_DISPOSITION_ABORT; +nomem: + return SCTP_DISPOSITION_NOMEM; } /* Handle a protocol violation when the peer trying to advance the -- cgit v1.2.3 From 4dd7972d1204c3851a4092cecd2207e05eb29b09 Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Wed, 1 Oct 2008 01:51:39 -0700 Subject: tcp: Fix NULL dereference in tcp_4_send_ack() Fix NULL dereference in tcp_4_send_ack(). As skb->dev is reset to NULL in tcp_v4_rcv() thus OOPS occurs: BUG: unable to handle kernel NULL pointer dereference at 00000000000004d0 IP: [] tcp_v4_send_ack+0x203/0x250 Stack: ffff810005dbb000 ffff810015c8acc0 e77b2c6e5f861600 a01610802e90cb6d 0a08010100000000 88afffff88afffff 0000000080762be8 0000000115c872e8 0004122000000000 0000000000000001 ffffffff80762b88 0000000000000020 Call Trace: [] tcp_v4_reqsk_send_ack+0x20/0x22 [] tcp_check_req+0x108/0x14c [] ? rt_intern_hash+0x322/0x33c [] tcp_v4_do_rcv+0x399/0x4ec [] ? skb_checksum+0x4f/0x272 [] ? __inet_lookup_listener+0x14a/0x15c [] tcp_v4_rcv+0x6a1/0x701 [] ip_local_deliver_finish+0x157/0x24a [] ip_local_deliver+0x72/0x7c [] ip_rcv_finish+0x38d/0x3b2 [] ? scsi_io_completion+0x19d/0x39e [] ip_rcv+0x2a2/0x2e5 [] netif_receive_skb+0x293/0x303 [] process_backlog+0x80/0xd0 [] ? __rcu_process_callbacks+0x125/0x1b4 [] net_rx_action+0xb9/0x17f [] __do_softirq+0xa3/0x164 [] call_softirq+0x1c/0x28 [] do_softirq+0x34/0x72 [] local_bh_enable_ip+0x3f/0x50 [] _spin_unlock_bh+0x12/0x14 [] release_sock+0xb8/0xc1 [] inet_stream_connect+0x146/0x25c [] ? autoremove_wake_function+0x0/0x38 [] sys_connect+0x68/0x8e [] ? fd_install+0x5f/0x68 [] ? sock_map_fd+0x55/0x62 [] system_call_after_swapgs+0x7b/0x80 Code: 41 10 11 d0 83 d0 00 4d 85 ed 89 45 c0 c7 45 c4 08 00 00 00 74 07 41 8b 45 04 89 45 c8 48 8b 43 20 8b 4d b8 48 8d 55 b0 48 89 de <48> 8b 80 d0 04 00 00 48 8b b8 60 01 00 00 e8 20 ae fe ff 65 48 RIP [] tcp_v4_send_ack+0x203/0x250 RSP CR2: 00000000000004d0 Signed-off-by: Vitaliy Gusev Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1b4fee20fc9..011478e46c4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -618,7 +618,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb->dst->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); -- cgit v1.2.3 From 2a5b82751f73a0bf6f604ce56d34adba6da1b246 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 1 Oct 2008 02:13:16 -0700 Subject: ipv6: NULL pointer dereferrence in tcp_v6_send_ack The following actions are possible: tcp_v6_rcv skb->dev = NULL; tcp_v6_do_rcv tcp_v6_hnd_req tcp_check_req req->rsk_ops->send_ack == tcp_v6_send_ack So, skb->dev can be NULL in tcp_v6_send_ack. We must obtain namespace from dst entry. Thanks to Vitaliy Gusev for initial problem finding in IPv4 code. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b585c850a89..10e22fd4822 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1050,7 +1050,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb->dst->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; -- cgit v1.2.3 From 5dc121e9a7a8a3721cefeb07f3559f50fbedc67e Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Wed, 1 Oct 2008 02:37:56 -0700 Subject: XFRM,IPv6: initialize ip6_dst_blackhole_ops.kmem_cachep ip6_dst_blackhole_ops.kmem_cachep is not expected to be NULL (i.e. to be initialized) when dst_alloc() is called from ip6_dst_blackhole(). Otherwise, it results in the following (xfrm_larval_drop is now set to 1 by default): [ 78.697642] Unable to handle kernel paging request for data at address 0x0000004c [ 78.703449] Faulting instruction address: 0xc0097f54 [ 78.786896] Oops: Kernel access of bad area, sig: 11 [#1] [ 78.792791] PowerMac [ 78.798383] Modules linked in: btusb usbhid bluetooth b43 mac80211 cfg80211 ehci_hcd ohci_hcd sungem sungem_phy usbcore ssb [ 78.804263] NIP: c0097f54 LR: c0334a28 CTR: c002d430 [ 78.809997] REGS: eef19ad0 TRAP: 0300 Not tainted (2.6.27-rc5) [ 78.815743] MSR: 00001032 CR: 22242482 XER: 20000000 [ 78.821550] DAR: 0000004c, DSISR: 40000000 [ 78.827278] TASK = eef0df40[3035] 'mip6d' THREAD: eef18000 [ 78.827408] GPR00: 00001032 eef19b80 eef0df40 00000000 00008020 eef19c30 00000001 00000000 [ 78.833249] GPR08: eee5101c c05a5c10 ef9ad500 00000000 24242422 1005787c 00000000 1004f960 [ 78.839151] GPR16: 00000000 10024e90 10050040 48030018 0fe44150 00000000 00000000 eef19c30 [ 78.845046] GPR24: eef19e44 00000000 eef19bf8 efb37c14 eef19bf8 00008020 00009032 c0596064 [ 78.856671] NIP [c0097f54] kmem_cache_alloc+0x20/0x94 [ 78.862581] LR [c0334a28] dst_alloc+0x40/0xc4 [ 78.868451] Call Trace: [ 78.874252] [eef19b80] [c03c1810] ip6_dst_lookup_tail+0x1c8/0x1dc (unreliable) [ 78.880222] [eef19ba0] [c0334a28] dst_alloc+0x40/0xc4 [ 78.886164] [eef19bb0] [c03cd698] ip6_dst_blackhole+0x28/0x1cc [ 78.892090] [eef19be0] [c03d9be8] rawv6_sendmsg+0x75c/0xc88 [ 78.897999] [eef19cb0] [c038bca4] inet_sendmsg+0x4c/0x78 [ 78.903907] [eef19cd0] [c03207c8] sock_sendmsg+0xac/0xe4 [ 78.909734] [eef19db0] [c03209e4] sys_sendmsg+0x1e4/0x2a0 [ 78.915540] [eef19f00] [c03220a8] sys_socketcall+0xfc/0x210 [ 78.921406] [eef19f40] [c0014b3c] ret_from_syscall+0x0/0x38 [ 78.927295] --- Exception: c01 at 0xfe2d730 [ 78.927297] LR = 0xfe2d71c [ 78.939019] Instruction dump: [ 78.944835] 91640018 9144001c 900a0000 4bffff44 9421ffe0 7c0802a6 bf810010 7c9d2378 [ 78.950694] 90010024 7fc000a6 57c0045e 7c000124 <83e3004c> 8383005c 2f9f0000 419e0050 [ 78.956464] ---[ end trace 05fa1ed7972487a1 ]--- As commented by Benjamin Thery, the bug was introduced by f2fc6a54585a1be6669613a31fbaba2ecbadcd36, while adding network namespaces support to ipv6 routes. Signed-off-by: Arnaud Ebalard Acked-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9af6115f0f5..63442a1e741 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2688,6 +2688,8 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; + /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ -- cgit v1.2.3 From 0523820482dcb42784572ffd2296c2f08c275a2b Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Wed, 1 Oct 2008 05:17:54 -0700 Subject: af_key: Free dumping state on socket close Fix a xfrm_{state,policy}_walk leak if pfkey socket is closed while dumping is on-going. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/key/af_key.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index d628df97e02..b7f5a1c353e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -73,22 +73,18 @@ static int pfkey_can_dump(struct sock *sk) return 0; } -static int pfkey_do_dump(struct pfkey_sock *pfk) +static void pfkey_terminate_dump(struct pfkey_sock *pfk) { - int rc; - - rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; - - pfk->dump.done(pfk); - pfk->dump.dump = NULL; - pfk->dump.done = NULL; - return rc; + if (pfk->dump.dump) { + pfk->dump.done(pfk); + pfk->dump.dump = NULL; + pfk->dump.done = NULL; + } } static void pfkey_sock_destruct(struct sock *sk) { + pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -310,6 +306,18 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return err; } +static int pfkey_do_dump(struct pfkey_sock *pfk) +{ + int rc; + + rc = pfk->dump.dump(pfk); + if (rc == -ENOBUFS) + return 0; + + pfkey_terminate_dump(pfk); + return rc; +} + static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) { *new = *orig; -- cgit v1.2.3