From fe315e76fc3a3f9f7e1581dc22fec7e7719f0896 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Mar 2009 14:10:21 -0400 Subject: SUNRPC: Avoid spurious wake-up during UDP connect processing To clear out old state, the UDP connect workers unconditionally invoke xs_close() before proceeding with a new connect. Nowadays this causes a spurious wake-up of the task waiting for the connect to complete. This is a little racey, but usually harmless. The waiting task immediately retries the connect via a call_bind/call_connect sequence, which usually finds the transport already in the connected state because the connect worker has finished in the background. To avoid a spurious wake-up, factor the xs_close() logic that resets the underlying socket into a helper, and have the UDP connect workers call that helper instead of xs_close(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 29c71e645b2..1127eb93413 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -767,23 +767,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_error_report = transport->old_error_report; } -/** - * xs_close - close a socket - * @xprt: transport - * - * This is used when all requests are complete; ie, no DRC state remains - * on the server we want to save. - */ -static void xs_close(struct rpc_xprt *xprt) +static void xs_reset_transport(struct sock_xprt *transport) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct socket *sock = transport->sock; struct sock *sk = transport->inet; - if (!sk) - goto clear_close_wait; - - dprintk("RPC: xs_close xprt %p\n", xprt); + if (sk == NULL) + return; write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; @@ -797,7 +787,23 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); -clear_close_wait: +} + +/** + * xs_close - close a socket + * @xprt: transport + * + * This is used when all requests are complete; ie, no DRC state remains + * on the server we want to save. + */ +static void xs_close(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + dprintk("RPC: xs_close xprt %p\n", xprt); + + xs_reset_transport(transport); + smp_mb__before_clear_bit(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); @@ -1537,9 +1543,10 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } @@ -1578,9 +1585,10 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } -- cgit v1.2.3 From 670f94573104b4a25525d3fcdcd6496c678df172 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:58 -0400 Subject: SUNRPC: Ensure we set XPRT_CLOSING only after we've sent a tcp FIN... ...so that we can distinguish between when we need to shutdown and when we don't. Also remove the call to xs_tcp_shutdown() from xs_tcp_connect(), since xprt_connect() makes the same test. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1127eb93413..cb4bd93b921 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1180,7 +1180,6 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ - set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); case TCP_SYN_SENT: xprt->connect_cookie++; @@ -1193,6 +1192,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; break; case TCP_LAST_ACK: + set_bit(XPRT_CLOSING, &xprt->state); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); @@ -1836,9 +1836,6 @@ static void xs_tcp_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - /* Initiate graceful shutdown of the socket if not already done */ - if (test_bit(XPRT_CONNECTED, &xprt->state)) - xs_tcp_shutdown(xprt); /* Exit if we need to wait for socket shutdown to complete */ if (test_bit(XPRT_CLOSING, &xprt->state)) return; -- cgit v1.2.3 From 40d2549db5f515e415894def98b49db7d4c56714 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:58 -0400 Subject: SUNRPC: Don't disconnect if a connection is still in progress. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cb4bd93b921..9d1898f6ee8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1613,10 +1613,9 @@ out: * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. */ -static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) +static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) { int result; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sockaddr any; dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); @@ -1633,6 +1632,17 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } +static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +{ + unsigned int state = transport->inet->sk_state; + + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) + return; + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) + return; + xs_abort_connection(xprt, transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1706,7 +1716,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) } } else /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xs_tcp_reuse_connection(xprt, transport); dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1766,7 +1776,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) } } else /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xs_tcp_reuse_connection(xprt, transport); dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); -- cgit v1.2.3 From c8485e4d634f6df155040293928707f127f0d06d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:59 -0400 Subject: SUNRPC: Handle ECONNREFUSED correctly in xprt_transmit() If we get an ECONNREFUSED error, we currently go to sleep on the 'xprt->sending' wait queue. The problem is that no timeout is set there, and there is nothing else that will wake the task up later. We should deal with ECONNREFUSED in call_status, given that is where we also deal with -EHOSTDOWN, and friends. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9d1898f6ee8..5e8198bede8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -594,6 +594,8 @@ static int xs_udp_send_request(struct rpc_task *task) /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -603,19 +605,17 @@ static int xs_udp_send_request(struct rpc_task *task) case -EAGAIN: xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); } - +out: return status; } @@ -697,6 +697,8 @@ static int xs_tcp_send_request(struct rpc_task *task) status = -EAGAIN; break; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -706,21 +708,17 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EAGAIN: xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ECONNRESET: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: case -EPIPE: - status = -ENOTCONN; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - xs_tcp_shutdown(xprt); } - +out: return status; } -- cgit v1.2.3 From 482f32e65d31cbf88d08306fa5d397cc945c3c26 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:00 -0400 Subject: SUNRPC: Handle socket errors correctly Ensure that we pick up and handle socket errors as they occur. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5e8198bede8..879af6f27b4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1208,23 +1208,20 @@ static void xs_tcp_state_change(struct sock *sk) } /** - * xs_tcp_error_report - callback mainly for catching RST events + * xs_error_report - callback mainly for catching socket errors * @sk: socket */ -static void xs_tcp_error_report(struct sock *sk) +static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; read_lock(&sk->sk_callback_lock); - if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED) - goto out; if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" "RPC: error %d\n", __func__, xprt, sk->sk_err); - - xprt_force_disconnect(xprt); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: read_unlock(&sk->sk_callback_lock); } @@ -1509,6 +1506,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; @@ -1656,7 +1654,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sk->sk_error_report = xs_tcp_error_report; + sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ -- cgit v1.2.3 From 2a4919919a97911b0aa4b9f5ac1eab90ba87652b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:00 -0400 Subject: SUNRPC: Return EAGAIN instead of ENOTCONN when waking up xprt->pending While we should definitely return socket errors to the task that is currently trying to send data, there is no need to propagate the same error to all the other tasks on xprt->pending. Doing so actually slows down recovery, since it causes more than one tasks to attempt socket recovery. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 58 +++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 879af6f27b4..8e58b0b5460 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1162,7 +1162,7 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - xprt_wake_pending_tasks(xprt, 0); + xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock_bh(&xprt->transport_lock); break; @@ -1721,20 +1721,22 @@ static void xs_tcp_connect_worker4(struct work_struct *work) dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } + switch (status) { + case 0: + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); } + status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); out_clear: @@ -1780,20 +1782,22 @@ static void xs_tcp_connect_worker6(struct work_struct *work) status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } + switch (status) { + case 0: + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); } + status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); out_clear: -- cgit v1.2.3 From 8a2cec295f4499cc9d4452e9b02d4ed071bb42d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:01 -0400 Subject: SUNRPC: Delay, then retry on connection errors. Enforce the comment in xs_tcp_connect_worker4/xs_tcp_connect_worker6 that we should delay, then retry on certain connection errors. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8e58b0b5460..9f3e615d3e0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1722,20 +1722,19 @@ static void xs_tcp_connect_worker4(struct work_struct *work) xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); } + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); @@ -1783,20 +1782,19 @@ static void xs_tcp_connect_worker6(struct work_struct *work) dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); } + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); -- cgit v1.2.3 From 5e3771ce2d6a69e10fcc870cdf226d121d868491 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:01 -0400 Subject: SUNRPC: Ensure that xs_nospace return values are propagated If xs_nospace() finds that the socket has disconnected, it attempts to return ENOTCONN, however that value is then squashed by the callers. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9f3e615d3e0..2e070679ab4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -521,11 +521,12 @@ static void xs_nospace_callback(struct rpc_task *task) * @task: task to put to sleep * */ -static void xs_nospace(struct rpc_task *task) +static int xs_nospace(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = 0; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -537,6 +538,7 @@ static void xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size @@ -548,10 +550,11 @@ static void xs_nospace(struct rpc_task *task) } } else { clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - task->tk_status = -ENOTCONN; + ret = -ENOTCONN; } spin_unlock_bh(&xprt->transport_lock); + return ret; } /** @@ -603,7 +606,7 @@ static int xs_udp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -706,7 +709,7 @@ static int xs_tcp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -- cgit v1.2.3 From 7d1e8255cf959fba7ee2317550dfde39f0b936ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:03 -0400 Subject: SUNRPC: Add the equivalent of the linger and linger2 timeouts to RPC sockets This fixes a regression against FreeBSD servers as reported by Tomas Kasparek. Apparently when using RPC over a TCP socket, the FreeBSD servers don't ever react to the client closing the socket, and so commit e06799f958bf7f9f8fae15f0c6f519953fb0257c (SUNRPC: Use shutdown() instead of close() when disconnecting a TCP socket) causes the setup to hang forever whenever the client attempts to close and then reconnect. We break the deadlock by adding a 'linger2' style timeout to the socket, after which, the client will abort the connection using a TCP 'RST'. The default timeout is set to 15 seconds. A subsequent patch will put it under user control by means of a systctl. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 98 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 17 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2e070679ab4..b51f58b95c3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -49,6 +49,8 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#define XS_TCP_LINGER_TO (15U * HZ) + /* * We can register our own files under /proc/sys/sunrpc by * calling register_sysctl_table() again. The files in that @@ -806,6 +808,7 @@ static void xs_close(struct rpc_xprt *xprt) xs_reset_transport(transport); smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); @@ -1133,6 +1136,47 @@ out: read_unlock(&sk->sk_callback_lock); } +/* + * Do the equivalent of linger/linger2 handling for dealing with + * broken servers that don't close the socket in a timely + * fashion + */ +static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, + unsigned long timeout) +{ + struct sock_xprt *transport; + + if (xprt_test_and_set_connecting(xprt)) + return; + set_bit(XPRT_CONNECTION_ABORT, &xprt->state); + transport = container_of(xprt, struct sock_xprt, xprt); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, + timeout); +} + +static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport; + + transport = container_of(xprt, struct sock_xprt, xprt); + + if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || + !cancel_delayed_work(&transport->connect_worker)) + return; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + xprt_clear_connecting(xprt); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1178,6 +1222,7 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); + xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1194,17 +1239,14 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); + xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); break; case TCP_CLOSE: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); + xs_tcp_cancel_linger_timeout(xprt); + xs_sock_mark_closed(xprt); } out: read_unlock(&sk->sk_callback_lock); @@ -1562,8 +1604,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1604,8 +1646,8 @@ static void xs_udp_connect_worker6(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /* @@ -1626,7 +1668,9 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); - if (result) + if (!result) + xs_sock_mark_closed(xprt); + else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } @@ -1702,6 +1746,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) goto out; if (!sock) { + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1713,10 +1758,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) sock_release(sock); goto out; } - } else + } else { + int abort_and_exit; + + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt, transport); + if (abort_and_exit) + goto out_eagain; + } + dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1732,17 +1785,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: - goto out_clear; + xprt_clear_connecting(xprt); + return; } /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); printk("%s: connect returned unhandled error %d\n", __func__, status); +out_eagain: status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1763,6 +1817,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) goto out; if (!sock) { + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* start from scratch */ if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1774,10 +1829,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) sock_release(sock); goto out; } - } else + } else { + int abort_and_exit; + + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt, transport); + if (abort_and_exit) + goto out_eagain; + } + dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1792,17 +1855,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: - goto out_clear; + xprt_clear_connecting(xprt); + return; } /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); printk("%s: connect returned unhandled error %d\n", __func__, status); +out_eagain: status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** -- cgit v1.2.3 From 25fe6142a57c720452c5e9ddbc1f32309c1e5c19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:03 -0400 Subject: SUNRPC: Add a sysctl to control the duration of the socket linger timeout Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b51f58b95c3..42222b4dd76 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -50,6 +50,7 @@ unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; #define XS_TCP_LINGER_TO (15U * HZ) +static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; /* * We can register our own files under /proc/sys/sunrpc by @@ -118,6 +119,14 @@ static ctl_table xs_tunables_table[] = { .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, + { + .procname = "tcp_fin_timeout", + .data = &xs_tcp_fin_timeout, + .maxlen = sizeof(xs_tcp_fin_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = sysctl_jiffies + }, { .ctl_name = 0, }, @@ -1222,7 +1231,7 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); - xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1239,7 +1248,7 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); - xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); -- cgit v1.2.3 From b61d59fffd3e5b6037c92b4c840605831de8a251 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:04 -0400 Subject: =?UTF-8?q?SUNRPC:=C2=A0xs=5Ftcp=5Fconnect=5Fworker{4,6}:=20merge?= =?UTF-8?q?=20common=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 140 ++++++++++++++++++++++++++------------------------ 1 file changed, 72 insertions(+), 68 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 42222b4dd76..f05a56e597e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1738,33 +1738,29 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } /** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect + * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker4(struct work_struct *work) +static void xs_tcp_setup_socket(struct rpc_xprt *xprt, + struct sock_xprt *transport, + struct socket *(*create_sock)(struct rpc_xprt *, + struct sock_xprt *)) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; - int err, status = -EIO; + int status = -EIO; if (xprt->shutdown) goto out; if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - /* start from scratch */ - if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock) < 0) { - sock_release(sock); + sock = create_sock(xprt, transport); + if (IS_ERR(sock)) { + status = PTR_ERR(sock); goto out; } } else { @@ -1808,74 +1804,82 @@ out: xprt_wake_pending_tasks(xprt, status); } +static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + /* start from scratch */ + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket4(sock); + + if (xs_bind4(transport, sock) < 0) { + sock_release(sock); + goto out_err; + } + return sock; +out_err: + return ERR_PTR(-EIO); +} + /** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker6(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; - if (xprt->shutdown) - goto out; - - if (!sock) { - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - /* start from scratch */ - if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); +} - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else { - int abort_and_exit; +static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; - abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, - &xprt->state); - /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt, transport); + /* start from scratch */ + err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket6(sock); - if (abort_and_exit) - goto out_eagain; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out_err; } + return sock; +out_err: + return ERR_PTR(-EIO); +} - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; - status = xs_tcp_finish_connecting(xprt, sock); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - switch (status) { - case -ECONNREFUSED: - case -ECONNRESET: - case -ENETUNREACH: - /* retry with existing socket, after a delay */ - case 0: - case -EINPROGRESS: - case -EALREADY: - xprt_clear_connecting(xprt); - return; - } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); -out_eagain: - status = -EAGAIN; -out: - xprt_clear_connecting(xprt); - xprt_wake_pending_tasks(xprt, status); + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); } /** -- cgit v1.2.3 From 55420c24a0d4d1fce70ca713f84aa00b6b74a70e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 15:29:24 -0400 Subject: SUNRPC: Ensure we close the socket on EPIPE errors too... As long as one task is holding the socket lock, then calls to xprt_force_disconnect(xprt) will not succeed in shutting down the socket. In particular, this would mean that a server initiated shutdown will not succeed until the lock is relinquished. In order to avoid the deadlock, we should ensure that xs_tcp_send_request() closes the socket on EPIPE errors too. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f05a56e597e..fbc8725c20c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -726,10 +726,10 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); case -ECONNRESET: + case -EPIPE: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: - case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } out: -- cgit v1.2.3