From: Trond Myklebust Date: Fri, 21 Aug 2009 17:37:17 +0000 (-0400) Subject: SUNRPC: Fix tcp reconnection X-Git-Tag: v2.6.27.32~2 X-Git-Url: https://git.kernelconcepts.de/?a=commitdiff_plain;h=2607b3b8c16b95806c81968bcd909cba02e6d051;p=karo-tx-linux.git SUNRPC: Fix tcp reconnection This fixes a problem that was reported as Red Hat Bugzilla entry number 485339, in which rpciod starts looping on the TCP connection code, rendering the NFS client unusable for 1/2 minute or so. It is basically a backport of commit f75e6745aa3084124ae1434fd7629853bdaf6798 (SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect) Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4d80a118d538..75a87fe48cf8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_CONNECTION_CLOSE (8) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 99a52aabe332..b66be675be28 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -645,10 +645,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8f9295da9b34..a28330410b0c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -748,6 +748,9 @@ out_release: * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -781,6 +784,14 @@ clear_close_wait: xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1676,11 +1687,21 @@ static void xs_tcp_connect_worker4(struct work_struct *work) goto out_clear; case -ECONNREFUSED: case -ECONNRESET: + case -ENETUNREACH: /* retry with existing socket, after a delay */ - break; + goto out_clear; default: /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); + status = -EAGAIN; } } out: @@ -1735,11 +1756,21 @@ static void xs_tcp_connect_worker6(struct work_struct *work) goto out_clear; case -ECONNREFUSED: case -ECONNRESET: + case -ENETUNREACH: /* retry with existing socket, after a delay */ - break; + goto out_clear; default: /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); + status = -EAGAIN; } } out: @@ -1871,7 +1902,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, };