]> git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'rds-packet-assembly-fixes'
authorDavid S. Miller <davem@davemloft.net>
Tue, 7 Jun 2016 22:10:16 +0000 (15:10 -0700)
committerDavid S. Miller <davem@davemloft.net>
Tue, 7 Jun 2016 22:10:16 +0000 (15:10 -0700)
Sowmini Varadhan says:

====================
RDS: TCP: socket locking RDS packet assembly fixes

This three part patchset fixes bugs in synchronization between
rds_tcp_accept_one() and the rds-tcp send/recv path.

Patch 1 ensures that the lock_sock() is taken appropriately
and the RDS datagram reassembly state is reset  to synchronize
with the receive path.

Patch 2 ensures that partially sent RDS datagrams will get
retransmitted after rds_tcp_accept_one() switches sockets.

Patch 3 fixes a race window which would prematurely re-enable
rds_send_xmit() before the rds_tcp_connection setup has been
completed in rds_tcp_accept_one().
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
net/rds/rds.h
net/rds/send.c
net/rds/tcp.c
net/rds/tcp.h
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/rds/threads.c

index 80256b08eac0388d702f5981f00703d2118f5cce..387df5f32e49b9084363dc3da3ff62aa252e5507 100644 (file)
@@ -74,6 +74,7 @@ enum {
        RDS_CONN_CONNECTING,
        RDS_CONN_DISCONNECTING,
        RDS_CONN_UP,
+       RDS_CONN_RESETTING,
        RDS_CONN_ERROR,
 };
 
@@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *);
 void rds_shutdown_worker(struct work_struct *);
 void rds_send_worker(struct work_struct *);
 void rds_recv_worker(struct work_struct *);
+void rds_connect_path_complete(struct rds_connection *conn, int curr);
 void rds_connect_complete(struct rds_connection *conn);
 
 /* transport.c */
index c9cdb358ea885e3e356cc675b579f1313ed94ff9..b1962f8e30f7bfc9fa77fb9bb413c42c81018e57 100644 (file)
@@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn)
        list_splice_init(&conn->c_retrans, &conn->c_send_queue);
        spin_unlock_irqrestore(&conn->c_lock, flags);
 }
+EXPORT_SYMBOL_GPL(rds_send_reset);
 
 static int acquire_in_xmit(struct rds_connection *conn)
 {
index 86187dad14403100ff6199ee79beed5262de818f..74ee126a6fe6c00ce8bce45053ec2b8805fc222e 100644 (file)
@@ -126,9 +126,81 @@ void rds_tcp_restore_callbacks(struct socket *sock,
 }
 
 /*
- * This is the only path that sets tc->t_sock.  Send and receive trust that
- * it is set.  The RDS_CONN_UP bit protects those paths from being
- * called while it isn't set.
+ * rds_tcp_reset_callbacks() switches the to the new sock and
+ * returns the existing tc->t_sock.
+ *
+ * The only functions that set tc->t_sock are rds_tcp_set_callbacks
+ * and rds_tcp_reset_callbacks.  Send and receive trust that
+ * it is set.  The absence of RDS_CONN_UP bit protects those paths
+ * from being called while it isn't set.
+ */
+void rds_tcp_reset_callbacks(struct socket *sock,
+                            struct rds_connection *conn)
+{
+       struct rds_tcp_connection *tc = conn->c_transport_data;
+       struct socket *osock = tc->t_sock;
+
+       if (!osock)
+               goto newsock;
+
+       /* Need to resolve a duelling SYN between peers.
+        * We have an outstanding SYN to this peer, which may
+        * potentially have transitioned to the RDS_CONN_UP state,
+        * so we must quiesce any send threads before resetting
+        * c_transport_data. We quiesce these threads by setting
+        * c_state to something other than RDS_CONN_UP, and then
+        * waiting for any existing threads in rds_send_xmit to
+        * complete release_in_xmit(). (Subsequent threads entering
+        * rds_send_xmit() will bail on !rds_conn_up().
+        *
+        * However an incoming syn-ack at this point would end up
+        * marking the conn as RDS_CONN_UP, and would again permit
+        * rds_send_xmi() threads through, so ideally we would
+        * synchronize on RDS_CONN_UP after lock_sock(), but cannot
+        * do that: waiting on !RDS_IN_XMIT after lock_sock() may
+        * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
+        * would not get set. As a result, we set c_state to
+        * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
+        * cannot mark rds_conn_path_up() in the window before lock_sock()
+        */
+       atomic_set(&conn->c_state, RDS_CONN_RESETTING);
+       wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
+       lock_sock(osock->sk);
+       /* reset receive side state for rds_tcp_data_recv() for osock  */
+       if (tc->t_tinc) {
+               rds_inc_put(&tc->t_tinc->ti_inc);
+               tc->t_tinc = NULL;
+       }
+       tc->t_tinc_hdr_rem = sizeof(struct rds_header);
+       tc->t_tinc_data_rem = 0;
+       tc->t_sock = NULL;
+
+       write_lock_bh(&osock->sk->sk_callback_lock);
+
+       osock->sk->sk_user_data = NULL;
+       osock->sk->sk_data_ready = tc->t_orig_data_ready;
+       osock->sk->sk_write_space = tc->t_orig_write_space;
+       osock->sk->sk_state_change = tc->t_orig_state_change;
+       write_unlock_bh(&osock->sk->sk_callback_lock);
+       release_sock(osock->sk);
+       sock_release(osock);
+newsock:
+       rds_send_reset(conn);
+       lock_sock(sock->sk);
+       write_lock_bh(&sock->sk->sk_callback_lock);
+       tc->t_sock = sock;
+       sock->sk->sk_user_data = conn;
+       sock->sk->sk_data_ready = rds_tcp_data_ready;
+       sock->sk->sk_write_space = rds_tcp_write_space;
+       sock->sk->sk_state_change = rds_tcp_state_change;
+
+       write_unlock_bh(&sock->sk->sk_callback_lock);
+       release_sock(sock->sk);
+}
+
+/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments
+ * above rds_tcp_reset_callbacks for notes about synchronization
+ * with data path
  */
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
 {
index 41c228300525c029df02472b609ada1a71cea98e..ec0602b0dc24b443c40437d5697037082d270f44 100644 (file)
@@ -50,6 +50,7 @@ struct rds_tcp_statistics {
 void rds_tcp_tune(struct socket *sock);
 void rds_tcp_nonagle(struct socket *sock);
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
+void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn);
 void rds_tcp_restore_callbacks(struct socket *sock,
                               struct rds_tcp_connection *tc);
 u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
index fb82e0a0bf89361e005a23c371f0e95dfa4970bc..fba13d0305fb25f12c476e6014a2a9d4c5559075 100644 (file)
@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk)
                case TCP_SYN_RECV:
                        break;
                case TCP_ESTABLISHED:
-                       rds_connect_complete(conn);
+                       rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
                        break;
                case TCP_CLOSE_WAIT:
                case TCP_CLOSE:
index 4bf4befe5066a0084a7430cc39f9fd0c08a9a20e..686b1d03a55858aeb59d9863339c2c8e99e03d90 100644 (file)
@@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock)
        struct inet_sock *inet;
        struct rds_tcp_connection *rs_tcp = NULL;
        int conn_state;
-       struct sock *nsk;
 
        if (!sock) /* module unload or netns delete in progress */
                return -ENETUNREACH;
@@ -136,26 +135,21 @@ int rds_tcp_accept_one(struct socket *sock)
                    !conn->c_outgoing) {
                        goto rst_nsk;
                } else {
-                       atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
-                       wait_event(conn->c_waitq,
-                                  !test_bit(RDS_IN_XMIT, &conn->c_flags));
-                       rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
+                       rds_tcp_reset_callbacks(new_sock, conn);
                        conn->c_outgoing = 0;
+                       /* rds_connect_path_complete() marks RDS_CONN_UP */
+                       rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
                }
+       } else {
+               rds_tcp_set_callbacks(new_sock, conn);
+               rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
        }
-       rds_tcp_set_callbacks(new_sock, conn);
-       rds_connect_complete(conn); /* marks RDS_CONN_UP */
        new_sock = NULL;
        ret = 0;
        goto out;
 rst_nsk:
        /* reset the newly returned accept sock and bail */
-       nsk = new_sock->sk;
-       rds_tcp_stats_inc(s_tcp_listen_closed_stale);
-       nsk->sk_user_data = NULL;
-       nsk->sk_prot->disconnect(nsk, 0);
-       tcp_done(nsk);
-       new_sock = NULL;
+       kernel_sock_shutdown(new_sock, SHUT_RDWR);
        ret = 0;
 out:
        if (rs_tcp)
index 454aa6d23327b2a302bcfc72b6b3d857656e33ed..4a323045719b5c0aa0407916a772fe65aa900b0b 100644 (file)
@@ -71,9 +71,9 @@
 struct workqueue_struct *rds_wq;
 EXPORT_SYMBOL_GPL(rds_wq);
 
-void rds_connect_complete(struct rds_connection *conn)
+void rds_connect_path_complete(struct rds_connection *conn, int curr)
 {
-       if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
+       if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
                printk(KERN_WARNING "%s: Cannot transition to state UP, "
                                "current state is %d\n",
                                __func__,
@@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn)
        queue_delayed_work(rds_wq, &conn->c_send_w, 0);
        queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 }
+EXPORT_SYMBOL_GPL(rds_connect_path_complete);
+
+void rds_connect_complete(struct rds_connection *conn)
+{
+       rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+}
 EXPORT_SYMBOL_GPL(rds_connect_complete);
 
 /*