]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - net/unix/af_unix.c
Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[karo-tx-linux.git] / net / unix / af_unix.c
index 955ec152cb71eac8c91e06f1c922d1df20f6e1f0..ef05cd9403d4bb2c2ad0a3faddc71de611d892df 100644 (file)
@@ -326,6 +326,118 @@ found:
        return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+                                     void *key)
+{
+       struct unix_sock *u;
+       wait_queue_head_t *u_sleep;
+
+       u = container_of(q, struct unix_sock, peer_wake);
+
+       __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+                           q);
+       u->peer_wake.private = NULL;
+
+       /* relaying can only happen while the wq still exists */
+       u_sleep = sk_sleep(&u->sk);
+       if (u_sleep)
+               wake_up_interruptible_poll(u_sleep, key);
+
+       return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+       struct unix_sock *u, *u_other;
+       int rc;
+
+       u = unix_sk(sk);
+       u_other = unix_sk(other);
+       rc = 0;
+       spin_lock(&u_other->peer_wait.lock);
+
+       if (!u->peer_wake.private) {
+               u->peer_wake.private = other;
+               __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+               rc = 1;
+       }
+
+       spin_unlock(&u_other->peer_wait.lock);
+       return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+                                           struct sock *other)
+{
+       struct unix_sock *u, *u_other;
+
+       u = unix_sk(sk);
+       u_other = unix_sk(other);
+       spin_lock(&u_other->peer_wait.lock);
+
+       if (u->peer_wake.private == other) {
+               __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+               u->peer_wake.private = NULL;
+       }
+
+       spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+                                                  struct sock *other)
+{
+       unix_dgram_peer_wake_disconnect(sk, other);
+       wake_up_interruptible_poll(sk_sleep(sk),
+                                  POLLOUT |
+                                  POLLWRNORM |
+                                  POLLWRBAND);
+}
+
+/* preconditions:
+ *     - unix_peer(sk) == other
+ *     - association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+       int connected;
+
+       connected = unix_dgram_peer_wake_connect(sk, other);
+
+       if (unix_recvq_full(other))
+               return 1;
+
+       if (connected)
+               unix_dgram_peer_wake_disconnect(sk, other);
+
+       return 0;
+}
+
 static int unix_writable(const struct sock *sk)
 {
        return sk->sk_state != TCP_LISTEN &&
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }
+
+               unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }
@@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
        INIT_LIST_HEAD(&u->link);
        mutex_init(&u->readlock); /* single task reading lock */
        init_waitqueue_head(&u->peer_wait);
+       init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
        if (sk == NULL)
@@ -838,32 +953,20 @@ fail:
        return NULL;
 }
 
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+                     struct path *res)
 {
-       struct dentry *dentry;
-       struct path path;
-       int err = 0;
-       /*
-        * Get the parent directory, calculate the hash for last
-        * component.
-        */
-       dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-       err = PTR_ERR(dentry);
-       if (IS_ERR(dentry))
-               return err;
+       int err;
 
-       /*
-        * All right, let's create it.
-        */
-       err = security_path_mknod(&path, dentry, mode, 0);
+       err = security_path_mknod(path, dentry, mode, 0);
        if (!err) {
-               err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+               err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
                if (!err) {
-                       res->mnt = mntget(path.mnt);
+                       res->mnt = mntget(path->mnt);
                        res->dentry = dget(dentry);
                }
        }
-       done_path_create(&path, dentry);
+
        return err;
 }
 
@@ -874,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
-       int err;
+       int err, name_err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
+       struct path path;
+       struct dentry *dentry;
 
        err = -EINVAL;
        if (sunaddr->sun_family != AF_UNIX)
@@ -893,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                goto out;
        addr_len = err;
 
+       name_err = 0;
+       dentry = NULL;
+       if (sun_path[0]) {
+               /* Get the parent directory, calculate the hash for last
+                * component.
+                */
+               dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+               if (IS_ERR(dentry)) {
+                       /* delay report until after 'already bound' check */
+                       name_err = PTR_ERR(dentry);
+                       dentry = NULL;
+               }
+       }
+
        err = mutex_lock_interruptible(&u->readlock);
        if (err)
-               goto out;
+               goto out_path;
 
        err = -EINVAL;
        if (u->addr)
                goto out_up;
 
+       if (name_err) {
+               err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+               goto out_up;
+       }
+
        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
@@ -911,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);
 
-       if (sun_path[0]) {
-               struct path path;
+       if (dentry) {
+               struct path u_path;
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
-               err = unix_mknod(sun_path, mode, &path);
+               err = unix_mknod(dentry, &path, mode, &u_path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
@@ -923,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                        goto out_up;
                }
                addr->hash = UNIX_HASH_SIZE;
-               hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+               hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
-               u->path = path;
+               u->path = u_path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
@@ -948,6 +1073,10 @@ out_unlock:
        spin_unlock(&unix_table_lock);
 out_up:
        mutex_unlock(&u->readlock);
+out_path:
+       if (dentry)
+               done_path_create(&path, dentry);
+
 out:
        return err;
 }
@@ -1033,6 +1162,8 @@ restart:
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
+               unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
                unix_state_double_unlock(sk, other);
 
                if (other != old_peer)
@@ -1434,6 +1565,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
        return err;
 }
 
+static bool unix_passcred_enabled(const struct socket *sock,
+                                 const struct sock *other)
+{
+       return test_bit(SOCK_PASSCRED, &sock->flags) ||
+              !other->sk_socket ||
+              test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
 /*
  * Some apps rely on write() giving SCM_CREDENTIALS
  * We include credentials if source or destination socket
@@ -1444,14 +1583,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
 {
        if (UNIXCB(skb).pid)
                return;
-       if (test_bit(SOCK_PASSCRED, &sock->flags) ||
-           !other->sk_socket ||
-           test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+       if (unix_passcred_enabled(sock, other)) {
                UNIXCB(skb).pid  = get_pid(task_tgid(current));
                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
        }
 }
 
+static int maybe_init_creds(struct scm_cookie *scm,
+                           struct socket *socket,
+                           const struct sock *other)
+{
+       int err;
+       struct msghdr msg = { .msg_controllen = 0 };
+
+       err = scm_send(socket, &msg, scm, false);
+       if (err)
+               return err;
+
+       if (unix_passcred_enabled(socket, other)) {
+               scm->pid = get_pid(task_tgid(current));
+               current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+       }
+       return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+                           struct scm_cookie *scm)
+{
+       const struct unix_skb_parms *u = &UNIXCB(skb);
+
+       return u->pid == scm->pid &&
+              uid_eq(u->uid, scm->creds.uid) &&
+              gid_eq(u->gid, scm->creds.gid) &&
+              unix_secdata_eq(scm, skb);
+}
+
 /*
  *     Send AF_UNIX data.
  */
@@ -1472,6 +1638,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
        struct scm_cookie scm;
        int max_level;
        int data_len = 0;
+       int sk_locked;
 
        wait_for_unix_gc();
        err = scm_send(sock, msg, &scm, false);
@@ -1550,12 +1717,14 @@ restart:
                goto out_free;
        }
 
+       sk_locked = 0;
        unix_state_lock(other);
+restart_locked:
        err = -EPERM;
        if (!unix_may_send(sk, other))
                goto out_unlock;
 
-       if (sock_flag(other, SOCK_DEAD)) {
+       if (unlikely(sock_flag(other, SOCK_DEAD))) {
                /*
                 *      Check with 1003.1g - what should
                 *      datagram error
@@ -1563,10 +1732,14 @@ restart:
                unix_state_unlock(other);
                sock_put(other);
 
+               if (!sk_locked)
+                       unix_state_lock(sk);
+
                err = 0;
-               unix_state_lock(sk);
                if (unix_peer(sk) == other) {
                        unix_peer(sk) = NULL;
+                       unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
                        unix_state_unlock(sk);
 
                        unix_dgram_disconnected(sk, other);
@@ -1592,21 +1765,38 @@ restart:
                        goto out_unlock;
        }
 
-       if (unix_peer(other) != sk && unix_recvq_full(other)) {
-               if (!timeo) {
-                       err = -EAGAIN;
-                       goto out_unlock;
+       if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+               if (timeo) {
+                       timeo = unix_wait_for_peer(other, timeo);
+
+                       err = sock_intr_errno(timeo);
+                       if (signal_pending(current))
+                               goto out_free;
+
+                       goto restart;
                }
 
-               timeo = unix_wait_for_peer(other, timeo);
+               if (!sk_locked) {
+                       unix_state_unlock(other);
+                       unix_state_double_lock(sk, other);
+               }
 
-               err = sock_intr_errno(timeo);
-               if (signal_pending(current))
-                       goto out_free;
+               if (unix_peer(sk) != other ||
+                   unix_dgram_peer_wake_me(sk, other)) {
+                       err = -EAGAIN;
+                       sk_locked = 1;
+                       goto out_unlock;
+               }
 
-               goto restart;
+               if (!sk_locked) {
+                       sk_locked = 1;
+                       goto restart_locked;
+               }
        }
 
+       if (unlikely(sk_locked))
+               unix_state_unlock(sk);
+
        if (sock_flag(other, SOCK_RCVTSTAMP))
                __net_timestamp(skb);
        maybe_add_creds(skb, sock, other);
@@ -1620,6 +1810,8 @@ restart:
        return len;
 
 out_unlock:
+       if (sk_locked)
+               unix_state_unlock(sk);
        unix_state_unlock(other);
 out_free:
        kfree_skb(skb);
@@ -1741,8 +1933,10 @@ out_err:
 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
                                    int offset, size_t size, int flags)
 {
-       int err = 0;
-       bool send_sigpipe = true;
+       int err;
+       bool send_sigpipe = false;
+       bool init_scm = true;
+       struct scm_cookie scm;
        struct sock *other, *sk = socket->sk;
        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
 
@@ -1760,7 +1954,7 @@ alloc_skb:
                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
                                              &err, 0);
                if (!newskb)
-                       return err;
+                       goto err;
        }
 
        /* we must acquire readlock as we modify already present
@@ -1769,12 +1963,12 @@ alloc_skb:
        err = mutex_lock_interruptible(&unix_sk(other)->readlock);
        if (err) {
                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
-               send_sigpipe = false;
                goto err;
        }
 
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
                err = -EPIPE;
+               send_sigpipe = true;
                goto err_unlock;
        }
 
@@ -1783,17 +1977,27 @@ alloc_skb:
        if (sock_flag(other, SOCK_DEAD) ||
            other->sk_shutdown & RCV_SHUTDOWN) {
                err = -EPIPE;
+               send_sigpipe = true;
                goto err_state_unlock;
        }
 
+       if (init_scm) {
+               err = maybe_init_creds(&scm, socket, other);
+               if (err)
+                       goto err_state_unlock;
+               init_scm = false;
+       }
+
        skb = skb_peek_tail(&other->sk_receive_queue);
        if (tail && tail == skb) {
                skb = newskb;
-       } else if (!skb) {
-               if (newskb)
+       } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+               if (newskb) {
                        skb = newskb;
-               else
+               } else {
+                       tail = skb;
                        goto alloc_skb;
+               }
        } else if (newskb) {
                /* this is fast path, we don't necessarily need to
                 * call to kfree_skb even though with newskb == NULL
@@ -1814,6 +2018,9 @@ alloc_skb:
        atomic_add(size, &sk->sk_wmem_alloc);
 
        if (newskb) {
+               err = unix_scm_to_skb(&scm, skb, false);
+               if (err)
+                       goto err_state_unlock;
                spin_lock(&other->sk_receive_queue.lock);
                __skb_queue_tail(&other->sk_receive_queue, newskb);
                spin_unlock(&other->sk_receive_queue.lock);
@@ -1823,7 +2030,7 @@ alloc_skb:
        mutex_unlock(&unix_sk(other)->readlock);
 
        other->sk_data_ready(other);
-
+       scm_destroy(&scm);
        return size;
 
 err_state_unlock:
@@ -1834,6 +2041,8 @@ err:
        kfree_skb(newskb);
        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
                send_sig(SIGPIPE, current, 0);
+       if (!init_scm)
+               scm_destroy(&scm);
        return err;
 }
 
@@ -1996,7 +2205,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
                    !timeo)
                        break;
 
-               set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+               sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                unix_state_unlock(sk);
                timeo = freezable_schedule_timeout(timeo);
                unix_state_lock(sk);
@@ -2004,7 +2213,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
                if (sock_flag(sk, SOCK_DEAD))
                        break;
 
-               clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+               sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        }
 
        finish_wait(sk_sleep(sk), &wait);
@@ -2061,14 +2270,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */
-       err = mutex_lock_interruptible(&u->readlock);
-       if (unlikely(err)) {
-               /* recvmsg() in non blocking mode is supposed to return -EAGAIN
-                * sk_rcvtimeo is not honored by mutex_lock_interruptible()
-                */
-               err = noblock ? -EAGAIN : -ERESTARTSYS;
-               goto out;
-       }
+       mutex_lock(&u->readlock);
 
        if (flags & MSG_PEEK)
                skip = sk_peek_offset(sk, flags);
@@ -2112,12 +2314,12 @@ again:
                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len);
 
-                       if (signal_pending(current) ||
-                           mutex_lock_interruptible(&u->readlock)) {
+                       if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
                                goto out;
                        }
 
+                       mutex_lock(&u->readlock);
                        continue;
 unlock:
                        unix_state_unlock(sk);
@@ -2137,10 +2339,7 @@ unlock:
 
                if (check_creds) {
                        /* Never glue messages from different writers */
-                       if ((UNIXCB(skb).pid  != scm.pid) ||
-                           !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
-                           !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
-                           !unix_secdata_eq(&scm, skb))
+                       if (!unix_skb_scm_eq(skb, &scm))
                                break;
                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
                        /* Copy credentials */
@@ -2476,20 +2675,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
                return mask;
 
        writable = unix_writable(sk);
-       other = unix_peer_get(sk);
-       if (other) {
-               if (unix_peer(other) != sk) {
-                       sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-                       if (unix_recvq_full(other))
-                               writable = 0;
-               }
-               sock_put(other);
+       if (writable) {
+               unix_state_lock(sk);
+
+               other = unix_peer(sk);
+               if (other && unix_peer(other) != sk &&
+                   unix_recvq_full(other) &&
+                   unix_dgram_peer_wake_me(sk, other))
+                       writable = 0;
+
+               unix_state_unlock(sk);
        }
 
        if (writable)
                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
        else
-               set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+               sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
        return mask;
 }