Merge tag 'for-3.3-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb...

[karo-tx-linux.git] / net / core / sock.c
diff --git a/net/core/sock.c b/net/core/sock.c

index b23f174ab84c3b6c72302834d8e13695d22a1c6c..3e81fd2e3c75ca01ed972e98f3bc3f344fe5bfe6 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,8 @@
  #include <linux/init.h>
  #include <linux/highmem.h>
  #include <linux/user_namespace.h>
+#include <linux/jump_label.h>
+#include <linux/memcontrol.h>
  
  #include <asm/uaccess.h>
  #include <asm/system.h>
@@ -125,6 +127,7 @@
  #include <net/xfrm.h>
  #include <linux/ipsec.h>
  #include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
  
  #include <linux/filter.h>
  
@@ -134,6 +137,46 @@
  #include <net/tcp.h>
  #endif
  
+static DEFINE_MUTEX(proto_list_mutex);
+static LIST_HEAD(proto_list);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+/*
+ * Call the init_cgroup hook of every protocol on proto_list for the given
+ * cgroup/subsystem pair. Protocols that do not provide the hook are skipped.
+ *
+ * The whole walk, including the error unwind, runs with proto_list_mutex
+ * held.
+ *
+ * Returns 0 when every hook succeeded (or no protocol has one); otherwise
+ * the first non-zero value returned by an init_cgroup hook.
+ */
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+       struct proto *proto;
+       int ret = 0;
+
+       mutex_lock(&proto_list_mutex);
+       list_for_each_entry(proto, &proto_list, node) {
+               if (proto->init_cgroup) {
+                       ret = proto->init_cgroup(cgrp, ss);
+                       /* Stop at the first failure and unwind below. */
+                       if (ret)
+                               goto out;
+               }
+       }
+
+       mutex_unlock(&proto_list_mutex);
+       return ret;
+out:
+       /*
+        * Walk backwards from the entry before the one that failed, calling
+        * destroy_cgroup on the protocols already visited. The failing
+        * protocol itself is not passed to destroy_cgroup here.
+        */
+       list_for_each_entry_continue_reverse(proto, &proto_list, node)
+               if (proto->destroy_cgroup)
+                       proto->destroy_cgroup(cgrp, ss);
+       mutex_unlock(&proto_list_mutex);
+       return ret;
+}
+
+/*
+ * Call the destroy_cgroup hook of every protocol on proto_list for the
+ * given cgroup/subsystem pair, skipping protocols without the hook.
+ *
+ * The list is walked in reverse order (the opposite direction from
+ * mem_cgroup_sockets_init()) while holding proto_list_mutex.
+ */
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+       struct proto *proto;
+
+       mutex_lock(&proto_list_mutex);
+       list_for_each_entry_reverse(proto, &proto_list, node)
+               if (proto->destroy_cgroup)
+                       proto->destroy_cgroup(cgrp, ss);
+       mutex_unlock(&proto_list_mutex);
+}
+#endif
+
  /*
   * Each address family might have different locking rules, so we have
   * one slock key per address family:
@@ -141,6 +184,9 @@
  static struct lock_class_key af_family_keys[AF_MAX];
  static struct lock_class_key af_family_slock_keys[AF_MAX];
  
+struct jump_label_key memcg_socket_limit_enabled;
+EXPORT_SYMBOL(memcg_socket_limit_enabled);
+
  /*
   * Make lock validator output more readable. (we pre-construct these
   * strings build-time, so that runtime initialization of socket
@@ -221,10 +267,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
  int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
  EXPORT_SYMBOL(sysctl_optmem_max);
  
-#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
  int net_cls_subsys_id = -1;
  EXPORT_SYMBOL_GPL(net_cls_subsys_id);
  #endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
  
  static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
  {
@@ -269,14 +321,14 @@ static void sock_warn_obsolete_bsdism(const char *name)
         }
  }
  
-static void sock_disable_timestamp(struct sock *sk, int flag)
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
+static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
  {
-       if (sock_flag(sk, flag)) {
-               sock_reset_flag(sk, flag);
-               if (!sock_flag(sk, SOCK_TIMESTAMP) &&
-                   !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
+       if (sk->sk_flags & flags) {
+               sk->sk_flags &= ~flags;
+               if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
                         net_disable_timestamp();
-               }
         }
  }
  
@@ -678,7 +730,7 @@ set_rcvbuf:
                                               SOCK_TIMESTAMPING_RX_SOFTWARE);
                 else
                         sock_disable_timestamp(sk,
-                                              SOCK_TIMESTAMPING_RX_SOFTWARE);
+                                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
                 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
                                   val & SOF_TIMESTAMPING_SOFTWARE);
                 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
@@ -736,6 +788,11 @@ set_rcvbuf:
         case SO_RXQ_OVFL:
                 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
                 break;
+
+       case SO_WIFI_STATUS:
+               sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
+               break;
+
         default:
                 ret = -ENOPROTOOPT;
                 break;
@@ -957,6 +1014,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
                 break;
  
+       case SO_WIFI_STATUS:
+               v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
+               break;
+
         default:
                 return -ENOPROTOOPT;
         }
@@ -1107,6 +1168,18 @@ void sock_update_classid(struct sock *sk)
                 sk->sk_classid = classid;
  }
  EXPORT_SYMBOL(sock_update_classid);
+
+/*
+ * Record the current task's netprio cgroup index in sk->sk_cgrp_prioidx.
+ *
+ * Nothing is done when called from interrupt context. If the current task
+ * has no netprio state, the index is set to 0.
+ */
+void sock_update_netprioidx(struct sock *sk)
+{
+       struct cgroup_netprio_state *state;
+       /* Leave the socket untouched in interrupt context. */
+       if (in_interrupt())
+               return;
+       /* The state lookup and the read of prioidx are done under RCU. */
+       rcu_read_lock();
+       state = task_netprio_state(current);
+       sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
  #endif
  
  /**
@@ -1134,6 +1207,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                 atomic_set(&sk->sk_wmem_alloc, 1);
  
                 sock_update_classid(sk);
+               sock_update_netprioidx(sk);
         }
  
         return sk;
@@ -1154,8 +1228,7 @@ static void __sk_free(struct sock *sk)
                 RCU_INIT_POINTER(sk->sk_filter, NULL);
         }
  
-       sock_disable_timestamp(sk, SOCK_TIMESTAMP);
-       sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
+       sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
  
         if (atomic_read(&sk->sk_omem_alloc))
                 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -1200,7 +1273,20 @@ void sk_release_kernel(struct sock *sk)
  }
  EXPORT_SYMBOL(sk_release_kernel);
  
-struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+/*
+ * Carry memory-cgroup association over to a cloned socket: when
+ * mem_cgroup_sockets_enabled is true and the parent socket @sk has
+ * sk_cgrp set, call sock_update_memcg() on the clone @newsk.
+ * Otherwise @newsk is left untouched.
+ */
+static void sk_update_clone(const struct sock *sk, struct sock *newsk)
+{
+       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+               sock_update_memcg(newsk);
+}
+
+/**
+ *     sk_clone_lock - clone a socket, and lock its clone
+ *     @sk: the socket to clone
+ *     @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ *     Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
  {
         struct sock *newsk;
  
@@ -1283,17 +1369,18 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                 sk_set_socket(newsk, NULL);
                 newsk->sk_wq = NULL;
  
+               sk_update_clone(sk, newsk);
+
                 if (newsk->sk_prot->sockets_allocated)
-                       percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+                       sk_sockets_allocated_inc(newsk);
  
-               if (sock_flag(newsk, SOCK_TIMESTAMP) ||
-                   sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+               if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
                         net_enable_timestamp();
         }
  out:
         return newsk;
  }
-EXPORT_SYMBOL_GPL(sk_clone);
+EXPORT_SYMBOL_GPL(sk_clone_lock);
  
  void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
  {
@@ -1673,30 +1760,34 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
         struct proto *prot = sk->sk_prot;
         int amt = sk_mem_pages(size);
         long allocated;
+       int parent_status = UNDER_LIMIT;
  
         sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-       allocated = atomic_long_add_return(amt, prot->memory_allocated);
+
+       allocated = sk_memory_allocated_add(sk, amt, &parent_status);
  
         /* Under limit. */
-       if (allocated <= prot->sysctl_mem[0]) {
-               if (prot->memory_pressure && *prot->memory_pressure)
-                       *prot->memory_pressure = 0;
+       if (parent_status == UNDER_LIMIT &&
+                       allocated <= sk_prot_mem_limits(sk, 0)) {
+               sk_leave_memory_pressure(sk);
                 return 1;
         }
  
-       /* Under pressure. */
-       if (allocated > prot->sysctl_mem[1])
-               if (prot->enter_memory_pressure)
-                       prot->enter_memory_pressure(sk);
+       /* Under pressure. (we or our parents) */
+       if ((parent_status > SOFT_LIMIT) ||
+                       allocated > sk_prot_mem_limits(sk, 1))
+               sk_enter_memory_pressure(sk);
  
-       /* Over hard limit. */
-       if (allocated > prot->sysctl_mem[2])
+       /* Over hard limit (we or our parents) */
+       if ((parent_status == OVER_LIMIT) ||
+                       (allocated > sk_prot_mem_limits(sk, 2)))
                 goto suppress_allocation;
  
         /* guarantee minimum buffer size under pressure */
         if (kind == SK_MEM_RECV) {
                 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
                         return 1;
+
         } else { /* SK_MEM_SEND */
                 if (sk->sk_type == SOCK_STREAM) {
                         if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
@@ -1706,13 +1797,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
                                 return 1;
         }
  
-       if (prot->memory_pressure) {
+       if (sk_has_memory_pressure(sk)) {
                 int alloc;
  
-               if (!*prot->memory_pressure)
+               if (!sk_under_memory_pressure(sk))
                         return 1;
-               alloc = percpu_counter_read_positive(prot->sockets_allocated);
-               if (prot->sysctl_mem[2] > alloc *
+               alloc = sk_sockets_allocated_read_positive(sk);
+               if (sk_prot_mem_limits(sk, 2) > alloc *
                     sk_mem_pages(sk->sk_wmem_queued +
                                  atomic_read(&sk->sk_rmem_alloc) +
                                  sk->sk_forward_alloc))
@@ -1735,7 +1826,9 @@ suppress_allocation:
  
         /* Alas. Undo changes. */
         sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-       atomic_long_sub(amt, prot->memory_allocated);
+
+       sk_memory_allocated_sub(sk, amt);
+
         return 0;
  }
  EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1746,15 +1839,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
   */
  void __sk_mem_reclaim(struct sock *sk)
  {
-       struct proto *prot = sk->sk_prot;
-
-       atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
-                  prot->memory_allocated);
+       sk_memory_allocated_sub(sk,
+                               sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
         sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
  
-       if (prot->memory_pressure && *prot->memory_pressure &&
-           (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
-               *prot->memory_pressure = 0;
+       if (sk_under_memory_pressure(sk) &&
+           (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
+               sk_leave_memory_pressure(sk);
  }
  EXPORT_SYMBOL(__sk_mem_reclaim);
  
@@ -2125,16 +2216,15 @@ EXPORT_SYMBOL(sock_get_timestampns);
  void sock_enable_timestamp(struct sock *sk, int flag)
  {
         if (!sock_flag(sk, flag)) {
+               unsigned long previous_flags = sk->sk_flags;
+
                 sock_set_flag(sk, flag);
                 /*
                  * we just set one of the two flags which require net
                  * time stamping, but time stamping might have been on
                  * already because of the other one
                  */
-               if (!sock_flag(sk,
-                               flag == SOCK_TIMESTAMP ?
-                               SOCK_TIMESTAMPING_RX_SOFTWARE :
-                               SOCK_TIMESTAMP))
+               if (!(previous_flags & SK_FLAGS_TIMESTAMP))
                         net_enable_timestamp();
         }
  }
@@ -2246,9 +2336,6 @@ void sk_common_release(struct sock *sk)
  }
  EXPORT_SYMBOL(sk_common_release);
  
-static DEFINE_RWLOCK(proto_list_lock);
-static LIST_HEAD(proto_list);
-
  #ifdef CONFIG_PROC_FS
  #define PROTO_INUSE_NR 64      /* should be enough for the first time */
  struct prot_inuse {
@@ -2397,10 +2484,10 @@ int proto_register(struct proto *prot, int alloc_slab)
                 }
         }
  
-       write_lock(&proto_list_lock);
+       mutex_lock(&proto_list_mutex);
         list_add(&prot->node, &proto_list);
         assign_proto_idx(prot);
-       write_unlock(&proto_list_lock);
+       mutex_unlock(&proto_list_mutex);
         return 0;
  
  out_free_timewait_sock_slab_name:
@@ -2423,10 +2510,10 @@ EXPORT_SYMBOL(proto_register);
  
  void proto_unregister(struct proto *prot)
  {
-       write_lock(&proto_list_lock);
+       mutex_lock(&proto_list_mutex);
         release_proto_idx(prot);
         list_del(&prot->node);
-       write_unlock(&proto_list_lock);
+       mutex_unlock(&proto_list_mutex);
  
         if (prot->slab != NULL) {
                 kmem_cache_destroy(prot->slab);
@@ -2449,9 +2536,9 @@ EXPORT_SYMBOL(proto_unregister);
  
  #ifdef CONFIG_PROC_FS
  static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
-       __acquires(proto_list_lock)
+       __acquires(proto_list_mutex)
  {
-       read_lock(&proto_list_lock);
+       mutex_lock(&proto_list_mutex);
         return seq_list_start_head(&proto_list, *pos);
  }
  
@@ -2461,25 +2548,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  }
  
  static void proto_seq_stop(struct seq_file *seq, void *v)
-       __releases(proto_list_lock)
+       __releases(proto_list_mutex)
  {
-       read_unlock(&proto_list_lock);
+       mutex_unlock(&proto_list_mutex);
  }
  
  static char proto_method_implemented(const void *method)
  {
         return method == NULL ? 'n' : 'y';
  }
+/*
+ * Value for the memory column printed by proto_seq_printf(): the result of
+ * proto_memory_allocated() when the protocol has a memory_allocated
+ * counter, or -1 when it has none.
+ */
+static long sock_prot_memory_allocated(struct proto *proto)
+{
+       return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L;
+}
+
+/*
+ * Value for the pressure column printed by proto_seq_printf().
+ *
+ * Returns "NI" when the protocol has no memory_pressure pointer
+ * (presumably "not implemented" -- confirm); otherwise "yes" or "no"
+ * according to proto_memory_pressure().
+ */
+static char *sock_prot_memory_pressure(struct proto *proto)
+{
+       /* Nested conditional: the outer test picks "NI", the inner yes/no. */
+       return proto->memory_pressure != NULL ?
+       proto_memory_pressure(proto) ? "yes" : "no" : "NI";
+}
  
  static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
  {
+
         seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                    proto->name,
                    proto->obj_size,
                    sock_prot_inuse_get(seq_file_net(seq), proto),
-                  proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
-                  proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
+                  sock_prot_memory_allocated(proto),
+                  sock_prot_memory_pressure(proto),
                    proto->max_header,
                    proto->slab == NULL ? "no" : "yes",
                    module_name(proto->owner),