Merge branch 'akpm' (patches from Andrew)
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 8 Apr 2017 08:35:32 +0000 (01:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 8 Apr 2017 08:35:32 +0000 (01:35 -0700)
Merge misc fixes from Andrew Morton:
 "10 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: move pcp and lru-pcp draining into single wq
  mailmap: update Yakir Yang email address
  mm, swap_cgroup: reschedule when neeed in swap_cgroup_swapoff()
  dax: fix radix tree insertion race
  mm, thp: fix setting of defer+madvise thp defrag mode
  ptrace: fix PTRACE_LISTEN race corrupting task->state
  vmlinux.lds: add missing VMLINUX_SYMBOL macros
  mm/page_alloc.c: fix print order in show_free_areas()
  userfaultfd: report actual registered features in fdinfo
  mm: fix page_vma_mapped_walk() for ksm pages

12 files changed:
.mailmap
fs/dax.c
fs/userfaultfd.c
include/asm-generic/vmlinux.lds.h
kernel/ptrace.c
mm/huge_memory.c
mm/internal.h
mm/page_alloc.c
mm/page_vma_mapped.c
mm/swap.c
mm/swap_cgroup.c
mm/vmstat.c

index 67dc22ffc9a80cb4fd6abeeba3d2ec7f7ae2ba19..e229922dc7f0a30cefab74d0f75313af513965e1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -171,6 +171,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
+Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
 Gustavo Padovan <padovan@profusion.mobi>
index de622d4282a6507a9c4e4eb082ac6ff8286efb48..85abd741253d4b89a42c6c5f2b4dd7bcf456e843 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -373,6 +373,22 @@ restart:
                }
                spin_lock_irq(&mapping->tree_lock);
 
+               if (!entry) {
+                       /*
+                        * We needed to drop the page_tree lock while calling
+                        * radix_tree_preload() and we didn't have an entry to
+                        * lock.  See if another thread inserted an entry at
+                        * our index during this time.
+                        */
+                       entry = __radix_tree_lookup(&mapping->page_tree, index,
+                                       NULL, &slot);
+                       if (entry) {
+                               radix_tree_preload_end();
+                               spin_unlock_irq(&mapping->tree_lock);
+                               goto restart;
+                       }
+               }
+
                if (pmd_downgrade) {
                        radix_tree_delete(&mapping->page_tree, index);
                        mapping->nrexceptional--;
@@ -388,19 +404,12 @@ restart:
                if (err) {
                        spin_unlock_irq(&mapping->tree_lock);
                        /*
-                        * Someone already created the entry?  This is a
-                        * normal failure when inserting PMDs in a range
-                        * that already contains PTEs.  In that case we want
-                        * to return -EEXIST immediately.
-                        */
-                       if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
-                               goto restart;
-                       /*
-                        * Our insertion of a DAX PMD entry failed, most
-                        * likely because it collided with a PTE sized entry
-                        * at a different index in the PMD range.  We haven't
-                        * inserted anything into the radix tree and have no
-                        * waiters to wake.
+                        * Our insertion of a DAX entry failed, most likely
+                        * because we were inserting a PMD entry and it
+                        * collided with a PTE sized entry at a different
+                        * index in the PMD range.  We haven't inserted
+                        * anything into the radix tree and have no waiters to
+                        * wake.
                         */
                        return ERR_PTR(err);
                }
index 1d227b0fcf49ff26b40bdd726b3839fb8f353f35..f7555fc25877435e13b65cbe597ae9bdb11c6528 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
         *      protocols: aa:... bb:...
         */
        seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
-                  pending, total, UFFD_API, UFFD_API_FEATURES,
+                  pending, total, UFFD_API, ctx->features,
                   UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
 }
 #endif
index 7cdfe167074f873a71dd51e04da3304864d2f063..143db9c523e25f38488bd43302b82f316e3124a9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
  */
 #ifndef RO_AFTER_INIT_DATA
 #define RO_AFTER_INIT_DATA                                             \
-       __start_ro_after_init = .;                                      \
+       VMLINUX_SYMBOL(__start_ro_after_init) = .;                      \
        *(.data..ro_after_init)                                         \
-       __end_ro_after_init = .;
+       VMLINUX_SYMBOL(__end_ro_after_init) = .;
 #endif
 
 /*
index 0af9287121746d1b198429d52a99e862c4f1a8f0..266ddcc1d8bbbc6af7bceda3657618beef2a9c59 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
 
        WARN_ON(!task->ptrace || task->parent != current);
 
+       /*
+        * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
+        * Recheck state under the lock to close this race.
+        */
        spin_lock_irq(&task->sighand->siglock);
-       if (__fatal_signal_pending(task))
-               wake_up_state(task, __TASK_TRACED);
-       else
-               task->state = TASK_TRACED;
+       if (task->state == __TASK_TRACED) {
+               if (__fatal_signal_pending(task))
+                       wake_up_state(task, __TASK_TRACED);
+               else
+                       task->state = TASK_TRACED;
+       }
        spin_unlock_irq(&task->sighand->siglock);
 }
 
index 1ebc93e179f3eab40cf469fd67a361ea43a11368..fef4cf210cc7f0df1889a01532bb32215d154e60 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj,
                clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
                clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
                set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-       } else if (!memcmp("defer", buf,
-                   min(sizeof("defer")-1, count))) {
-               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-               set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
        } else if (!memcmp("defer+madvise", buf,
                    min(sizeof("defer+madvise")-1, count))) {
                clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
                clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
                clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
                set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+       } else if (!memcmp("defer", buf,
+                   min(sizeof("defer")-1, count))) {
+               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+               clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+               set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
        } else if (!memcmp("madvise", buf,
                           min(sizeof("madvise")-1, count))) {
                clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
index ccfc2a2969f4402bdbfb27e0b48df151f4da68b7..266efaeaa370a46debcc5b6b614a72e33833ac4d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 enum ttu_flags;
 struct tlbflush_unmap_batch;
 
+
+/*
+ * only for MM internal work items which do not depend on
+ * any allocations or locks which might depend on allocations
+ */
+extern struct workqueue_struct *mm_percpu_wq;
+
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
index 6cbde310abed8df22f9cd6ed80fcc252f4c80f43..f3d603cef2c0c0e5aef09540dd2f8d50da5a808c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone)
         */
        static cpumask_t cpus_with_pcps;
 
+       /*
+        * Make sure nobody triggers this path before mm_percpu_wq is fully
+        * initialized.
+        */
+       if (WARN_ON_ONCE(!mm_percpu_wq))
+               return;
+
        /* Workqueues cannot recurse */
        if (current->flags & PF_WQ_WORKER)
                return;
@@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone)
        for_each_cpu(cpu, &cpus_with_pcps) {
                struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
                INIT_WORK(work, drain_local_pages_wq);
-               schedule_work_on(cpu, work);
+               queue_work_on(cpu, mm_percpu_wq, work);
        }
        for_each_cpu(cpu, &cpus_with_pcps)
                flush_work(per_cpu_ptr(&pcpu_drain, cpu));
@@ -4519,13 +4526,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(node_page_state(pgdat, NR_FILE_MAPPED)),
                        K(node_page_state(pgdat, NR_FILE_DIRTY)),
                        K(node_page_state(pgdat, NR_WRITEBACK)),
+                       K(node_page_state(pgdat, NR_SHMEM)),
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                        K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
                        K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
                                        * HPAGE_PMD_NR),
                        K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
-                       K(node_page_state(pgdat, NR_SHMEM)),
                        K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                        K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
                        node_page_state(pgdat, NR_PAGES_SCANNED),
index c4c9def8ffea47b4838fc3095221ee90e0c0fae3..de9c40d7304aa0e714bdd32abe79517ec3d73038 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
        if (pvmw->pmd && !pvmw->pte)
                return not_found(pvmw);
 
-       /* Only for THP, seek to next pte entry makes sense */
-       if (pvmw->pte) {
-               if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
-                       return not_found(pvmw);
+       if (pvmw->pte)
                goto next_pte;
-       }
 
        if (unlikely(PageHuge(pvmw->page))) {
                /* when pud is not present, pte will be NULL */
@@ -165,9 +161,14 @@ restart:
        while (1) {
                if (check_pte(pvmw))
                        return true;
-next_pte:      do {
+next_pte:
+               /* Seek to next pte only makes sense for THP */
+               if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+                       return not_found(pvmw);
+               do {
                        pvmw->address += PAGE_SIZE;
-                       if (pvmw->address >=
+                       if (pvmw->address >= pvmw->vma->vm_end ||
+                           pvmw->address >=
                                        __vma_address(pvmw->page, pvmw->vma) +
                                        hpage_nr_pages(pvmw->page) * PAGE_SIZE)
                                return not_found(pvmw);
index c4910f14f9579ef1d8b165355f9294715968bf2d..5dabf444d724db98595567b0f7daed7d53fc877e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-/*
- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
- * workqueue, aiding in getting memory freed.
- */
-static struct workqueue_struct *lru_add_drain_wq;
-
-static int __init lru_init(void)
-{
-       lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
-
-       if (WARN(!lru_add_drain_wq,
-               "Failed to create workqueue lru_add_drain_wq"))
-               return -ENOMEM;
-
-       return 0;
-}
-early_initcall(lru_init);
-
 void lru_add_drain_all(void)
 {
        static DEFINE_MUTEX(lock);
        static struct cpumask has_work;
        int cpu;
 
+       /*
+        * Make sure nobody triggers this path before mm_percpu_wq is fully
+        * initialized.
+        */
+       if (WARN_ON(!mm_percpu_wq))
+               return;
+
        mutex_lock(&lock);
        get_online_cpus();
        cpumask_clear(&has_work);
@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
                    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
                    need_activate_page_drain(cpu)) {
                        INIT_WORK(work, lru_add_drain_per_cpu);
-                       queue_work_on(cpu, lru_add_drain_wq, work);
+                       queue_work_on(cpu, mm_percpu_wq, work);
                        cpumask_set_cpu(cpu, &has_work);
                }
        }
index 310ac0b8f9746c53eff9306be52585fb9094fc00..ac6318a064d35e6dcc5385d1dc8062ff6e46554c 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type)
                        struct page *page = map[i];
                        if (page)
                                __free_page(page);
+                       if (!(i % SWAP_CLUSTER_MAX))
+                               cond_resched();
                }
                vfree(map);
        }
index 89f95396ec46be64055f1a658c9c0f7bdad90d5c..809025ed97ea0eee97573a32ba2764c63ee2dffd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
-static struct workqueue_struct *vmstat_wq;
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
 
@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
                 * to occur in the future. Keep on running the
                 * update worker thread.
                 */
-               queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+               queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
                                this_cpu_ptr(&vmstat_work),
                                round_jiffies_relative(sysctl_stat_interval));
        }
@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
                struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
                if (!delayed_work_pending(dw) && need_update(cpu))
-                       queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+                       queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
        }
        put_online_cpus();
 
@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void)
                INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
                        vmstat_update);
 
-       vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
        schedule_delayed_work(&shepherd,
                round_jiffies_relative(sysctl_stat_interval));
 }
@@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu)
 
 #endif
 
+struct workqueue_struct *mm_percpu_wq;
+
 void __init init_mm_internals(void)
 {
-#ifdef CONFIG_SMP
-       int ret;
+       int ret __maybe_unused;
 
+       mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
+                                      WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+
+#ifdef CONFIG_SMP
        ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
                                        NULL, vmstat_cpu_dead);
        if (ret < 0)