diff --git a/mm/madvise.c b/mm/madvise.c
index c889fcbb530e98d8779ef75750e1fde08bf786cf..5db5464312854586a5987fc3d13fc4a21487dfc9 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -20,6 +20,9 @@
 #include <linux/backing-dev.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
+
+#include <asm/tlb.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -32,6 +35,7 @@ static int madvise_need_mmap_write(int behavior)
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
+       case MADV_FREE:
                return 0;
        default:
                /* be safe, default to 1. list exceptions explicitly */
@@ -256,6 +260,166 @@ static long madvise_willneed(struct vm_area_struct *vma,
        return 0;
 }
 
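+/*
+ * Page table walk callback for MADV_FREE: free swap entries outright and,
+ * for pages owned exclusively by this mapping, clear PG_dirty and mark the
+ * pte old and clean so that reclaim can discard the page rather than
+ * swapping it out.
+ */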
+static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+                               unsigned long end, struct mm_walk *walk)
+{
+       struct mmu_gather *tlb = walk->private;
+       struct mm_struct *mm = tlb->mm;
+       struct vm_area_struct *vma = walk->vma;
+       spinlock_t *ptl;
+       pte_t *pte, ptent;
+       struct page *page;
+       swp_entry_t entry;
+       unsigned long next;
+       int nr_swap = 0;
+
+       next = pmd_addr_end(addr, end);
+       if (pmd_trans_huge(*pmd)) {
+               if (next - addr != HPAGE_PMD_SIZE)
+                       split_huge_pmd(vma, pmd, addr);
+               else if (!madvise_free_huge_pmd(tlb, vma, pmd, addr))
+                       goto next;
+               /* fall through */
+       }
+
+       if (pmd_trans_unstable(pmd))
+               return 0;
+
+       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+       arch_enter_lazy_mmu_mode();
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
+               ptent = *pte;
+
+               if (pte_none(ptent))
+                       continue;
+               /*
+                * If the pte holds a swap entry, just clear the page table
+                * entry: swapping the page back in would be more expensive
+                * than allocating and zeroing a fresh page.
+                */
+               if (!pte_present(ptent)) {
+                       entry = pte_to_swp_entry(ptent);
+                       if (non_swap_entry(entry))
+                               continue;
+                       nr_swap--;
+                       free_swap_and_cache(entry);
+                       pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+                       continue;
+               }
+
+               page = vm_normal_page(vma, addr, ptent);
+               if (!page)
+                       continue;
+
+               if (PageSwapCache(page) || PageDirty(page)) {
+                       if (!trylock_page(page))
+                               continue;
+                       /*
+                        * If the page is shared with others, we cannot
+                        * clear PG_dirty of the page.
+                        */
+                       if (page_count(page) != 1 + !!PageSwapCache(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (PageSwapCache(page) && !try_to_free_swap(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       ClearPageDirty(page);
+                       unlock_page(page);
+               }
+
+               /*
+                * Some architectures (e.g. PPC) don't update the TLB on
+                * set_pte_at and tlb_remove_tlb_entry, so for the sake of
+                * portability, re-install the pte, marked old and clean,
+                * after clearing it.
+                */
+               ptent = ptep_get_and_clear_full(mm, addr, pte,
+                                               tlb->fullmm);
+               ptent = pte_mkold(ptent);
+               ptent = pte_mkclean(ptent);
+               set_pte_at(mm, addr, pte, ptent);
+               if (PageActive(page))
+                       deactivate_page(page);
+               tlb_remove_tlb_entry(tlb, pte, addr);
+       }
+
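+       /*
+        * nr_swap was decremented once per swap entry freed above, so it is
+        * zero or negative here; fold it into MM_SWAPENTS after syncing the
+        * per-task counters when we are operating on our own mm.
+        */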
+       if (nr_swap) {
+               if (current->mm == mm)
+                       sync_mm_rss(mm);
+
+               add_mm_counter(mm, MM_SWAPENTS, nr_swap);
+       }
+
+       arch_leave_lazy_mmu_mode();
+       pte_unmap_unlock(pte - 1, ptl);
+next:
+       cond_resched();
+       return 0;
+}
+
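+/* Run madvise_free_pte_range() over every pmd in [addr, end) of one vma. */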
+static void madvise_free_page_range(struct mmu_gather *tlb,
+                            struct vm_area_struct *vma,
+                            unsigned long addr, unsigned long end)
+{
+       struct mm_walk free_walk = {
+               .pmd_entry = madvise_free_pte_range,
+               .mm = vma->vm_mm,
+               .private = tlb,
+       };
+
+       BUG_ON(addr >= end);
+       tlb_start_vma(tlb, vma);
+       walk_page_range(addr, end, &free_walk);
+       tlb_end_vma(tlb, vma);
+}
+
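+/*
+ * MADV_FREE for a single vma: clamp [start_addr, end_addr) to the vma,
+ * set up the mmu_gather and mmu notifiers and run the page table walk.
+ * Only anonymous vmas are supported; locked, hugetlb and pfnmap vmas
+ * are rejected with -EINVAL.
+ */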
+static int madvise_free_single_vma(struct vm_area_struct *vma,
+                       unsigned long start_addr, unsigned long end_addr)
+{
+       unsigned long start, end;
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_gather tlb;
+
+       if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+               return -EINVAL;
+
+       /* MADV_FREE works only for anonymous vmas at the moment */
+       if (!vma_is_anonymous(vma))
+               return -EINVAL;
+
+       start = max(vma->vm_start, start_addr);
+       if (start >= vma->vm_end)
+               return -EINVAL;
+       end = min(vma->vm_end, end_addr);
+       if (end <= vma->vm_start)
+               return -EINVAL;
+
+       lru_add_drain();
+       tlb_gather_mmu(&tlb, mm, start, end);
+       update_hiwater_rss(mm);
+
+       mmu_notifier_invalidate_range_start(mm, start, end);
+       madvise_free_page_range(&tlb, vma, start, end);
+       mmu_notifier_invalidate_range_end(mm, start, end);
+       tlb_finish_mmu(&tlb, start, end);
+
+       return 0;
+}
+
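+/*
+ * Per-vma entry point for MADV_FREE, called from madvise_vma() (when swap
+ * is available) for a userspace call such as
+ *
+ *     madvise(buf, len, MADV_FREE);
+ *
+ * The pages stay mapped; the kernel may discard them lazily under memory
+ * pressure, and a later write to a page cancels the pending free for it.
+ */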
+static long madvise_free(struct vm_area_struct *vma,
+                            struct vm_area_struct **prev,
+                            unsigned long start, unsigned long end)
+{
+       *prev = vma;
+       return madvise_free_single_vma(vma, start, end);
+}
+
 /*
  * Application no longer needs these pages.  If the pages are dirty,
  * it's OK to just throw them away.  The app will be more careful about
@@ -379,6 +543,14 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                return madvise_remove(vma, prev, start, end);
        case MADV_WILLNEED:
                return madvise_willneed(vma, prev, start, end);
+       case MADV_FREE:
+               /*
+                * XXX: In this implementation, MADV_FREE works like
+                * MADV_DONTNEED on a swapless system or when swap is full.
+                */
+               if (get_nr_swap_pages() > 0)
+                       return madvise_free(vma, prev, start, end);
+               /* fall through */
        case MADV_DONTNEED:
                return madvise_dontneed(vma, prev, start, end);
        default:
@@ -398,6 +570,7 @@ madvise_behavior_valid(int behavior)
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
+       case MADV_FREE:
 #ifdef CONFIG_KSM
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE: