git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/f2fs/node.c
Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block
[karo-tx-linux.git] / fs / f2fs / node.c
index 1a33de9d84b16a68202ee410cafc4f9ba1353e11..e53403987f6d1cfcc237f29e17f9b896cb245b80 100644 (file)
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -407,6 +407,29 @@ cache:
        up_write(&nm_i->nat_tree_lock);
 }
 
+/*
+ * readahead MAX_RA_NODE number of node pages.
+ */
+static void ra_node_pages(struct page *parent, int start, int n)
+{
+       struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+       struct blk_plug plug;
+       int i, end;
+       nid_t nid;
+
+       blk_start_plug(&plug);
+
+       /* Then, try readahead for siblings of the desired node */
+       end = start + n;
+       end = min(end, NIDS_PER_BLOCK);
+       for (i = start; i < end; i++) {
+               nid = get_nid(parent, i, false);
+               ra_node_page(sbi, nid);
+       }
+
+       blk_finish_plug(&plug);
+}
+
 pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
 {
        const long direct_index = ADDRS_PER_INODE(dn->inode);
@@ -707,6 +730,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
                return PTR_ERR(page);
        }
 
+       ra_node_pages(page, ofs, NIDS_PER_BLOCK);
+
        rn = F2FS_NODE(page);
        if (depth < 3) {
                for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
@@ -784,6 +809,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
                nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
        }
 
+       ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
+
        /* free direct nodes linked to a partial indirect node */
        for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
                child_nid = get_nid(pages[idx], i, false);
@@ -832,7 +859,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
        trace_f2fs_truncate_inode_blocks_enter(inode, from);
 
        level = get_node_path(inode, from, offset, noffset);
-restart:
+
        page = get_node_page(sbi, inode->i_ino);
        if (IS_ERR(page)) {
                trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
@@ -896,10 +923,7 @@ skip_partial:
                if (offset[1] == 0 &&
                                ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
                        lock_page(page);
-                       if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
-                               f2fs_put_page(page, 1);
-                               goto restart;
-                       }
+                       BUG_ON(page->mapping != NODE_MAPPING(sbi));
                        f2fs_wait_on_page_writeback(page, NODE, true);
                        ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
                        set_page_dirty(page);
@@ -998,7 +1022,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
        if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
                return ERR_PTR(-EPERM);
 
-       page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
+       page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
        if (!page)
                return ERR_PTR(-ENOMEM);
 
@@ -1046,14 +1070,15 @@ fail:
  * 0: f2fs_put_page(page, 0)
  * LOCKED_PAGE or error: f2fs_put_page(page, 1)
  */
-static int read_node_page(struct page *page, int rw)
+static int read_node_page(struct page *page, int op_flags)
 {
        struct f2fs_sb_info *sbi = F2FS_P_SB(page);
        struct node_info ni;
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .type = NODE,
-               .rw = rw,
+               .op = REQ_OP_READ,
+               .op_flags = op_flags,
                .page = page,
                .encrypted_page = NULL,
        };
@@ -1090,7 +1115,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
        if (apage)
                return;
 
-       apage = grab_cache_page(NODE_MAPPING(sbi), nid);
+       apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
        if (!apage)
                return;
 
@@ -1098,29 +1123,6 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
        f2fs_put_page(apage, err ? 1 : 0);
 }
 
-/*
- * readahead MAX_RA_NODE number of node pages.
- */
-static void ra_node_pages(struct page *parent, int start)
-{
-       struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
-       struct blk_plug plug;
-       int i, end;
-       nid_t nid;
-
-       blk_start_plug(&plug);
-
-       /* Then, try readahead for siblings of the desired node */
-       end = start + MAX_RA_NODE;
-       end = min(end, NIDS_PER_BLOCK);
-       for (i = start; i < end; i++) {
-               nid = get_nid(parent, i, false);
-               ra_node_page(sbi, nid);
-       }
-
-       blk_finish_plug(&plug);
-}
-
 static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
                                        struct page *parent, int start)
 {
@@ -1131,7 +1133,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
                return ERR_PTR(-ENOENT);
        f2fs_bug_on(sbi, check_nid_range(sbi, nid));
 repeat:
-       page = grab_cache_page(NODE_MAPPING(sbi), nid);
+       page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
        if (!page)
                return ERR_PTR(-ENOMEM);
 
@@ -1144,7 +1146,7 @@ repeat:
        }
 
        if (parent)
-               ra_node_pages(parent, start + 1);
+               ra_node_pages(parent, start + 1, MAX_RA_NODE);
 
        lock_page(page);
 
@@ -1196,19 +1198,17 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
 {
        struct inode *inode;
        struct page *page;
+       int ret;
 
        /* should flush inline_data before evict_inode */
        inode = ilookup(sbi->sb, ino);
        if (!inode)
                return;
 
-       page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0);
+       page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
        if (!page)
                goto iput_out;
 
-       if (!trylock_page(page))
-               goto release_out;
-
        if (!PageUptodate(page))
                goto page_out;
 
@@ -1218,24 +1218,214 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
        if (!clear_page_dirty_for_io(page))
                goto page_out;
 
-       if (!f2fs_write_inline_data(inode, page))
-               inode_dec_dirty_pages(inode);
-       else
+       ret = f2fs_write_inline_data(inode, page);
+       inode_dec_dirty_pages(inode);
+       if (ret)
                set_page_dirty(page);
 page_out:
-       unlock_page(page);
-release_out:
-       f2fs_put_page(page, 0);
+       f2fs_put_page(page, 1);
 iput_out:
        iput(inode);
 }
 
-int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
-                                       struct writeback_control *wbc)
+void move_node_page(struct page *node_page, int gc_type)
+{
+       if (gc_type == FG_GC) {
+               struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
+               struct writeback_control wbc = {
+                       .sync_mode = WB_SYNC_ALL,
+                       .nr_to_write = 1,
+                       .for_reclaim = 0,
+               };
+
+               set_page_dirty(node_page);
+               f2fs_wait_on_page_writeback(node_page, NODE, true);
+
+               f2fs_bug_on(sbi, PageWriteback(node_page));
+               if (!clear_page_dirty_for_io(node_page))
+                       goto out_page;
+
+               if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
+                       unlock_page(node_page);
+               goto release_page;
+       } else {
+               /* set page dirty and write it */
+               if (!PageWriteback(node_page))
+                       set_page_dirty(node_page);
+       }
+out_page:
+       unlock_page(node_page);
+release_page:
+       f2fs_put_page(node_page, 0);
+}
+
+static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
 {
        pgoff_t index, end;
        struct pagevec pvec;
-       int step = ino ? 2 : 0;
+       struct page *last_page = NULL;
+
+       pagevec_init(&pvec, 0);
+       index = 0;
+       end = ULONG_MAX;
+
+       while (index <= end) {
+               int i, nr_pages;
+               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_DIRTY,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               f2fs_put_page(last_page, 0);
+                               pagevec_release(&pvec);
+                               return ERR_PTR(-EIO);
+                       }
+
+                       if (!IS_DNODE(page) || !is_cold_node(page))
+                               continue;
+                       if (ino_of_node(page) != ino)
+                               continue;
+
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+                       if (ino_of_node(page) != ino)
+                               goto continue_unlock;
+
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+
+                       if (last_page)
+                               f2fs_put_page(last_page, 0);
+
+                       get_page(page);
+                       last_page = page;
+                       unlock_page(page);
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       return last_page;
+}
+
+int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
+                       struct writeback_control *wbc, bool atomic)
+{
+       pgoff_t index, end;
+       struct pagevec pvec;
+       int ret = 0;
+       struct page *last_page = NULL;
+       bool marked = false;
+
+       if (atomic) {
+               last_page = last_fsync_dnode(sbi, ino);
+               if (IS_ERR_OR_NULL(last_page))
+                       return PTR_ERR_OR_ZERO(last_page);
+       }
+retry:
+       pagevec_init(&pvec, 0);
+       index = 0;
+       end = ULONG_MAX;
+
+       while (index <= end) {
+               int i, nr_pages;
+               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_DIRTY,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               f2fs_put_page(last_page, 0);
+                               pagevec_release(&pvec);
+                               return -EIO;
+                       }
+
+                       if (!IS_DNODE(page) || !is_cold_node(page))
+                               continue;
+                       if (ino_of_node(page) != ino)
+                               continue;
+
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+                       if (ino_of_node(page) != ino)
+                               goto continue_unlock;
+
+                       if (!PageDirty(page) && page != last_page) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+
+                       f2fs_wait_on_page_writeback(page, NODE, true);
+                       BUG_ON(PageWriteback(page));
+
+                       if (!atomic || page == last_page) {
+                               set_fsync_mark(page, 1);
+                               if (IS_INODE(page))
+                                       set_dentry_mark(page,
+                                               need_dentry_mark(sbi, ino));
+                               /*  may be written by other thread */
+                               if (!PageDirty(page))
+                                       set_page_dirty(page);
+                       }
+
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
+
+                       ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
+                       if (ret) {
+                               unlock_page(page);
+                               f2fs_put_page(last_page, 0);
+                               break;
+                       }
+                       if (page == last_page) {
+                               f2fs_put_page(page, 0);
+                               marked = true;
+                               break;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+
+               if (ret || marked)
+                       break;
+       }
+       if (!ret && atomic && !marked) {
+               f2fs_msg(sbi->sb, KERN_DEBUG,
+                       "Retry to write fsync mark: ino=%u, idx=%lx",
+                                       ino, last_page->index);
+               lock_page(last_page);
+               set_page_dirty(last_page);
+               unlock_page(last_page);
+               goto retry;
+       }
+       return ret ? -EIO: 0;
+}
+
+int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
+{
+       pgoff_t index, end;
+       struct pagevec pvec;
+       int step = 0;
        int nwritten = 0;
 
        pagevec_init(&pvec, 0);
@@ -1274,15 +1464,8 @@ next_step:
                        if (step == 2 && (!IS_DNODE(page) ||
                                                !is_cold_node(page)))
                                continue;
-
-                       /*
-                        * If an fsync mode,
-                        * we should not skip writing node pages.
-                        */
 lock_node:
-                       if (ino && ino_of_node(page) == ino)
-                               lock_page(page);
-                       else if (!trylock_page(page))
+                       if (!trylock_page(page))
                                continue;
 
                        if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
@@ -1290,8 +1473,6 @@ continue_unlock:
                                unlock_page(page);
                                continue;
                        }
-                       if (ino && ino_of_node(page) != ino)
-                               goto continue_unlock;
 
                        if (!PageDirty(page)) {
                                /* someone wrote it for us */
@@ -1299,7 +1480,7 @@ continue_unlock:
                        }
 
                        /* flush inline_data */
-                       if (!ino && is_inline_node(page)) {
+                       if (is_inline_node(page)) {
                                clear_inline_node(page);
                                unlock_page(page);
                                flush_inline_data(sbi, ino_of_node(page));
@@ -1312,17 +1493,8 @@ continue_unlock:
                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;
 
-                       /* called by fsync() */
-                       if (ino && IS_DNODE(page)) {
-                               set_fsync_mark(page, 1);
-                               if (IS_INODE(page))
-                                       set_dentry_mark(page,
-                                               need_dentry_mark(sbi, ino));
-                               nwritten++;
-                       } else {
-                               set_fsync_mark(page, 0);
-                               set_dentry_mark(page, 0);
-                       }
+                       set_fsync_mark(page, 0);
+                       set_dentry_mark(page, 0);
 
                        if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
                                unlock_page(page);
@@ -1397,7 +1569,8 @@ static int f2fs_write_node_page(struct page *page,
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .type = NODE,
-               .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+               .op = REQ_OP_WRITE,
+               .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
                .page = page,
                .encrypted_page = NULL,
        };
@@ -1470,7 +1643,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 
        diff = nr_pages_to_write(sbi, NODE, wbc);
        wbc->sync_mode = WB_SYNC_NONE;
-       sync_node_pages(sbi, 0, wbc);
+       sync_node_pages(sbi, wbc);
        wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
        return 0;
 
@@ -1524,7 +1697,6 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct free_nid *i;
        struct nat_entry *ne;
-       bool allocated = false;
 
        if (!available_free_memory(sbi, FREE_NIDS))
                return -1;
@@ -1538,8 +1710,6 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
                ne = __lookup_nat_cache(nm_i, nid);
                if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
                                nat_get_blkaddr(ne) != NULL_ADDR))
-                       allocated = true;
-               if (allocated)
                        return 0;
        }
 
@@ -1672,6 +1842,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct free_nid *i = NULL;
 retry:
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+       if (time_to_inject(FAULT_ALLOC_NID))
+               return false;
+#endif
        if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
                return false;
 
@@ -1846,7 +2020,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
        if (unlikely(old_ni.blk_addr != NULL_ADDR))
                return -EINVAL;
 
-       ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
+       ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
        if (!ipage)
                return -ENOMEM;