Merge remote-tracking branch 'f2fs/dev'
author Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 5 Nov 2015 00:01:56 +0000 (11:01 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 5 Nov 2015 00:01:56 +0000 (11:01 +1100)
21 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/filesystems/f2fs.txt
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/gc.h
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
include/trace/events/f2fs.h

index 2c4cc42..0345f2d 100644 (file)
@@ -80,3 +80,15 @@ Date:                February 2015
 Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
                 Controls the trimming rate in batch mode.
+
+What:          /sys/fs/f2fs/<disk>/cp_interval
+Date:          October 2015
+Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+                Controls the checkpoint timing.
+
+What:          /sys/fs/f2fs/<disk>/ra_nid_pages
+Date:          October 2015
+Contact:       "Chao Yu" <chao2.yu@samsung.com>
+Description:
+                Controls the count of nid pages to be read ahead.
index e2d5105..b102b43 100644 (file)
@@ -102,7 +102,8 @@ background_gc=%s       Turn on/off cleaning operations, namely garbage
                        collection, triggered in background when I/O subsystem is
                        idle. If background_gc=on, it will turn on the garbage
                        collection and if background_gc=off, garbage collection
-                       will be truned off.
+                       will be turned off. If background_gc=sync, it will turn
+                       on synchronous garbage collection running in background.
                        Default value for this option is on. So garbage
                        collection is on by default.
 disable_roll_forward   Disable the roll-forward recovery routine
index c5a38e3..f661d80 100644 (file)
@@ -47,7 +47,8 @@ repeat:
 /*
  * We guarantee no failure on the returned page.
  */
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
+                                                       bool is_meta)
 {
        struct address_space *mapping = META_MAPPING(sbi);
        struct page *page;
@@ -58,6 +59,9 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
                .blk_addr = index,
                .encrypted_page = NULL,
        };
+
+       if (unlikely(!is_meta))
+               fio.rw &= ~REQ_META;
 repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
@@ -91,6 +95,17 @@ out:
        return page;
 }
 
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+       return __get_meta_page(sbi, index, true);
+}
+
+/* for POR only */
+struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+       return __get_meta_page(sbi, index, false);
+}
+
 bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
 {
        switch (type) {
@@ -125,7 +140,8 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
 /*
  * Readahead CP/NAT/SIT/SSA pages
  */
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+                                                       int type, bool sync)
 {
        block_t prev_blk_addr = 0;
        struct page *page;
@@ -133,10 +149,13 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .type = META,
-               .rw = READ_SYNC | REQ_META | REQ_PRIO,
+               .rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
                .encrypted_page = NULL,
        };
 
+       if (unlikely(type == META_POR))
+               fio.rw &= ~REQ_META;
+
        for (; nrpages-- > 0; blkno++) {
 
                if (!is_valid_blkaddr(sbi, blkno, type))
@@ -196,7 +215,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
        f2fs_put_page(page, 0);
 
        if (readahead)
-               ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+               ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
 }
 
 static int f2fs_write_meta_page(struct page *page,
@@ -257,7 +276,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                                                long nr_to_write)
 {
        struct address_space *mapping = META_MAPPING(sbi);
-       pgoff_t index = 0, end = LONG_MAX;
+       pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
        struct pagevec pvec;
        long nwritten = 0;
        struct writeback_control wbc = {
@@ -277,6 +296,13 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 
+                       if (prev == LONG_MAX)
+                               prev = page->index - 1;
+                       if (nr_to_write != LONG_MAX && page->index != prev + 1) {
+                               pagevec_release(&pvec);
+                               goto stop;
+                       }
+
                        lock_page(page);
 
                        if (unlikely(page->mapping != mapping)) {
@@ -297,13 +323,14 @@ continue_unlock:
                                break;
                        }
                        nwritten++;
+                       prev = page->index;
                        if (unlikely(nwritten >= nr_to_write))
                                break;
                }
                pagevec_release(&pvec);
                cond_resched();
        }
-
+stop:
        if (nwritten)
                f2fs_submit_merged_bio(sbi, type, WRITE);
 
@@ -495,7 +522,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
        start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
        orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
 
-       ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
+       ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
 
        for (i = 0; i < orphan_blocks; i++) {
                struct page *page = get_meta_page(sbi, start_blk + i);
@@ -1000,6 +1027,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        start_blk = __start_cp_addr(sbi);
 
+       /* need to wait for end_io results */
+       wait_on_all_pages_writeback(sbi);
+       if (unlikely(f2fs_cp_error(sbi)))
+               return;
+
        /* write out checkpoint buffer at block 0 */
        update_meta_page(sbi, ckpt, start_blk++);
 
@@ -1109,6 +1141,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        if (cpc->reason == CP_RECOVERY)
                f2fs_msg(sbi->sb, KERN_NOTICE,
                        "checkpoint: version = %llx", ckpt_ver);
+
+       /* do checkpoint periodically */
+       sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
 out:
        mutex_unlock(&sbi->cp_mutex);
        trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
index a82abe9..972eab7 100644 (file)
@@ -275,7 +275,8 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
        return f2fs_reserve_block(dn, index);
 }
 
-struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
+struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+                                               int rw, bool for_write)
 {
        struct address_space *mapping = inode->i_mapping;
        struct dnode_of_data dn;
@@ -292,7 +293,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                return read_mapping_page(mapping, index, NULL);
 
-       page = grab_cache_page(mapping, index);
+       page = f2fs_grab_cache_page(mapping, index, for_write);
        if (!page)
                return ERR_PTR(-ENOMEM);
 
@@ -352,7 +353,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
                return page;
        f2fs_put_page(page, 0);
 
-       page = get_read_data_page(inode, index, READ_SYNC);
+       page = get_read_data_page(inode, index, READ_SYNC, false);
        if (IS_ERR(page))
                return page;
 
@@ -372,12 +373,13 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
  * Because, the callers, functions in dir.c and GC, should be able to know
  * whether this page exists or not.
  */
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+                                                       bool for_write)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
 repeat:
-       page = get_read_data_page(inode, index, READ_SYNC);
+       page = get_read_data_page(inode, index, READ_SYNC, for_write);
        if (IS_ERR(page))
                return page;
 
@@ -411,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
        struct dnode_of_data dn;
        int err;
 repeat:
-       page = grab_cache_page(mapping, index);
+       page = f2fs_grab_cache_page(mapping, index, true);
        if (!page) {
                /*
                 * before exiting, we should make sure ipage will be released
@@ -439,7 +441,7 @@ repeat:
        } else {
                f2fs_put_page(page, 1);
 
-               page = get_read_data_page(inode, index, READ_SYNC);
+               page = get_read_data_page(inode, index, READ_SYNC, true);
                if (IS_ERR(page))
                        goto repeat;
 
@@ -447,9 +449,9 @@ repeat:
                lock_page(page);
        }
 got_it:
-       if (new_i_size &&
-               i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
-               i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
+       if (new_i_size && i_size_read(inode) <
+                               ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)) {
+               i_size_write(inode, ((loff_t)(index + 1) << PAGE_CACHE_SHIFT));
                /* Only the directory inode sets new_i_size */
                set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
        }
@@ -489,8 +491,9 @@ alloc:
        /* update i_size */
        fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
                                                        dn->ofs_in_node;
-       if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
-               i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
+       if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
+               i_size_write(dn->inode,
+                               ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
 
        /* direct IO doesn't use extent cache to maximize the performance */
        f2fs_drop_largest_extent(dn->inode, fofs);
@@ -523,6 +526,9 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
                while (dn.ofs_in_node < end_offset && len) {
                        block_t blkaddr;
 
+                       if (unlikely(f2fs_cp_error(sbi)))
+                               goto sync_out;
+
                        blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
                        if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
                                if (__allocate_data_block(&dn))
@@ -565,6 +571,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 {
        unsigned int maxblocks = map->m_len;
        struct dnode_of_data dn;
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
        pgoff_t pgofs, end_offset;
        int err = 0, ofs = 1;
@@ -595,40 +602,40 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                        err = 0;
                goto unlock_out;
        }
-       if (dn.data_blkaddr == NEW_ADDR) {
-               if (flag == F2FS_GET_BLOCK_BMAP) {
-                       err = -ENOENT;
-                       goto put_out;
-               } else if (flag == F2FS_GET_BLOCK_READ ||
-                               flag == F2FS_GET_BLOCK_DIO) {
-                       goto put_out;
+
+       if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) {
+               if (create) {
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
+                               goto put_out;
+                       }
+                       err = __allocate_data_block(&dn);
+                       if (err)
+                               goto put_out;
+                       allocated = true;
+                       map->m_flags = F2FS_MAP_NEW;
+               } else {
+                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
+                                               dn.data_blkaddr != NEW_ADDR) {
+                               if (flag == F2FS_GET_BLOCK_BMAP)
+                                       err = -ENOENT;
+                               goto put_out;
+                       }
+
+                       /*
+                        * preallocated unwritten block should be mapped
+                        * for fiemap.
+                        */
+                       if (dn.data_blkaddr == NEW_ADDR)
+                               map->m_flags = F2FS_MAP_UNWRITTEN;
                }
-               /*
-                * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP),
-                * mark it as mapped and unwritten block.
-                */
        }
 
-       if (dn.data_blkaddr != NULL_ADDR) {
-               map->m_flags = F2FS_MAP_MAPPED;
-               map->m_pblk = dn.data_blkaddr;
-               if (dn.data_blkaddr == NEW_ADDR)
-                       map->m_flags |= F2FS_MAP_UNWRITTEN;
-       } else if (create) {
-               err = __allocate_data_block(&dn);
-               if (err)
-                       goto put_out;
-               allocated = true;
-               map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
-               map->m_pblk = dn.data_blkaddr;
-       } else {
-               if (flag == F2FS_GET_BLOCK_BMAP)
-                       err = -ENOENT;
-               goto put_out;
-       }
+       map->m_flags |= F2FS_MAP_MAPPED;
+       map->m_pblk = dn.data_blkaddr;
+       map->m_len = 1;
 
        end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
-       map->m_len = 1;
        dn.ofs_in_node++;
        pgofs++;
 
@@ -647,23 +654,35 @@ get_next:
                        goto unlock_out;
                }
 
-               if (dn.data_blkaddr == NEW_ADDR &&
-                               flag != F2FS_GET_BLOCK_FIEMAP)
-                       goto put_out;
-
                end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
        }
 
        if (maxblocks > map->m_len) {
                block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
-               if (blkaddr == NULL_ADDR && create) {
-                       err = __allocate_data_block(&dn);
-                       if (err)
-                               goto sync_out;
-                       allocated = true;
-                       map->m_flags |= F2FS_MAP_NEW;
-                       blkaddr = dn.data_blkaddr;
+
+               if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+                       if (create) {
+                               if (unlikely(f2fs_cp_error(sbi))) {
+                                       err = -EIO;
+                                       goto sync_out;
+                               }
+                               err = __allocate_data_block(&dn);
+                               if (err)
+                                       goto sync_out;
+                               allocated = true;
+                               map->m_flags |= F2FS_MAP_NEW;
+                               blkaddr = dn.data_blkaddr;
+                       } else {
+                               /*
+                                * we only merge preallocated unwritten blocks
+                                * for fiemap.
+                                */
+                               if (flag != F2FS_GET_BLOCK_FIEMAP ||
+                                               blkaddr != NEW_ADDR)
+                                       goto sync_out;
+                       }
                }
+
                /* Give more consecutive addresses for the readahead */
                if ((map->m_pblk != NEW_ADDR &&
                                blkaddr == (map->m_pblk + ofs)) ||
@@ -752,6 +771,12 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
+       if (f2fs_has_inline_data(inode)) {
+               ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
+               if (ret != -EAGAIN)
+                       return ret;
+       }
+
        mutex_lock(&inode->i_mutex);
 
        if (len >= isize) {
@@ -903,7 +928,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
                        map.m_lblk = block_in_file;
                        map.m_len = last_block - block_in_file;
 
-                       if (f2fs_map_blocks(inode, &map, 0, false))
+                       if (f2fs_map_blocks(inode, &map, 0,
+                                                       F2FS_GET_BLOCK_READ))
                                goto set_error_page;
                }
 got_it:
@@ -936,21 +962,14 @@ submit_and_realloc:
 
                        if (f2fs_encrypted_inode(inode) &&
                                        S_ISREG(inode->i_mode)) {
-                               struct page *cpage;
 
                                ctx = f2fs_get_crypto_ctx(inode);
                                if (IS_ERR(ctx))
                                        goto set_error_page;
 
                                /* wait the page to be moved by cleaning */
-                               cpage = find_lock_page(
-                                               META_MAPPING(F2FS_I_SB(inode)),
-                                               block_nr);
-                               if (cpage) {
-                                       f2fs_wait_on_page_writeback(cpage,
-                                                                       DATA);
-                                       f2fs_put_page(cpage, 1);
-                               }
+                               f2fs_wait_on_encrypted_page_writeback(
+                                               F2FS_I_SB(inode), block_nr);
                        }
 
                        bio = bio_alloc(GFP_KERNEL,
@@ -1012,6 +1031,9 @@ static int f2fs_read_data_pages(struct file *file,
                        struct list_head *pages, unsigned nr_pages)
 {
        struct inode *inode = file->f_mapping->host;
+       struct page *page = list_entry(pages->prev, struct page, lru);
+
+       trace_f2fs_readpages(inode, page, nr_pages);
 
        /* If the file has inline data, skip readpages */
        if (f2fs_has_inline_data(inode))
@@ -1041,6 +1063,11 @@ int do_write_data_page(struct f2fs_io_info *fio)
        }
 
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+
+               /* wait for GCed encrypted page writeback */
+               f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
+                                                       fio->blk_addr);
+
                fio->encrypted_page = f2fs_encrypt(inode, fio->page);
                if (IS_ERR(fio->encrypted_page)) {
                        err = PTR_ERR(fio->encrypted_page);
@@ -1429,6 +1456,10 @@ put_next:
 
        f2fs_wait_on_page_writeback(page, DATA);
 
+       /* wait for GCed encrypted page writeback */
+       if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+               f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+
        if (len == PAGE_CACHE_SIZE)
                goto out_update;
        if (PageUptodate(page))
@@ -1551,10 +1582,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 
        trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 
-       if (iov_iter_rw(iter) == WRITE)
+       if (iov_iter_rw(iter) == WRITE) {
                __allocate_data_blocks(inode, offset, count);
+               if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+                       err = -EIO;
+                       goto out;
+               }
+       }
 
        err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
+out:
        if (err < 0 && iov_iter_rw(iter) == WRITE)
                f2fs_write_failed(mapping, offset + count);
 
@@ -1636,12 +1673,13 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
        struct inode *inode = mapping->host;
 
-       /* we don't need to use inline_data strictly */
-       if (f2fs_has_inline_data(inode)) {
-               int err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
+       if (f2fs_has_inline_data(inode))
+               return 0;
+
+       /* make sure allocating whole blocks */
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+               filemap_write_and_wait(mapping);
+
        return generic_block_bmap(mapping, block, get_data_block_bmap);
 }
 
index d013d84..478e5d5 100644 (file)
@@ -33,11 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        int i;
 
        /* validation check of the segment numbers */
-       si->hit_largest = atomic_read(&sbi->read_hit_largest);
-       si->hit_cached = atomic_read(&sbi->read_hit_cached);
-       si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree);
+       si->hit_largest = atomic64_read(&sbi->read_hit_largest);
+       si->hit_cached = atomic64_read(&sbi->read_hit_cached);
+       si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
        si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
-       si->total_ext = atomic_read(&sbi->total_hit_ext);
+       si->total_ext = atomic64_read(&sbi->total_hit_ext);
        si->ext_tree = sbi->total_ext_tree;
        si->ext_node = atomic_read(&sbi->total_ext_node);
        si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
                }
        }
        dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
-       si->bimodal = div_u64(bimodal, dist);
+       si->bimodal = div64_u64(bimodal, dist);
        if (si->dirty_count)
                si->avg_vblocks = div_u64(total_vblocks, ndirty);
        else
@@ -198,9 +198,9 @@ get_cache:
 
        si->page_mem = 0;
        npages = NODE_MAPPING(sbi)->nrpages;
-       si->page_mem += npages << PAGE_CACHE_SHIFT;
+       si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
        npages = META_MAPPING(sbi)->nrpages;
-       si->page_mem += npages << PAGE_CACHE_SHIFT;
+       si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
 }
 
 static int stat_show(struct seq_file *s, void *v)
@@ -283,12 +283,12 @@ static int stat_show(struct seq_file *s, void *v)
                seq_printf(s, "  - node blocks : %d (%d)\n", si->node_blks,
                                si->bg_node_blks);
                seq_puts(s, "\nExtent Cache:\n");
-               seq_printf(s, "  - Hit Count: L1-1:%d L1-2:%d L2:%d\n",
+               seq_printf(s, "  - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
                                si->hit_largest, si->hit_cached,
                                si->hit_rbtree);
-               seq_printf(s, "  - Hit Ratio: %d%% (%d / %d)\n",
+               seq_printf(s, "  - Hit Ratio: %llu%% (%llu / %llu)\n",
                                !si->total_ext ? 0 :
-                               (si->hit_total * 100) / si->total_ext,
+                               div64_u64(si->hit_total * 100, si->total_ext),
                                si->hit_total, si->total_ext);
                seq_printf(s, "  - Inner Struct Count: tree: %d, node: %d\n",
                                si->ext_tree, si->ext_node);
@@ -333,13 +333,13 @@ static int stat_show(struct seq_file *s, void *v)
 
                /* memory footprint */
                update_mem_info(si->sbi);
-               seq_printf(s, "\nMemory: %u KB\n",
+               seq_printf(s, "\nMemory: %llu KB\n",
                        (si->base_mem + si->cache_mem + si->page_mem) >> 10);
-               seq_printf(s, "  - static: %u KB\n",
+               seq_printf(s, "  - static: %llu KB\n",
                                si->base_mem >> 10);
-               seq_printf(s, "  - cached: %u KB\n",
+               seq_printf(s, "  - cached: %llu KB\n",
                                si->cache_mem >> 10);
-               seq_printf(s, "  - paged : %u KB\n",
+               seq_printf(s, "  - paged : %llu KB\n",
                                si->page_mem >> 10);
        }
        mutex_unlock(&f2fs_stat_mutex);
@@ -378,10 +378,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
        si->sbi = sbi;
        sbi->stat_info = si;
 
-       atomic_set(&sbi->total_hit_ext, 0);
-       atomic_set(&sbi->read_hit_rbtree, 0);
-       atomic_set(&sbi->read_hit_largest, 0);
-       atomic_set(&sbi->read_hit_cached, 0);
+       atomic64_set(&sbi->total_hit_ext, 0);
+       atomic64_set(&sbi->read_hit_rbtree, 0);
+       atomic64_set(&sbi->read_hit_largest, 0);
+       atomic64_set(&sbi->read_hit_cached, 0);
 
        atomic_set(&sbi->inline_xattr, 0);
        atomic_set(&sbi->inline_inode, 0);
index 8f15fc1..7c1678b 100644 (file)
@@ -258,7 +258,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
        if (f2fs_has_inline_dentry(dir))
                return f2fs_parent_inline_dir(dir, p);
 
-       page = get_lock_data_page(dir, 0);
+       page = get_lock_data_page(dir, 0, false);
        if (IS_ERR(page))
                return NULL;
 
@@ -740,7 +740,7 @@ bool f2fs_empty_dir(struct inode *dir)
                return f2fs_empty_inline_dir(dir);
 
        for (bidx = 0; bidx < nblock; bidx++) {
-               dentry_page = get_lock_data_page(dir, bidx);
+               dentry_page = get_lock_data_page(dir, bidx, false);
                if (IS_ERR(dentry_page)) {
                        if (PTR_ERR(dentry_page) == -ENOENT)
                                continue;
@@ -787,7 +787,6 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
                else
                        d_type = DT_UNKNOWN;
 
-               /* encrypted case */
                de_name.name = d->filename[bit_pos];
                de_name.len = le16_to_cpu(de->name_len);
 
@@ -795,12 +794,20 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
                        int save_len = fstr->len;
                        int ret;
 
+                       de_name.name = kmalloc(de_name.len, GFP_NOFS);
+                       if (!de_name.name)
+                               return false;
+
+                       memcpy(de_name.name, d->filename[bit_pos], de_name.len);
+
                        ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
                                                        &de_name, fstr);
-                       de_name = *fstr;
-                       fstr->len = save_len;
+                       kfree(de_name.name);
                        if (ret < 0)
                                return true;
+
+                       de_name = *fstr;
+                       fstr->len = save_len;
                }
 
                if (!dir_emit(ctx, de_name.name, de_name.len,
@@ -847,7 +854,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
                                min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
 
        for (; n < npages; n++) {
-               dentry_page = get_lock_data_page(inode, n);
+               dentry_page = get_lock_data_page(inode, n, false);
                if (IS_ERR(dentry_page))
                        continue;
 
index 997ac86..7ddba81 100644 (file)
@@ -155,11 +155,12 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
        return count - et->count;
 }
 
-static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
+static void __drop_largest_extent(struct inode *inode,
+                                       pgoff_t fofs, unsigned int len)
 {
        struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
 
-       if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
+       if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs)
                largest->len = 0;
 }
 
@@ -168,7 +169,7 @@ void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
        if (!f2fs_may_extent_tree(inode))
                return;
 
-       __drop_largest_extent(inode, fofs);
+       __drop_largest_extent(inode, fofs, 1);
 }
 
 void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
@@ -350,8 +351,7 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
        }
 
        if (en) {
-               if (en->ei.len > et->largest.len)
-                       et->largest = en->ei;
+               __try_update_largest_extent(et, en);
                et->cached_en = en;
        }
        return en;
@@ -388,18 +388,17 @@ do_insert:
        if (!en)
                return NULL;
 
-       if (en->ei.len > et->largest.len)
-               et->largest = en->ei;
+       __try_update_largest_extent(et, en);
        et->cached_en = en;
        return en;
 }
 
-unsigned int f2fs_update_extent_tree_range(struct inode *inode,
+static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                                pgoff_t fofs, block_t blkaddr, unsigned int len)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct extent_tree *et = F2FS_I(inode)->extent_tree;
-       struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
+       struct extent_node *en = NULL, *en1 = NULL;
        struct extent_node *prev_en = NULL, *next_en = NULL;
        struct extent_info ei, dei, prev;
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
@@ -409,6 +408,8 @@ unsigned int f2fs_update_extent_tree_range(struct inode *inode,
        if (!et)
                return false;
 
+       trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
+
        write_lock(&et->lock);
 
        if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
@@ -419,148 +420,99 @@ unsigned int f2fs_update_extent_tree_range(struct inode *inode,
        prev = et->largest;
        dei.len = 0;
 
-       /* we do not guarantee that the largest extent is cached all the time */
-       __drop_largest_extent(inode, fofs);
+       /*
+        * drop largest extent before lookup, in case it's already
+        * been shrunk from extent tree
+        */
+       __drop_largest_extent(inode, fofs, len);
 
        /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
        en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
                                        &insert_p, &insert_parent);
-       if (!en) {
-               if (next_en) {
-                       en = next_en;
-                       f2fs_bug_on(sbi, en->ei.fofs <= pos);
-                       pos = en->ei.fofs;
-               } else {
-                       /*
-                        * skip searching in the tree since there is no
-                        * larger extent node in the cache.
-                        */
-                       goto update_extent;
-               }
-       }
+       if (!en)
+               en = next_en;
 
        /* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
-       while (en) {
-               struct rb_node *node;
+       while (en && en->ei.fofs < end) {
+               unsigned int org_end;
+               int parts = 0;  /* # of parts current extent split into */
 
-               if (pos >= end)
-                       break;
+               next_en = en1 = NULL;
 
                dei = en->ei;
-               en1 = en2 = NULL;
+               org_end = dei.fofs + dei.len;
+               f2fs_bug_on(sbi, pos >= org_end);
 
-               node = rb_next(&en->rb_node);
+               if (pos > dei.fofs &&   pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
+                       en->ei.len = pos - en->ei.fofs;
+                       prev_en = en;
+                       parts = 1;
+               }
 
-               /*
-                * 2.1 there are four cases when we invalidate blkaddr in extent
-                * node, |V: valid address, X: will be invalidated|
-                */
-               /* case#1, invalidate right part of extent node |VVVVVXXXXX| */
-               if (pos > dei.fofs && end >= dei.fofs + dei.len) {
-                       en->ei.len = pos - dei.fofs;
-
-                       if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
-                               __detach_extent_node(sbi, et, en);
-                               insert_p = NULL;
-                               insert_parent = NULL;
-                               goto update;
+               if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
+                       if (parts) {
+                               set_extent_info(&ei, end,
+                                               end - dei.fofs + dei.blk,
+                                               org_end - end);
+                               en1 = __insert_extent_tree(sbi, et, &ei,
+                                                       NULL, NULL);
+                               next_en = en1;
+                       } else {
+                               en->ei.fofs = end;
+                               en->ei.blk += end - dei.fofs;
+                               en->ei.len -= end - dei.fofs;
+                               next_en = en;
                        }
-
-                       if (__is_extent_same(&dei, &et->largest))
-                               et->largest = en->ei;
-                       goto next;
+                       parts++;
                }
 
-               /* case#2, invalidate left part of extent node |XXXXXVVVVV| */
-               if (pos <= dei.fofs && end < dei.fofs + dei.len) {
-                       en->ei.fofs = end;
-                       en->ei.blk += end - dei.fofs;
-                       en->ei.len -= end - dei.fofs;
-
-                       if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
-                               __detach_extent_node(sbi, et, en);
-                               insert_p = NULL;
-                               insert_parent = NULL;
-                               goto update;
-                       }
+               if (!next_en) {
+                       struct rb_node *node = rb_next(&en->rb_node);
 
-                       if (__is_extent_same(&dei, &et->largest))
-                               et->largest = en->ei;
-                       goto next;
+                       next_en = node ?
+                               rb_entry(node, struct extent_node, rb_node)
+                               : NULL;
                }
 
-               __detach_extent_node(sbi, et, en);
+               if (parts)
+                       __try_update_largest_extent(et, en);
+               else
+                       __detach_extent_node(sbi, et, en);
 
                /*
-                * if we remove node in rb-tree, our parent node pointer may
-                * point the wrong place, discard them.
+                * if original extent is split into zero or two parts, extent
+                * tree has been altered by deletion or insertion, therefore
+                * invalidate the cached insertion pointers into the tree.
                 */
-               insert_p = NULL;
-               insert_parent = NULL;
-
-               /* case#3, invalidate entire extent node |XXXXXXXXXX| */
-               if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
-                       if (__is_extent_same(&dei, &et->largest))
-                               et->largest.len = 0;
-                       goto update;
+               if (parts != 1) {
+                       insert_p = NULL;
+                       insert_parent = NULL;
                }
 
-               /*
-                * case#4, invalidate data in the middle of extent node
-                * |VVVXXXXVVV|
-                */
-               if (dei.len > F2FS_MIN_EXTENT_LEN) {
-                       unsigned int endofs;
-
-                       /*  insert left part of split extent into cache */
-                       if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
-                               set_extent_info(&ei, dei.fofs, dei.blk,
-                                                       pos - dei.fofs);
-                               en1 = __insert_extent_tree(sbi, et, &ei,
-                                                               NULL, NULL);
-                       }
-
-                       /* insert right part of split extent into cache */
-                       endofs = dei.fofs + dei.len;
-                       if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
-                               set_extent_info(&ei, end,
-                                               end - dei.fofs + dei.blk,
-                                               endofs - end);
-                               en2 = __insert_extent_tree(sbi, et, &ei,
-                                                               NULL, NULL);
-                       }
-               }
-update:
-               /* 2.2 update in global extent list */
+               /* update in global extent list */
                spin_lock(&sbi->extent_lock);
-               if (en && !list_empty(&en->list))
+               if (!parts && !list_empty(&en->list))
                        list_del(&en->list);
                if (en1)
                        list_add_tail(&en1->list, &sbi->extent_list);
-               if (en2)
-                       list_add_tail(&en2->list, &sbi->extent_list);
                spin_unlock(&sbi->extent_lock);
 
-               /* 2.3 release extent node */
-               if (en)
+               /* release extent node */
+               if (!parts)
                        kmem_cache_free(extent_node_slab, en);
-next:
-               en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
-               next_en = en;
-               if (en)
-                       pos = en->ei.fofs;
+
+               en = next_en;
        }
 
-update_extent:
        /* 3. update extent in extent cache */
        if (blkaddr) {
                struct extent_node *den = NULL;
 
                set_extent_info(&ei, fofs, blkaddr, len);
-               en3 = __try_merge_extent_node(sbi, et, &ei, &den,
+               en1 = __try_merge_extent_node(sbi, et, &ei, &den,
                                                        prev_en, next_en);
-               if (!en3)
-                       en3 = __insert_extent_tree(sbi, et, &ei,
+               if (!en1)
+                       en1 = __insert_extent_tree(sbi, et, &ei,
                                                insert_p, insert_parent);
 
                /* give up extent_cache, if split and small updates happen */
@@ -572,11 +524,11 @@ update_extent:
                }
 
                spin_lock(&sbi->extent_lock);
-               if (en3) {
-                       if (list_empty(&en3->list))
-                               list_add_tail(&en3->list, &sbi->extent_list);
+               if (en1) {
+                       if (list_empty(&en1->list))
+                               list_add_tail(&en1->list, &sbi->extent_list);
                        else
-                               list_move_tail(&en3->list, &sbi->extent_list);
+                               list_move_tail(&en1->list, &sbi->extent_list);
                }
                if (den && !list_empty(&den->list))
                        list_del(&den->list);
@@ -650,6 +602,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
        }
        spin_unlock(&sbi->extent_lock);
 
+       /*
+        * reset ino for searching victims from beginning of global extent tree.
+        */
+       ino = F2FS_ROOT_INO(sbi);
+
        while ((found = radix_tree_gang_lookup(root,
                                (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
                unsigned i;
@@ -663,7 +620,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
                        write_unlock(&et->lock);
 
                        if (node_cnt + tree_cnt >= nr_shrink)
-                               break;
+                               goto unlock_out;
                }
        }
 unlock_out:
index f1a90ff..9db5500 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/magic.h>
 #include <linux/kobject.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 #include <linux/bio.h>
 
 #ifdef CONFIG_F2FS_CHECK_FS
@@ -52,6 +53,7 @@
 #define F2FS_MOUNT_NOBARRIER           0x00000800
 #define F2FS_MOUNT_FASTBOOT            0x00001000
 #define F2FS_MOUNT_EXTENT_CACHE                0x00002000
+#define F2FS_MOUNT_FORCE_FG_GC         0x00004000
 
 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)   (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -122,6 +124,7 @@ enum {
                (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
 #define BATCHED_TRIM_BLOCKS(sbi)       \
                (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
+#define DEF_CP_INTERVAL                        60      /* 60 secs */
 
 struct cp_control {
        int reason;
@@ -230,6 +233,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
 #define F2FS_IOC_RELEASE_VOLATILE_WRITE        _IO(F2FS_IOCTL_MAGIC, 4)
 #define F2FS_IOC_ABORT_VOLATILE_WRITE  _IO(F2FS_IOCTL_MAGIC, 5)
 #define F2FS_IOC_GARBAGE_COLLECT       _IO(F2FS_IOCTL_MAGIC, 6)
+#define F2FS_IOC_WRITE_CHECKPOINT      _IO(F2FS_IOCTL_MAGIC, 7)
 
 #define F2FS_IOC_SET_ENCRYPTION_POLICY                                 \
                _IOR('f', 19, struct f2fs_encryption_policy)
@@ -246,6 +250,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
 #define F2FS_GOING_DOWN_FULLSYNC       0x0     /* going down with full sync */
 #define F2FS_GOING_DOWN_METASYNC       0x1     /* going down with metadata */
 #define F2FS_GOING_DOWN_NOSYNC         0x2     /* going down */
+#define F2FS_GOING_DOWN_METAFLUSH      0x3     /* going down with meta flush */
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
@@ -492,12 +497,20 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
        return __is_extent_mergeable(cur, front);
 }
 
+static inline void __try_update_largest_extent(struct extent_tree *et,
+                                               struct extent_node *en)
+{
+       if (en->ei.len > et->largest.len)
+               et->largest = en->ei;
+}
+
 struct f2fs_nm_info {
        block_t nat_blkaddr;            /* base disk address of NAT */
        nid_t max_nid;                  /* maximum possible node ids */
        nid_t available_nids;           /* maximum available node ids */
        nid_t next_scan_nid;            /* the next nid to be scanned */
        unsigned int ram_thresh;        /* control the memory footprint */
+       unsigned int ra_nid_pages;      /* # of nid pages to be readaheaded */
 
        /* NAT cache management */
        struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -724,6 +737,7 @@ struct f2fs_sb_info {
        struct rw_semaphore node_write;         /* locking node writes */
        struct mutex writepages;                /* mutex for writepages() */
        wait_queue_head_t cp_wait;
+       long cp_expires, cp_interval;           /* next expected periodic cp */
 
        struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
 
@@ -787,10 +801,10 @@ struct f2fs_sb_info {
        unsigned int segment_count[2];          /* # of allocated segments */
        unsigned int block_count[2];            /* # of allocated blocks */
        atomic_t inplace_count;         /* # of inplace update */
-       atomic_t total_hit_ext;                 /* # of lookup extent cache */
-       atomic_t read_hit_rbtree;               /* # of hit rbtree extent node */
-       atomic_t read_hit_largest;              /* # of hit largest extent node */
-       atomic_t read_hit_cached;               /* # of hit cached extent node */
+       atomic64_t total_hit_ext;               /* # of lookup extent cache */
+       atomic64_t read_hit_rbtree;             /* # of hit rbtree extent node */
+       atomic64_t read_hit_largest;            /* # of hit largest extent node */
+       atomic64_t read_hit_cached;             /* # of hit cached extent node */
        atomic_t inline_xattr;                  /* # of inline_xattr inodes */
        atomic_t inline_inode;                  /* # of inline_data inodes */
        atomic_t inline_dir;                    /* # of inline_dentry inodes */
@@ -1220,6 +1234,24 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
        return sbi->total_valid_inode_count;
 }
 
+static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
+                                               pgoff_t index, bool for_write)
+{
+       if (!for_write)
+               return grab_cache_page(mapping, index);
+       return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+}
+
+static inline void f2fs_copy_page(struct page *src, struct page *dst)
+{
+       char *src_kaddr = kmap(src);
+       char *dst_kaddr = kmap(dst);
+
+       memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);
+       kunmap(dst);
+       kunmap(src);
+}
+
 static inline void f2fs_put_page(struct page *page, int unlock)
 {
        if (!page)
@@ -1579,6 +1611,26 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
        return S_ISREG(mode);
 }
 
+static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
+{
+       void *ret;
+
+       ret = kmalloc(size, flags | __GFP_NOWARN);
+       if (!ret)
+               ret = __vmalloc(size, flags, PAGE_KERNEL);
+       return ret;
+}
+
+static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
+{
+       void *ret;
+
+       ret = kzalloc(size, flags | __GFP_NOWARN);
+       if (!ret)
+               ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
+       return ret;
+}
+
 #define get_inode_mode(i) \
        ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
         (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1721,6 +1773,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *);
 int create_flush_cmd_control(struct f2fs_sb_info *);
 void destroy_flush_cmd_control(struct f2fs_sb_info *);
 void invalidate_blocks(struct f2fs_sb_info *, block_t);
+bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
 void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
 void release_discard_addrs(struct f2fs_sb_info *);
@@ -1739,6 +1792,7 @@ void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
 void allocate_data_block(struct f2fs_sb_info *, struct page *,
                block_t, block_t *, struct f2fs_summary *, int);
 void f2fs_wait_on_page_writeback(struct page *, enum page_type);
+void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
 void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
 int lookup_journal_in_cursum(struct f2fs_summary_block *,
@@ -1754,8 +1808,9 @@ void destroy_segment_manager_caches(void);
  */
 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
 bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
-int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
+int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
 void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
 void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1787,9 +1842,9 @@ void set_data_blkaddr(struct dnode_of_data *);
 int reserve_new_block(struct dnode_of_data *);
 int f2fs_get_block(struct dnode_of_data *, pgoff_t);
 int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
-struct page *get_read_data_page(struct inode *, pgoff_t, int);
+struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
 struct page *find_data_page(struct inode *, pgoff_t);
-struct page *get_lock_data_page(struct inode *, pgoff_t);
+struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
 struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
 int do_write_data_page(struct f2fs_io_info *);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
@@ -1802,7 +1857,7 @@ int f2fs_release_page(struct page *, gfp_t);
 int start_gc_thread(struct f2fs_sb_info *);
 void stop_gc_thread(struct f2fs_sb_info *);
 block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
-int f2fs_gc(struct f2fs_sb_info *);
+int f2fs_gc(struct f2fs_sb_info *, bool);
 void build_gc_manager(struct f2fs_sb_info *);
 
 /*
@@ -1820,7 +1875,8 @@ struct f2fs_stat_info {
        struct f2fs_sb_info *sbi;
        int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
        int main_area_segs, main_area_sections, main_area_zones;
-       int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext;
+       unsigned long long hit_largest, hit_cached, hit_rbtree;
+       unsigned long long hit_total, total_ext;
        int ext_tree, ext_node;
        int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
        int nats, dirty_nats, sits, dirty_sits, fnids;
@@ -1844,7 +1900,7 @@ struct f2fs_stat_info {
        unsigned int segment_count[2];
        unsigned int block_count[2];
        unsigned int inplace_count;
-       unsigned base_mem, cache_mem, page_mem;
+       unsigned long long base_mem, cache_mem, page_mem;
 };
 
 static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
@@ -1857,10 +1913,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 #define stat_inc_bggc_count(sbi)       ((sbi)->bg_gc++)
 #define stat_inc_dirty_dir(sbi)                ((sbi)->n_dirty_dirs++)
 #define stat_dec_dirty_dir(sbi)                ((sbi)->n_dirty_dirs--)
-#define stat_inc_total_hit(sbi)                (atomic_inc(&(sbi)->total_hit_ext))
-#define stat_inc_rbtree_node_hit(sbi)  (atomic_inc(&(sbi)->read_hit_rbtree))
-#define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest))
-#define stat_inc_cached_node_hit(sbi)  (atomic_inc(&(sbi)->read_hit_cached))
+#define stat_inc_total_hit(sbi)                (atomic64_inc(&(sbi)->total_hit_ext))
+#define stat_inc_rbtree_node_hit(sbi)  (atomic64_inc(&(sbi)->read_hit_rbtree))
+#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
+#define stat_inc_cached_node_hit(sbi)  (atomic64_inc(&(sbi)->read_hit_cached))
 #define stat_inc_inline_xattr(inode)                                   \
        do {                                                            \
                if (f2fs_has_inline_xattr(inode))                       \
@@ -1998,6 +2054,8 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
 bool f2fs_empty_inline_dir(struct inode *);
 int f2fs_read_inline_dir(struct file *, struct dir_context *,
                                                struct f2fs_str *);
+int f2fs_inline_data_fiemap(struct inode *,
+               struct fiemap_extent_info *, __u64, __u64);
 
 /*
  * shrinker.c
index 8120f86..a197215 100644 (file)
@@ -74,7 +74,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
                goto mapped;
 
        /* page is wholly or partially inside EOF */
-       if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
+       if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) >
+                                               i_size_read(inode)) {
                unsigned offset;
                offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
                zero_user_segment(page, offset, PAGE_CACHE_SIZE);
@@ -86,6 +87,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
 mapped:
        /* fill the page */
        f2fs_wait_on_page_writeback(page, DATA);
+
+       /* wait for GCed encrypted page writeback */
+       if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+               f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+
        /* if gced page is attached, don't write to cold segment */
        clear_cold_data(page);
 out:
@@ -343,7 +349,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 
        dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
 
-       for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+       for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
                set_new_dnode(&dn, inode, NULL, NULL, 0);
                err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
                if (err && err != -ENOENT) {
@@ -504,14 +510,14 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
                return 0;
 
        if (cache_only) {
-               page = grab_cache_page(mapping, index);
+               page = f2fs_grab_cache_page(mapping, index, false);
                if (page && PageUptodate(page))
                        goto truncate_out;
                f2fs_put_page(page, 1);
                return 0;
        }
 
-       page = get_lock_data_page(inode, index);
+       page = get_lock_data_page(inode, index, true);
        if (IS_ERR(page))
                return 0;
 truncate_out:
@@ -680,6 +686,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                         * larger than i_size.
                         */
                        truncate_setsize(inode, attr->ia_size);
+                       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                }
        }
 
@@ -738,23 +745,31 @@ static int fill_zero(struct inode *inode, pgoff_t index,
 
 int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
 {
-       pgoff_t index;
        int err;
 
-       for (index = pg_start; index < pg_end; index++) {
+       while (pg_start < pg_end) {
                struct dnode_of_data dn;
+               pgoff_t end_offset, count;
 
                set_new_dnode(&dn, inode, NULL, NULL, 0);
-               err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+               err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
                if (err) {
-                       if (err == -ENOENT)
+                       if (err == -ENOENT) {
+                               pg_start++;
                                continue;
+                       }
                        return err;
                }
 
-               if (dn.data_blkaddr != NULL_ADDR)
-                       truncate_data_blocks_range(&dn, 1);
+               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+               count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
+
+               f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
+
+               truncate_data_blocks_range(&dn, count);
                f2fs_put_dnode(&dn);
+
+               pg_start += count;
        }
        return 0;
 }
@@ -765,9 +780,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
        loff_t off_start, off_end;
        int ret = 0;
 
-       if (!S_ISREG(inode->i_mode))
-               return -EOPNOTSUPP;
-
        if (f2fs_has_inline_data(inode)) {
                ret = f2fs_convert_inline_inode(inode);
                if (ret)
@@ -805,8 +817,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
                        f2fs_balance_fs(sbi);
 
-                       blk_start = pg_start << PAGE_CACHE_SHIFT;
-                       blk_end = pg_end << PAGE_CACHE_SHIFT;
+                       blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
+                       blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
                        truncate_inode_pages_range(mapping, blk_start,
                                        blk_end - 1);
 
@@ -819,86 +831,100 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
        return ret;
 }
 
-static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+static int __exchange_data_block(struct inode *inode, pgoff_t src,
+                                       pgoff_t dst, bool full)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
-       pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
-       int ret = 0;
-
-       for (; end < nrpages; start++, end++) {
-               block_t new_addr, old_addr;
-
-               f2fs_lock_op(sbi);
+       block_t new_addr;
+       bool do_replace = false;
+       int ret;
 
-               set_new_dnode(&dn, inode, NULL, NULL, 0);
-               ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA);
-               if (ret && ret != -ENOENT) {
-                       goto out;
-               } else if (ret == -ENOENT) {
-                       new_addr = NULL_ADDR;
-               } else {
-                       new_addr = dn.data_blkaddr;
-                       truncate_data_blocks_range(&dn, 1);
-                       f2fs_put_dnode(&dn);
+       set_new_dnode(&dn, inode, NULL, NULL, 0);
+       ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA);
+       if (ret && ret != -ENOENT) {
+               return ret;
+       } else if (ret == -ENOENT) {
+               new_addr = NULL_ADDR;
+       } else {
+               new_addr = dn.data_blkaddr;
+               if (!is_checkpointed_data(sbi, new_addr)) {
+                       dn.data_blkaddr = NULL_ADDR;
+                       /* do not invalidate this block address */
+                       set_data_blkaddr(&dn);
+                       f2fs_update_extent_cache(&dn);
+                       do_replace = true;
                }
+               f2fs_put_dnode(&dn);
+       }
 
-               if (new_addr == NULL_ADDR) {
-                       set_new_dnode(&dn, inode, NULL, NULL, 0);
-                       ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA);
-                       if (ret && ret != -ENOENT) {
-                               goto out;
-                       } else if (ret == -ENOENT) {
-                               f2fs_unlock_op(sbi);
-                               continue;
-                       }
+       if (new_addr == NULL_ADDR)
+               return full ? truncate_hole(inode, dst, dst + 1) : 0;
 
-                       if (dn.data_blkaddr == NULL_ADDR) {
-                               f2fs_put_dnode(&dn);
-                               f2fs_unlock_op(sbi);
-                               continue;
-                       } else {
-                               truncate_data_blocks_range(&dn, 1);
-                       }
+       if (do_replace) {
+               struct page *ipage = get_node_page(sbi, inode->i_ino);
+               struct node_info ni;
 
-                       f2fs_put_dnode(&dn);
-               } else {
-                       struct page *ipage;
+               if (IS_ERR(ipage)) {
+                       ret = PTR_ERR(ipage);
+                       goto err_out;
+               }
 
-                       ipage = get_node_page(sbi, inode->i_ino);
-                       if (IS_ERR(ipage)) {
-                               ret = PTR_ERR(ipage);
-                               goto out;
-                       }
+               set_new_dnode(&dn, inode, ipage, NULL, 0);
+               ret = f2fs_reserve_block(&dn, dst);
+               if (ret)
+                       goto err_out;
 
-                       set_new_dnode(&dn, inode, ipage, NULL, 0);
-                       ret = f2fs_reserve_block(&dn, start);
-                       if (ret)
-                               goto out;
+               truncate_data_blocks_range(&dn, 1);
 
-                       old_addr = dn.data_blkaddr;
-                       if (old_addr != NEW_ADDR && new_addr == NEW_ADDR) {
-                               dn.data_blkaddr = NULL_ADDR;
-                               f2fs_update_extent_cache(&dn);
-                               invalidate_blocks(sbi, old_addr);
+               get_node_info(sbi, dn.nid, &ni);
+               f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
+                               ni.version, true);
+               f2fs_put_dnode(&dn);
+       } else {
+               struct page *psrc, *pdst;
+
+               psrc = get_lock_data_page(inode, src, true);
+               if (IS_ERR(psrc))
+                       return PTR_ERR(psrc);
+               pdst = get_new_data_page(inode, NULL, dst, false);
+               if (IS_ERR(pdst)) {
+                       f2fs_put_page(psrc, 1);
+                       return PTR_ERR(pdst);
+               }
+               f2fs_copy_page(psrc, pdst);
+               set_page_dirty(pdst);
+               f2fs_put_page(pdst, 1);
+               f2fs_put_page(psrc, 1);
 
-                               dn.data_blkaddr = new_addr;
-                               set_data_blkaddr(&dn);
-                       } else if (new_addr != NEW_ADDR) {
-                               struct node_info ni;
+               return truncate_hole(inode, src, src + 1);
+       }
+       return 0;
 
-                               get_node_info(sbi, dn.nid, &ni);
-                               f2fs_replace_block(sbi, &dn, old_addr, new_addr,
-                                                       ni.version, true);
-                       }
+err_out:
+       if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) {
+               dn.data_blkaddr = new_addr;
+               set_data_blkaddr(&dn);
+               f2fs_update_extent_cache(&dn);
+               f2fs_put_dnode(&dn);
+       }
+       return ret;
+}
 
-                       f2fs_put_dnode(&dn);
-               }
+static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+       int ret = 0;
+
+       for (; end < nrpages; start++, end++) {
+               f2fs_balance_fs(sbi);
+               f2fs_lock_op(sbi);
+               ret = __exchange_data_block(inode, end, start, true);
                f2fs_unlock_op(sbi);
+               if (ret)
+                       break;
        }
-       return 0;
-out:
-       f2fs_unlock_op(sbi);
        return ret;
 }
 
@@ -908,9 +934,6 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        loff_t new_size;
        int ret;
 
-       if (!S_ISREG(inode->i_mode))
-               return -EINVAL;
-
        if (offset + len >= i_size_read(inode))
                return -EINVAL;
 
@@ -940,7 +963,12 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                return ret;
 
+       /* write out all moved pages, if possible */
+       filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+       truncate_pagecache(inode, offset);
+
        new_size = i_size_read(inode) - len;
+       truncate_pagecache(inode, new_size);
 
        ret = truncate_blocks(inode, new_size, true);
        if (!ret)
@@ -959,9 +987,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
        loff_t off_start, off_end;
        int ret = 0;
 
-       if (!S_ISREG(inode->i_mode))
-               return -EINVAL;
-
        ret = inode_newsize_ok(inode, (len + offset));
        if (ret)
                return ret;
@@ -1003,7 +1028,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
                                return ret;
 
                        new_size = max_t(loff_t, new_size,
-                                               pg_start << PAGE_CACHE_SHIFT);
+                                       (loff_t)pg_start << PAGE_CACHE_SHIFT);
                }
 
                for (index = pg_start; index < pg_end; index++) {
@@ -1039,7 +1064,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
                        f2fs_unlock_op(sbi);
 
                        new_size = max_t(loff_t, new_size,
-                                       (index + 1) << PAGE_CACHE_SHIFT);
+                               (loff_t)(index + 1) << PAGE_CACHE_SHIFT);
                }
 
                if (off_end) {
@@ -1066,10 +1091,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        pgoff_t pg_start, pg_end, delta, nrpages, idx;
        loff_t new_size;
-       int ret;
-
-       if (!S_ISREG(inode->i_mode))
-               return -EINVAL;
+       int ret = 0;
 
        new_size = i_size_read(inode) + len;
        if (new_size > inode->i_sb->s_maxbytes)
@@ -1107,57 +1129,19 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
 
        for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) {
-               struct dnode_of_data dn;
-               struct page *ipage;
-               block_t new_addr, old_addr;
-
                f2fs_lock_op(sbi);
-
-               set_new_dnode(&dn, inode, NULL, NULL, 0);
-               ret = get_dnode_of_data(&dn, idx, LOOKUP_NODE_RA);
-               if (ret && ret != -ENOENT) {
-                       goto out;
-               } else if (ret == -ENOENT) {
-                       goto next;
-               } else if (dn.data_blkaddr == NULL_ADDR) {
-                       f2fs_put_dnode(&dn);
-                       goto next;
-               } else {
-                       new_addr = dn.data_blkaddr;
-                       truncate_data_blocks_range(&dn, 1);
-                       f2fs_put_dnode(&dn);
-               }
-
-               ipage = get_node_page(sbi, inode->i_ino);
-               if (IS_ERR(ipage)) {
-                       ret = PTR_ERR(ipage);
-                       goto out;
-               }
-
-               set_new_dnode(&dn, inode, ipage, NULL, 0);
-               ret = f2fs_reserve_block(&dn, idx + delta);
-               if (ret)
-                       goto out;
-
-               old_addr = dn.data_blkaddr;
-               f2fs_bug_on(sbi, old_addr != NEW_ADDR);
-
-               if (new_addr != NEW_ADDR) {
-                       struct node_info ni;
-
-                       get_node_info(sbi, dn.nid, &ni);
-                       f2fs_replace_block(sbi, &dn, old_addr, new_addr,
-                                                       ni.version, true);
-               }
-               f2fs_put_dnode(&dn);
-next:
+               ret = __exchange_data_block(inode, idx, idx + delta, false);
                f2fs_unlock_op(sbi);
+               if (ret)
+                       break;
        }
 
-       i_size_write(inode, new_size);
-       return 0;
-out:
-       f2fs_unlock_op(sbi);
+       /* write out all moved pages, if possible */
+       filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+       truncate_pagecache(inode, offset);
+
+       if (!ret)
+               i_size_write(inode, new_size);
        return ret;
 }
 
@@ -1204,9 +1188,10 @@ noalloc:
                if (pg_start == pg_end)
                        new_size = offset + len;
                else if (index == pg_start && off_start)
-                       new_size = (index + 1) << PAGE_CACHE_SHIFT;
+                       new_size = (loff_t)(index + 1) << PAGE_CACHE_SHIFT;
                else if (index == pg_end)
-                       new_size = (index << PAGE_CACHE_SHIFT) + off_end;
+                       new_size = ((loff_t)index << PAGE_CACHE_SHIFT) +
+                                                               off_end;
                else
                        new_size += PAGE_CACHE_SIZE;
        }
@@ -1228,6 +1213,10 @@ static long f2fs_fallocate(struct file *file, int mode,
        struct inode *inode = file_inode(file);
        long ret = 0;
 
+       /* f2fs only supports ->fallocate for regular files */
+       if (!S_ISREG(inode->i_mode))
+               return -EINVAL;
+
        if (f2fs_encrypted_inode(inode) &&
                (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
                return -EOPNOTSUPP;
@@ -1437,8 +1426,7 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
        if (!f2fs_is_first_block_written(inode))
                return truncate_partial_data_page(inode, 0, true);
 
-       punch_hole(inode, 0, F2FS_BLKSIZE);
-       return 0;
+       return punch_hole(inode, 0, F2FS_BLKSIZE);
 }
 
 static int f2fs_ioc_abort_volatile_write(struct file *filp)
@@ -1455,13 +1443,9 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
 
        f2fs_balance_fs(F2FS_I_SB(inode));
 
-       if (f2fs_is_atomic_file(inode)) {
-               clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
-               commit_inmem_pages(inode, true);
-       }
-
-       if (f2fs_is_volatile_file(inode))
-               clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+       clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+       clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+       commit_inmem_pages(inode, true);
 
        mnt_drop_write_file(filp);
        return ret;
@@ -1496,6 +1480,10 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
        case F2FS_GOING_DOWN_NOSYNC:
                f2fs_stop_checkpoint(sbi);
                break;
+       case F2FS_GOING_DOWN_METAFLUSH:
+               sync_meta_pages(sbi, META, LONG_MAX);
+               f2fs_stop_checkpoint(sbi);
+               break;
        default:
                return -EINVAL;
        }
@@ -1616,27 +1604,44 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       __u32 i, count;
+       __u32 sync;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (get_user(count, (__u32 __user *)arg))
+       if (get_user(sync, (__u32 __user *)arg))
                return -EFAULT;
 
-       if (!count || count > F2FS_BATCH_GC_MAX_NUM)
-               return -EINVAL;
+       if (f2fs_readonly(sbi->sb))
+               return -EROFS;
 
-       for (i = 0; i < count; i++) {
+       if (!sync) {
                if (!mutex_trylock(&sbi->gc_mutex))
-                       break;
-
-               if (f2fs_gc(sbi))
-                       break;
+                       return -EBUSY;
+       } else {
+               mutex_lock(&sbi->gc_mutex);
        }
 
-       if (put_user(i, (__u32 __user *)arg))
-               return -EFAULT;
+       return f2fs_gc(sbi, sync);
+}
+
+/* F2FS_IOC_WRITE_CHECKPOINT handler: write a checkpoint; @arg is unused */
+static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+       struct cp_control cpc;
+
+       /* refuse callers without CAP_SYS_ADMIN and read-only filesystems */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (f2fs_readonly(sbi->sb))
+               return -EROFS;
+
+       cpc.reason = __get_cp_reason(sbi);
+       /* the checkpoint itself is written with gc_mutex held */
+       mutex_lock(&sbi->gc_mutex);
+       write_checkpoint(sbi, &cpc);
+       mutex_unlock(&sbi->gc_mutex);
+
        return 0;
 }
@@ -1672,6 +1677,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return f2fs_ioc_get_encryption_pwsalt(filp, arg);
        case F2FS_IOC_GARBAGE_COLLECT:
                return f2fs_ioc_gc(filp, arg);
+       case F2FS_IOC_WRITE_CHECKPOINT:
+               return f2fs_ioc_write_checkpoint(filp, arg);
        default:
                return -ENOTTY;
        }
index 782b8e7..fedbf67 100644 (file)
@@ -78,9 +78,12 @@ static int gc_thread_func(void *data)
                stat_inc_bggc_count(sbi);
 
                /* if return value is not zero, no victim was selected */
-               if (f2fs_gc(sbi))
+               if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
                        wait_ms = gc_th->no_gc_sleep_time;
 
+               trace_f2fs_background_gc(sbi->sb, wait_ms,
+                               prefree_segments(sbi), free_segments(sbi));
+
                /* balancing f2fs's metadata periodically */
                f2fs_balance_fs_bg(sbi);
 
@@ -257,6 +260,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        struct victim_sel_policy p;
        unsigned int secno, max_cost;
+       unsigned int last_segment = MAIN_SEGS(sbi);
        int nsearched = 0;
 
        mutex_lock(&dirty_i->seglist_lock);
@@ -267,6 +271,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
        p.min_segno = NULL_SEGNO;
        p.min_cost = max_cost = get_max_cost(sbi, &p);
 
+       if (p.max_search == 0)
+               goto out;
+
        if (p.alloc_mode == LFS && gc_type == FG_GC) {
                p.min_segno = check_bg_victims(sbi);
                if (p.min_segno != NULL_SEGNO)
@@ -277,9 +284,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                unsigned long cost;
                unsigned int segno;
 
-               segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset);
-               if (segno >= MAIN_SEGS(sbi)) {
+               segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
+               if (segno >= last_segment) {
                        if (sbi->last_victim[p.gc_mode]) {
+                               last_segment = sbi->last_victim[p.gc_mode];
                                sbi->last_victim[p.gc_mode] = 0;
                                p.offset = 0;
                                continue;
@@ -327,6 +335,7 @@ got_it:
                                sbi->cur_victim_sec,
                                prefree_segments(sbi), free_segments(sbi));
        }
+out:
        mutex_unlock(&dirty_i->seglist_lock);
 
        return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -541,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
        int err;
 
        /* do not read out */
-       page = grab_cache_page(inode->i_mapping, bidx);
+       page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
        if (!page)
                return;
 
@@ -550,8 +559,16 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
        if (err)
                goto out;
 
-       if (unlikely(dn.data_blkaddr == NULL_ADDR))
+       if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
+               ClearPageUptodate(page);
                goto put_out;
+       }
+
+       /*
+        * don't cache encrypted data in the meta inode until previously dirty
+        * data has been written back, to avoid a race between GC and flush.
+        */
+       f2fs_wait_on_page_writeback(page, DATA);
 
        get_node_info(fio.sbi, dn.nid, &ni);
        set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
@@ -580,7 +597,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
                goto put_page_out;
 
        set_page_dirty(fio.encrypted_page);
-       f2fs_wait_on_page_writeback(fio.encrypted_page, META);
+       f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
        if (clear_page_dirty_for_io(fio.encrypted_page))
                dec_page_count(fio.sbi, F2FS_DIRTY_META);
 
@@ -611,7 +628,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
 {
        struct page *page;
 
-       page = get_lock_data_page(inode, bidx);
+       page = get_lock_data_page(inode, bidx, true);
        if (IS_ERR(page))
                return;
 
@@ -705,7 +722,7 @@ next_step:
 
                        start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
                        data_page = get_read_data_page(inode,
-                                       start_bidx + ofs_in_node, READA);
+                                       start_bidx + ofs_in_node, READA, true);
                        if (IS_ERR(data_page)) {
                                iput(inode);
                                continue;
@@ -797,13 +814,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
        return nfree;
 }
 
-int f2fs_gc(struct f2fs_sb_info *sbi)
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
 {
-       unsigned int segno = NULL_SEGNO;
-       unsigned int i;
-       int gc_type = BG_GC;
-       int nfree = 0;
-       int ret = -1;
+       unsigned int segno, i;
+       int gc_type = sync ? FG_GC : BG_GC;
+       int sec_freed = 0;
+       int ret = -EINVAL;
        struct cp_control cpc;
        struct gc_inode_list gc_list = {
                .ilist = LIST_HEAD_INIT(gc_list.ilist),
@@ -812,12 +828,14 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
 
        cpc.reason = __get_cp_reason(sbi);
 gc_more:
+       segno = NULL_SEGNO;
+
        if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
                goto stop;
        if (unlikely(f2fs_cp_error(sbi)))
                goto stop;
 
-       if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
+       if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
                gc_type = FG_GC;
                if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
                        write_checkpoint(sbi, &cpc);
@@ -830,23 +848,38 @@ gc_more:
        /* readahead multi ssa blocks those have contiguous address */
        if (sbi->segs_per_sec > 1)
                ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
-                                                               META_SSA);
+                                                       META_SSA, true);
 
-       for (i = 0; i < sbi->segs_per_sec; i++)
-               nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
+       for (i = 0; i < sbi->segs_per_sec; i++) {
+               /*
+                * in the FG_GC case, stop collecting the remaining segments of
+                * the selected section once one fails, to avoid long latency.
+                */
+               if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
+                               gc_type == FG_GC)
+                       break;
+       }
+
+       if (i == sbi->segs_per_sec && gc_type == FG_GC)
+               sec_freed++;
 
        if (gc_type == FG_GC)
                sbi->cur_victim_sec = NULL_SEGNO;
 
-       if (has_not_enough_free_secs(sbi, nfree))
-               goto gc_more;
+       if (!sync) {
+               if (has_not_enough_free_secs(sbi, sec_freed))
+                       goto gc_more;
 
-       if (gc_type == FG_GC)
-               write_checkpoint(sbi, &cpc);
+               if (gc_type == FG_GC)
+                       write_checkpoint(sbi, &cpc);
+       }
 stop:
        mutex_unlock(&sbi->gc_mutex);
 
        put_gc_inode(&gc_list);
+
+       if (sync)
+               ret = sec_freed ? 0 : -EAGAIN;
        return ret;
 }
 
index c5a055b..b4a65be 100644 (file)
 #define LIMIT_INVALID_BLOCK    40 /* percentage over total user space */
 #define LIMIT_FREE_BLOCK       40 /* percentage over invalid + free space */
 
-/*
- * with this macro, we can control the max time we do garbage collection,
- * when user triggers batch mode gc by ioctl.
- */
-#define F2FS_BATCH_GC_MAX_NUM          16
-
 /* Search max. number of dirty segments to select a victim segment */
 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
 
index 3d143be..bda7126 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/f2fs_fs.h>
 
 #include "f2fs.h"
+#include "node.h"
 
 bool f2fs_may_inline_data(struct inode *inode)
 {
@@ -274,12 +275,14 @@ process_inline:
        if (f2fs_has_inline_data(inode)) {
                ipage = get_node_page(sbi, inode->i_ino);
                f2fs_bug_on(sbi, IS_ERR(ipage));
-               truncate_inline_inode(ipage, 0);
+               if (!truncate_inline_inode(ipage, 0))
+                       return false;
                f2fs_clear_inline_inode(inode);
                update_inode(inode, ipage);
                f2fs_put_page(ipage, 1);
        } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-               truncate_blocks(inode, 0, false);
+               if (truncate_blocks(inode, 0, false))
+                       return false;
                goto process_inline;
        }
        return false;
@@ -568,3 +571,38 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
        f2fs_put_page(ipage, 1);
        return 0;
 }
+
+/* Report the inline data of @inode to fiemap as a single, last extent. */
+int f2fs_inline_data_fiemap(struct inode *inode,
+               struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
+{
+       __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
+               FIEMAP_EXTENT_LAST;
+       __u64 byteaddr, ilen;
+       struct node_info ni;
+       struct page *ipage;
+       int err = 0;
+
+       ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+       if (IS_ERR(ipage))
+               return PTR_ERR(ipage);
+
+       if (!f2fs_has_inline_data(inode)) {
+               err = -EAGAIN;
+               goto out;
+       }
+
+       /* clip [start, start + len) to the inline bytes within i_size */
+       ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode));
+       if (start >= ilen)
+               goto out;
+       ilen = min(ilen, start + len) - start;
+
+       get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+       byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
+       byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage);
+       err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
+out:
+       f2fs_put_page(ipage, 1);
+       return err;
+}
index 35aae65..97e20de 100644 (file)
@@ -296,16 +296,12 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
                return 0;
 
        /*
-        * We need to lock here to prevent from producing dirty node pages
+        * We need to balance fs here to prevent producing dirty node pages
         * during the urgent cleaning time when runing out of free sections.
         */
-       f2fs_lock_op(sbi);
        update_inode_page(inode);
-       f2fs_unlock_op(sbi);
-
-       if (wbc)
-               f2fs_balance_fs(sbi);
 
+       f2fs_balance_fs(sbi);
        return 0;
 }
 
index a680bf3..e48b80c 100644 (file)
@@ -410,11 +410,14 @@ err_out:
         * If the symlink path is stored into inline_data, there is no
         * performance regression.
         */
-       if (!err)
+       if (!err) {
                filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1);
 
-       if (IS_DIRSYNC(dir))
-               f2fs_sync_fs(sbi->sb, 1);
+               if (IS_DIRSYNC(dir))
+                       f2fs_sync_fs(sbi->sb, 1);
+       } else {
+               f2fs_unlink(dir, dentry);
+       }
 
        kfree(sd);
        f2fs_fname_crypto_free_buffer(&disk_link);
@@ -947,8 +950,13 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
 
        /* Symlink is encrypted */
        sd = (struct f2fs_encrypted_symlink_data *)caddr;
-       cstr.name = sd->encrypted_path;
        cstr.len = le16_to_cpu(sd->len);
+       cstr.name = kmalloc(cstr.len, GFP_NOFS);
+       if (!cstr.name) {
+               res = -ENOMEM;
+               goto errout;
+       }
+       memcpy(cstr.name, sd->encrypted_path, cstr.len);
 
        /* this is broken symlink case */
        if (cstr.name[0] == 0 && cstr.len == 0) {
@@ -970,6 +978,8 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
        if (res < 0)
                goto errout;
 
+       kfree(cstr.name);
+
        paddr = pstr.name;
 
        /* Null-terminate the name */
@@ -979,6 +989,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
        page_cache_release(cpage);
        return *cookie = paddr;
 errout:
+       kfree(cstr.name);
        f2fs_fname_crypto_free_buffer(&pstr);
        kunmap(cpage);
        page_cache_release(cpage);
index 27d1a74..7bcbc6e 100644 (file)
@@ -1323,23 +1323,24 @@ static int f2fs_write_node_page(struct page *page,
        nid = nid_of_node(page);
        f2fs_bug_on(sbi, page->index != nid);
 
+       if (wbc->for_reclaim) {
+               if (!down_read_trylock(&sbi->node_write))
+                       goto redirty_out;
+       } else {
+               down_read(&sbi->node_write);
+       }
+
        get_node_info(sbi, nid, &ni);
 
        /* This page is already truncated */
        if (unlikely(ni.blk_addr == NULL_ADDR)) {
                ClearPageUptodate(page);
                dec_page_count(sbi, F2FS_DIRTY_NODES);
+               up_read(&sbi->node_write);
                unlock_page(page);
                return 0;
        }
 
-       if (wbc->for_reclaim) {
-               if (!down_read_trylock(&sbi->node_write))
-                       goto redirty_out;
-       } else {
-               down_read(&sbi->node_write);
-       }
-
        set_page_writeback(page);
        fio.blk_addr = ni.blk_addr;
        write_node_page(nid, &fio);
@@ -1528,7 +1529,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
                return;
 
        /* readahead nat pages to be scanned */
-       ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
+       ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
+                                                       META_NAT, true);
 
        while (1) {
                struct page *page = get_current_nat_page(sbi, nid);
@@ -1558,6 +1560,9 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
                        remove_free_nid(nm_i, nid);
        }
        mutex_unlock(&curseg->curseg_mutex);
+
+       ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
+                                       nm_i->ra_nid_pages, META_NAT, false);
 }
 
 /*
@@ -1803,10 +1808,10 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
                nrpages = min(last_offset - i, bio_blocks);
 
                /* readahead node pages */
-               ra_meta_pages(sbi, addr, nrpages, META_POR);
+               ra_meta_pages(sbi, addr, nrpages, META_POR, true);
 
                for (idx = addr; idx < addr + nrpages; idx++) {
-                       struct page *page = get_meta_page(sbi, idx);
+                       struct page *page = get_tmp_page(sbi, idx);
 
                        rn = F2FS_NODE(page);
                        sum_entry->nid = rn->footer.nid;
@@ -2000,6 +2005,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        nm_i->fcnt = 0;
        nm_i->nat_cnt = 0;
        nm_i->ram_thresh = DEF_RAM_THRESHOLD;
+       nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
 
        INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
        INIT_LIST_HEAD(&nm_i->free_nid_list);
index 7427e95..e4fffd2 100644 (file)
 /* node block offset on the NAT area dedicated to the given start node id */
 #define        NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
 
-/* # of pages to perform readahead before building free nids */
+/* # of pages to perform synchronous readahead before building free nids */
 #define FREE_NID_PAGES 4
 
+#define DEF_RA_NID_PAGES       4       /* # of nid pages to be readaheaded */
+
 /* maximum readahead size for node during getting data blocks */
 #define MAX_RA_NODE            128
 
index faec2ca..cbf74f4 100644 (file)
@@ -180,7 +180,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
-       ra_meta_pages(sbi, blkaddr, 1, META_POR);
+       ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
 
        while (1) {
                struct fsync_inode_entry *entry;
@@ -188,7 +188,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
                if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
                        return 0;
 
-               page = get_meta_page(sbi, blkaddr);
+               page = get_tmp_page(sbi, blkaddr);
 
                if (cp_ver != cpver_of_node(page))
                        break;
@@ -383,15 +383,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
        start = start_bidx_of_node(ofs_of_node(page), fi);
        end = start + ADDRS_PER_PAGE(page, fi);
 
-       f2fs_lock_op(sbi);
-
        set_new_dnode(&dn, inode, NULL, NULL, 0);
 
        err = get_dnode_of_data(&dn, start, ALLOC_NODE);
-       if (err) {
-               f2fs_unlock_op(sbi);
+       if (err)
                goto out;
-       }
 
        f2fs_wait_on_page_writeback(dn.node_page, NODE);
 
@@ -456,7 +452,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
        set_page_dirty(dn.node_page);
 err:
        f2fs_put_dnode(&dn);
-       f2fs_unlock_op(sbi);
 out:
        f2fs_msg(sbi->sb, KERN_NOTICE,
                "recover_data: ino = %lx, recovered = %d blocks, err = %d",
@@ -485,7 +480,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
 
                ra_meta_pages_cond(sbi, blkaddr);
 
-               page = get_meta_page(sbi, blkaddr);
+               page = get_tmp_page(sbi, blkaddr);
 
                if (cp_ver != cpver_of_node(page)) {
                        f2fs_put_page(page, 1);
@@ -570,7 +565,7 @@ out:
 
        /* truncate meta pages to be used by the recovery */
        truncate_inode_pages_range(META_MAPPING(sbi),
-                       MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
+                       (loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
 
        if (err) {
                truncate_inode_pages_final(NODE_MAPPING(sbi));
index 78e6d06..f77b325 100644 (file)
@@ -14,8 +14,8 @@
 #include <linux/blkdev.h>
 #include <linux/prefetch.h>
 #include <linux/kthread.h>
-#include <linux/vmalloc.h>
 #include <linux/swap.h>
+#include <linux/timer.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -29,6 +29,21 @@ static struct kmem_cache *discard_entry_slab;
 static struct kmem_cache *sit_entry_set_slab;
 static struct kmem_cache *inmem_entry_slab;
 
+/*
+ * Assemble one long from the bytes at @str, with str[0] landing in the
+ * most significant byte.
+ */
+static unsigned long __reverse_ulong(unsigned char *str)
+{
+       unsigned long word = 0;
+       int pos;
+
+       /* each round pushes the earlier bytes one byte further up */
+       for (pos = 0; pos < BITS_PER_LONG / BITS_PER_BYTE; pos++)
+               word = (word << BITS_PER_BYTE) | str[pos];
+       return word;
+}
+
 /*
  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
  * MSB and LSB are reversed in a byte by f2fs_set_bit.
@@ -38,27 +53,31 @@ static inline unsigned long __reverse_ffs(unsigned long word)
        int num = 0;
 
 #if BITS_PER_LONG == 64
-       if ((word & 0xffffffff) == 0) {
+       if ((word & 0xffffffff00000000UL) == 0)
                num += 32;
+       else
                word >>= 32;
-       }
 #endif
-       if ((word & 0xffff) == 0) {
+       if ((word & 0xffff0000) == 0)
                num += 16;
+       else
                word >>= 16;
-       }
-       if ((word & 0xff) == 0) {
+
+       if ((word & 0xff00) == 0)
                num += 8;
+       else
                word >>= 8;
-       }
+
        if ((word & 0xf0) == 0)
                num += 4;
        else
                word >>= 4;
+
        if ((word & 0xc) == 0)
                num += 2;
        else
                word >>= 2;
+
        if ((word & 0x2) == 0)
                num += 1;
        return num;
@@ -68,26 +87,16 @@ static inline unsigned long __reverse_ffs(unsigned long word)
  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
  * f2fs_set_bit makes MSB and LSB reversed in a byte.
  * Example:
- *                             LSB <--> MSB
- *   f2fs_set_bit(0, bitmap) => 0000 0001
- *   f2fs_set_bit(7, bitmap) => 1000 0000
+ *                             MSB <--> LSB
+ *   f2fs_set_bit(0, bitmap) => 1000 0000
+ *   f2fs_set_bit(7, bitmap) => 0000 0001
  */
 static unsigned long __find_rev_next_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
 {
-       while (!f2fs_test_bit(offset, (unsigned char *)addr))
-               offset++;
-
-       if (offset > size)
-               offset = size;
-
-       return offset;
-#if 0
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = offset & ~(BITS_PER_LONG - 1);
        unsigned long tmp;
-       unsigned long mask, submask;
-       unsigned long quot, rest;
 
        if (offset >= size)
                return size;
@@ -97,14 +106,9 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
        if (!offset)
                goto aligned;
 
-       tmp = *(p++);
-       quot = (offset >> 3) << 3;
-       rest = offset & 0x7;
-       mask = ~0UL << quot;
-       submask = (unsigned char)(0xff << rest) >> rest;
-       submask <<= quot;
-       mask &= submask;
-       tmp &= mask;
+       tmp = __reverse_ulong((unsigned char *)p);
+       tmp &= ~0UL >> offset;
+
        if (size < BITS_PER_LONG)
                goto found_first;
        if (tmp)
@@ -112,42 +116,34 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
 
        size -= BITS_PER_LONG;
        result += BITS_PER_LONG;
+       p++;
 aligned:
        while (size & ~(BITS_PER_LONG-1)) {
-               tmp = *(p++);
+               tmp = __reverse_ulong((unsigned char *)p);
                if (tmp)
                        goto found_middle;
                result += BITS_PER_LONG;
                size -= BITS_PER_LONG;
+               p++;
        }
        if (!size)
                return result;
-       tmp = *p;
+
+       tmp = __reverse_ulong((unsigned char *)p);
 found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
+       tmp &= (~0UL << (BITS_PER_LONG - size));
+       if (!tmp)               /* Are any bits set? */
                return result + size;   /* Nope. */
 found_middle:
        return result + __reverse_ffs(tmp);
-#endif
 }
 
 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
 {
-       while (f2fs_test_bit(offset, (unsigned char *)addr))
-               offset++;
-
-       if (offset > size)
-               offset = size;
-
-       return offset;
-#if 0
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = offset & ~(BITS_PER_LONG - 1);
        unsigned long tmp;
-       unsigned long mask, submask;
-       unsigned long quot, rest;
 
        if (offset >= size)
                return size;
@@ -157,40 +153,36 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
        if (!offset)
                goto aligned;
 
-       tmp = *(p++);
-       quot = (offset >> 3) << 3;
-       rest = offset & 0x7;
-       mask = ~(~0UL << quot);
-       submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
-       submask <<= quot;
-       mask += submask;
-       tmp |= mask;
+       tmp = __reverse_ulong((unsigned char *)p);
+       tmp |= ~((~0UL << offset) >> offset);
+
        if (size < BITS_PER_LONG)
                goto found_first;
-       if (~tmp)
+       if (tmp != ~0UL)
                goto found_middle;
 
        size -= BITS_PER_LONG;
        result += BITS_PER_LONG;
+       p++;
 aligned:
        while (size & ~(BITS_PER_LONG - 1)) {
-               tmp = *(p++);
-               if (~tmp)
+               tmp = __reverse_ulong((unsigned char *)p);
+               if (tmp != ~0UL)
                        goto found_middle;
                result += BITS_PER_LONG;
                size -= BITS_PER_LONG;
+               p++;
        }
        if (!size)
                return result;
-       tmp = *p;
 
+       tmp = __reverse_ulong((unsigned char *)p);
 found_first:
-       tmp |= ~0UL << size;
-       if (tmp == ~0UL)        /* Are any bits zero? */
+       tmp |= ~(~0UL << (BITS_PER_LONG - size));
+       if (tmp == ~0UL)        /* Are any bits zero? */
                return result + size;   /* Nope. */
 found_middle:
        return result + __reverse_ffz(tmp);
-#endif
 }
 
 void register_inmem_page(struct inode *inode, struct page *page)
@@ -257,11 +249,12 @@ int commit_inmem_pages(struct inode *inode, bool abort)
                                trace_f2fs_commit_inmem_page(cur->page, INMEM);
                                fio.page = cur->page;
                                err = do_write_data_page(&fio);
-                               submit_bio = true;
                                if (err) {
                                        unlock_page(cur->page);
                                        break;
                                }
+                               clear_cold_data(cur->page);
+                               submit_bio = true;
                        }
                } else {
                        trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
@@ -296,7 +289,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
         */
        if (has_not_enough_free_secs(sbi, 0)) {
                mutex_lock(&sbi->gc_mutex);
-               f2fs_gc(sbi);
+               f2fs_gc(sbi, false);
        }
 }
 
@@ -316,7 +309,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
        /* checkpoint is the only way to shrink partial cached entries */
        if (!available_free_memory(sbi, NAT_ENTRIES) ||
                        excess_prefree_segs(sbi) ||
-                       !available_free_memory(sbi, INO_ENTRIES))
+                       !available_free_memory(sbi, INO_ENTRIES) ||
+                       jiffies > sbi->cp_expires)
                f2fs_sync_fs(sbi->sb, true);
 }
 
@@ -767,6 +761,30 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
        mutex_unlock(&sit_i->sentry_lock);
 }
 
+bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+       struct sit_info *sit_i = SIT_I(sbi);
+       unsigned int segno, offset;
+       struct seg_entry *se;
+       bool is_cp = false;
+
+       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+               return true;
+
+       mutex_lock(&sit_i->sentry_lock);
+
+       segno = GET_SEGNO(sbi, blkaddr);
+       se = get_seg_entry(sbi, segno);
+       offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+
+       if (f2fs_test_bit(offset, se->ckpt_valid_map))
+               is_cp = true;
+
+       mutex_unlock(&sit_i->sentry_lock);
+
+       return is_cp;
+}
+
 /*
  * This function should be resided under the curseg_mutex lock
  */
@@ -1292,6 +1310,9 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
                .encrypted_page = NULL,
        };
 
+       if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
+               fio.rw &= ~REQ_META;
+
        set_page_writeback(page);
        f2fs_submit_page_mbio(&fio);
 }
@@ -1369,7 +1390,14 @@ static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
        curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
        __add_sum_entry(sbi, type, sum);
 
-       refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
+       if (!recover_curseg)
+               update_sit_entry(sbi, new_blkaddr, 1);
+       if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+               update_sit_entry(sbi, old_blkaddr, -1);
+
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
+
        locate_dirty_segment(sbi, old_cursegno);
 
        if (recover_curseg) {
@@ -1449,6 +1477,23 @@ void f2fs_wait_on_page_writeback(struct page *page,
        }
 }
 
+void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
+                                                       block_t blkaddr)
+{
+       struct page *cpage;
+
+       if (blkaddr == NEW_ADDR)
+               return;
+
+       f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
+
+       cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
+       if (cpage) {
+               f2fs_wait_on_page_writeback(cpage, DATA);
+               f2fs_put_page(cpage, 1);
+       }
+}
+
 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1586,7 +1631,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 
                if (npages >= 2)
                        ra_meta_pages(sbi, start_sum_block(sbi), npages,
-                                                               META_CP);
+                                                       META_CP, true);
 
                /* restore for compacted data summary */
                if (read_compacted_summaries(sbi))
@@ -1596,7 +1641,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 
        if (__exist_node_summaries(sbi))
                ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
-                                       NR_CURSEG_TYPE - type, META_CP);
+                                       NR_CURSEG_TYPE - type, META_CP, true);
 
        for (; type <= CURSEG_COLD_NODE; type++) {
                err = read_normal_summaries(sbi, type);
@@ -1955,12 +2000,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 
        SM_I(sbi)->sit_info = sit_i;
 
-       sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
+       sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
+                                       sizeof(struct seg_entry), GFP_KERNEL);
        if (!sit_i->sentries)
                return -ENOMEM;
 
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+       sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
        if (!sit_i->dirty_sentries_bitmap)
                return -ENOMEM;
 
@@ -1982,8 +2028,8 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
                return -ENOMEM;
 
        if (sbi->segs_per_sec > 1) {
-               sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
-                                       sizeof(struct sec_entry));
+               sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
+                                       sizeof(struct sec_entry), GFP_KERNEL);
                if (!sit_i->sec_entries)
                        return -ENOMEM;
        }
@@ -2028,12 +2074,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
        SM_I(sbi)->free_info = free_i;
 
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
+       free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
        if (!free_i->free_segmap)
                return -ENOMEM;
 
        sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
-       free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
+       free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
        if (!free_i->free_secmap)
                return -ENOMEM;
 
@@ -2082,7 +2128,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
        int nrpages = MAX_BIO_BLOCKS(sbi);
 
        do {
-               readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
+               readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
 
                start = start_blk * sit_i->sents_per_block;
                end = (start_blk + readed) * sit_i->sents_per_block;
@@ -2174,7 +2220,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi)
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
 
-       dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
+       dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
        if (!dirty_i->victim_secmap)
                return -ENOMEM;
        return 0;
@@ -2196,7 +2242,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
 
        for (i = 0; i < NR_DIRTY_TYPE; i++) {
-               dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
+               dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
                if (!dirty_i->dirty_segmap[i])
                        return -ENOMEM;
        }
@@ -2301,7 +2347,7 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 
        mutex_lock(&dirty_i->seglist_lock);
-       kfree(dirty_i->dirty_segmap[dirty_type]);
+       kvfree(dirty_i->dirty_segmap[dirty_type]);
        dirty_i->nr_dirty[dirty_type] = 0;
        mutex_unlock(&dirty_i->seglist_lock);
 }
@@ -2309,7 +2355,7 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-       kfree(dirty_i->victim_secmap);
+       kvfree(dirty_i->victim_secmap);
 }
 
 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
@@ -2348,8 +2394,8 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
        if (!free_i)
                return;
        SM_I(sbi)->free_info = NULL;
-       kfree(free_i->free_segmap);
-       kfree(free_i->free_secmap);
+       kvfree(free_i->free_segmap);
+       kvfree(free_i->free_secmap);
        kfree(free_i);
 }
 
@@ -2370,9 +2416,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
        }
        kfree(sit_i->tmp_map);
 
-       vfree(sit_i->sentries);
-       vfree(sit_i->sec_entries);
-       kfree(sit_i->dirty_sentries_bitmap);
+       kvfree(sit_i->sentries);
+       kvfree(sit_i->sec_entries);
+       kvfree(sit_i->dirty_sentries_bitmap);
 
        SM_I(sbi)->sit_info = NULL;
        kfree(sit_i->sit_bitmap);
index b6e4ed1..ee44d34 100644 (file)
@@ -137,10 +137,12 @@ enum {
 /*
  * BG_GC means the background cleaning job.
  * FG_GC means the on-demand cleaning job.
+ * FORCE_FG_GC means on-demand cleaning job in background.
  */
 enum {
        BG_GC = 0,
-       FG_GC
+       FG_GC,
+       FORCE_FG_GC,
 };
 
 /* for a function parameter to select a victim segment */
index f794781..3a65e01 100644 (file)
@@ -213,8 +213,10 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -231,6 +233,8 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(max_victim_search),
        ATTR_LIST(dir_level),
        ATTR_LIST(ram_thresh),
+       ATTR_LIST(ra_nid_pages),
+       ATTR_LIST(cp_interval),
        NULL,
 };
 
@@ -292,11 +296,16 @@ static int parse_options(struct super_block *sb, char *options)
 
                        if (!name)
                                return -ENOMEM;
-                       if (strlen(name) == 2 && !strncmp(name, "on", 2))
+                       if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
                                set_opt(sbi, BG_GC);
-                       else if (strlen(name) == 3 && !strncmp(name, "off", 3))
+                               clear_opt(sbi, FORCE_FG_GC);
+                       } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
                                clear_opt(sbi, BG_GC);
-                       else {
+                               clear_opt(sbi, FORCE_FG_GC);
+                       } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
+                               set_opt(sbi, BG_GC);
+                               set_opt(sbi, FORCE_FG_GC);
+                       } else {
                                kfree(name);
                                return -EINVAL;
                        }
@@ -631,10 +640,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
 
-       if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
-               seq_printf(seq, ",background_gc=%s", "on");
-       else
+       if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
+               if (test_opt(sbi, FORCE_FG_GC))
+                       seq_printf(seq, ",background_gc=%s", "sync");
+               else
+                       seq_printf(seq, ",background_gc=%s", "on");
+       } else {
                seq_printf(seq, ",background_gc=%s", "off");
+       }
        if (test_opt(sbi, DISABLE_ROLL_FORWARD))
                seq_puts(seq, ",disable_roll_forward");
        if (test_opt(sbi, DISCARD))
@@ -742,6 +755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        int err, active_logs;
        bool need_restart_gc = false;
        bool need_stop_gc = false;
+       bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
 
        sync_filesystem(sb);
 
@@ -767,6 +781,14 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
                goto skip;
 
+       /* disallow enable/disable extent_cache dynamically */
+       if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
+               err = -EINVAL;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                               "switch extent_cache option is not allowed");
+               goto restore_opts;
+       }
+
        /*
         * We stop the GC thread if FS is mounted as RO
         * or if background_gc = off is passed in mount
@@ -996,6 +1018,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
                atomic_set(&sbi->nr_pages[i], 0);
 
        sbi->dir_level = DEF_DIR_LEVEL;
+       sbi->cp_interval = DEF_CP_INTERVAL;
        clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
        INIT_LIST_HEAD(&sbi->s_list);
@@ -1332,6 +1355,8 @@ try_onemore:
                f2fs_commit_super(sbi, true);
        }
 
+       sbi->cp_expires = round_jiffies_up(jiffies);
+
        return 0;
 
 free_kobj:
index a019465..00b4a63 100644 (file)
@@ -514,6 +514,34 @@ TRACE_EVENT(f2fs_map_blocks,
                __entry->ret)
 );
 
+TRACE_EVENT(f2fs_background_gc,
+
+       TP_PROTO(struct super_block *sb, long wait_ms,
+                       unsigned int prefree, unsigned int free),
+
+       TP_ARGS(sb, wait_ms, prefree, free),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(long,   wait_ms)
+               __field(unsigned int,   prefree)
+               __field(unsigned int,   free)
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = sb->s_dev;
+               __entry->wait_ms        = wait_ms;
+               __entry->prefree        = prefree;
+               __entry->free           = free;
+       ),
+
+       TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
+               show_dev(__entry),
+               __entry->wait_ms,
+               __entry->prefree,
+               __entry->free)
+);
+
 TRACE_EVENT(f2fs_get_victim,
 
        TP_PROTO(struct super_block *sb, int type, int gc_type,
@@ -1000,6 +1028,32 @@ TRACE_EVENT(f2fs_writepages,
                __entry->for_sync)
 );
 
+TRACE_EVENT(f2fs_readpages,
+
+       TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage),
+
+       TP_ARGS(inode, page, nrpage),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(ino_t,  ino)
+               __field(pgoff_t,        start)
+               __field(unsigned int,   nrpage)
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->start  = page->index;
+               __entry->nrpage = nrpage;
+       ),
+
+       TP_printk("dev = (%d,%d), ino = %lu, start = %lu nrpage = %u",
+               show_dev_ino(__entry),
+               (unsigned long)__entry->start,
+               __entry->nrpage)
+);
+
 TRACE_EVENT(f2fs_write_checkpoint,
 
        TP_PROTO(struct super_block *sb, int reason, char *msg),
@@ -1132,17 +1186,19 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
                __entry->len)
 );
 
-TRACE_EVENT(f2fs_update_extent_tree,
+TRACE_EVENT(f2fs_update_extent_tree_range,
 
-       TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr),
+       TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr,
+                                               unsigned int len),
 
-       TP_ARGS(inode, pgofs, blkaddr),
+       TP_ARGS(inode, pgofs, blkaddr, len),
 
        TP_STRUCT__entry(
                __field(dev_t,  dev)
                __field(ino_t,  ino)
                __field(unsigned int, pgofs)
                __field(u32, blk)
+               __field(unsigned int, len)
        ),
 
        TP_fast_assign(
@@ -1150,12 +1206,15 @@ TRACE_EVENT(f2fs_update_extent_tree,
                __entry->ino = inode->i_ino;
                __entry->pgofs = pgofs;
                __entry->blk = blkaddr;
+               __entry->len = len;
        ),
 
-       TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u",
+       TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
+                                       "blkaddr = %u, len = %u",
                show_dev_ino(__entry),
                __entry->pgofs,
-               __entry->blk)
+               __entry->blk,
+               __entry->len)
 );
 
 TRACE_EVENT(f2fs_shrink_extent_tree,