git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/f2fs/node.c
f2fs: combine nat_bits and free_nid_bitmap cache
[karo-tx-linux.git] / fs / f2fs / node.c
index 8ebc4c78e6a4882b95304eaecaf433af56e431fb..481aa8dc79f46f4c156cf67cca665e8160e36e6a 100644 (file)
@@ -1762,7 +1762,8 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
                radix_tree_delete(&nm_i->free_nid_root, i->nid);
 }
 
-static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
+/* return if the nid is recognized as free */
+static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct free_nid *i;
@@ -1771,14 +1772,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 
        /* 0 nid should not be used */
        if (unlikely(nid == 0))
-               return 0;
+               return false;
 
        if (build) {
                /* do not add allocated nids */
                ne = __lookup_nat_cache(nm_i, nid);
                if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
                                nat_get_blkaddr(ne) != NULL_ADDR))
-                       return 0;
+                       return false;
        }
 
        i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
@@ -1787,7 +1788,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 
        if (radix_tree_preload(GFP_NOFS)) {
                kmem_cache_free(free_nid_slab, i);
-               return 0;
+               return true;
        }
 
        spin_lock(&nm_i->nid_list_lock);
@@ -1796,9 +1797,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
        radix_tree_preload_end();
        if (err) {
                kmem_cache_free(free_nid_slab, i);
-               return 0;
+               return true;
        }
-       return 1;
+       return true;
 }
 
 static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1819,17 +1820,49 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
                kmem_cache_free(free_nid_slab, i);
 }
 
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+                       bool set, bool build, bool locked)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
+       unsigned int nid_ofs = nid - START_NID(nid);
+
+       if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+               return;
+
+       if (set)
+               __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+       else
+               __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+       if (!locked)
+               spin_lock(&nm_i->free_nid_lock);
+       if (set)
+               nm_i->free_nid_count[nat_ofs]++;
+       else if (!build)
+               nm_i->free_nid_count[nat_ofs]--;
+       if (!locked)
+               spin_unlock(&nm_i->free_nid_lock);
+}
+
 static void scan_nat_page(struct f2fs_sb_info *sbi,
                        struct page *nat_page, nid_t start_nid)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct f2fs_nat_block *nat_blk = page_address(nat_page);
        block_t blk_addr;
+       unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
        int i;
 
+       if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+               return;
+
+       __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+
        i = start_nid % NAT_ENTRY_PER_BLOCK;
 
        for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
+               bool freed = false;
 
                if (unlikely(start_nid >= nm_i->max_nid))
                        break;
@@ -1837,11 +1870,56 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
                blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
                f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
                if (blk_addr == NULL_ADDR)
-                       add_free_nid(sbi, start_nid, true);
+                       freed = add_free_nid(sbi, start_nid, true);
+               update_free_nid_bitmap(sbi, start_nid, freed, true, false);
        }
 }
 
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+       struct f2fs_journal *journal = curseg->journal;
+       unsigned int i, idx;
+
+       down_read(&nm_i->nat_tree_lock);
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               if (!test_bit_le(i, nm_i->nat_block_bitmap))
+                       continue;
+               if (!nm_i->free_nid_count[i])
+                       continue;
+               for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
+                       nid_t nid;
+
+                       if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
+                               continue;
+
+                       nid = i * NAT_ENTRY_PER_BLOCK + idx;
+                       add_free_nid(sbi, nid, true);
+
+                       if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
+                               goto out;
+               }
+       }
+out:
+       down_read(&curseg->journal_rwsem);
+       for (i = 0; i < nats_in_cursum(journal); i++) {
+               block_t addr;
+               nid_t nid;
+
+               addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+               nid = le32_to_cpu(nid_in_journal(journal, i));
+               if (addr == NULL_ADDR)
+                       add_free_nid(sbi, nid, true);
+               else
+                       remove_free_nid(sbi, nid);
+       }
+       up_read(&curseg->journal_rwsem);
+       up_read(&nm_i->nat_tree_lock);
+}
+
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1856,6 +1934,14 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
        if (!sync && !available_free_memory(sbi, FREE_NIDS))
                return;
 
+       if (!mount) {
+               /* try to find free nids in free_nid_bitmap */
+               scan_free_nid_bits(sbi);
+
+               if (nm_i->nid_cnt[FREE_NID_LIST])
+                       return;
+       }
+
        /* readahead nat pages to be scanned */
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
                                                        META_NAT, true);
@@ -1898,10 +1984,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
                                        nm_i->ra_nid_pages, META_NAT, false);
 }
 
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
        mutex_lock(&NM_I(sbi)->build_lock);
-       __build_free_nids(sbi, sync);
+       __build_free_nids(sbi, sync, mount);
        mutex_unlock(&NM_I(sbi)->build_lock);
 }
 
@@ -1916,8 +2002,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
        struct free_nid *i = NULL;
 retry:
 #ifdef CONFIG_F2FS_FAULT_INJECTION
-       if (time_to_inject(sbi, FAULT_ALLOC_NID))
+       if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
+               f2fs_show_injection_info(FAULT_ALLOC_NID);
                return false;
+       }
 #endif
        spin_lock(&nm_i->nid_list_lock);
 
@@ -1937,13 +2025,16 @@ retry:
                i->state = NID_ALLOC;
                __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
                nm_i->available_nids--;
+
+               update_free_nid_bitmap(sbi, *nid, false, false, false);
+
                spin_unlock(&nm_i->nid_list_lock);
                return true;
        }
        spin_unlock(&nm_i->nid_list_lock);
 
        /* Let's scan nat pages and its caches to get free nids */
-       build_free_nids(sbi, true);
+       build_free_nids(sbi, true, false);
        goto retry;
 }
 
@@ -1991,6 +2082,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
        nm_i->available_nids++;
 
+       update_free_nid_bitmap(sbi, nid, true, false, false);
+
        spin_unlock(&nm_i->nid_list_lock);
 
        if (need_free)
@@ -2235,8 +2328,39 @@ add_out:
        list_add_tail(&nes->set_list, head);
 }
 
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+                                               struct page *page)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
+       struct f2fs_nat_block *nat_blk = page_address(page);
+       int valid = 0;
+       int i;
+
+       if (!enabled_nat_bits(sbi, NULL))
+               return;
+
+       for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
+               if (start_nid == 0 && i == 0)
+                       valid++;
+               if (nat_blk->entries[i].block_addr)
+                       valid++;
+       }
+       if (valid == 0) {
+               __set_bit_le(nat_index, nm_i->empty_nat_bits);
+               __clear_bit_le(nat_index, nm_i->full_nat_bits);
+               return;
+       }
+
+       __clear_bit_le(nat_index, nm_i->empty_nat_bits);
+       if (valid == NAT_ENTRY_PER_BLOCK)
+               __set_bit_le(nat_index, nm_i->full_nat_bits);
+       else
+               __clear_bit_le(nat_index, nm_i->full_nat_bits);
+}
+
 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
-                                       struct nat_entry_set *set)
+               struct nat_entry_set *set, struct cp_control *cpc)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
        struct f2fs_journal *journal = curseg->journal;
@@ -2251,7 +2375,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
         * #1, flush nat entries to journal in current hot data summary block.
         * #2, flush nat entries to nat page.
         */
-       if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
+       if (enabled_nat_bits(sbi, cpc) ||
+               !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
                to_journal = false;
 
        if (to_journal) {
@@ -2287,14 +2412,21 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                        add_free_nid(sbi, nid, false);
                        spin_lock(&NM_I(sbi)->nid_list_lock);
                        NM_I(sbi)->available_nids++;
+                       update_free_nid_bitmap(sbi, nid, true, false, false);
+                       spin_unlock(&NM_I(sbi)->nid_list_lock);
+               } else {
+                       spin_lock(&NM_I(sbi)->nid_list_lock);
+                       update_free_nid_bitmap(sbi, nid, false, false, false);
                        spin_unlock(&NM_I(sbi)->nid_list_lock);
                }
        }
 
-       if (to_journal)
+       if (to_journal) {
                up_write(&curseg->journal_rwsem);
-       else
+       } else {
+               __update_nat_bits(sbi, start_nid, page);
                f2fs_put_page(page, 1);
+       }
 
        f2fs_bug_on(sbi, set->entry_cnt);
 
@@ -2305,7 +2437,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 /*
  * This function is called during the checkpointing process.
  */
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2326,7 +2458,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
         * entries, remove all entries from journal and merge them
         * into nat entry set.
         */
-       if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+       if (enabled_nat_bits(sbi, cpc) ||
+               !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
                remove_nats_in_journal(sbi);
 
        while ((found = __gang_lookup_nat_set(nm_i,
@@ -2340,27 +2473,103 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 
        /* flush dirty nats in nat entry set */
        list_for_each_entry_safe(set, tmp, &sets, set_list)
-               __flush_nat_entry_set(sbi, set);
+               __flush_nat_entry_set(sbi, set, cpc);
 
        up_write(&nm_i->nat_tree_lock);
 
        f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
 
+static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
+       unsigned int i;
+       __u64 cp_ver = cur_cp_version(ckpt);
+       block_t nat_bits_addr;
+
+       if (!enabled_nat_bits(sbi, NULL))
+               return 0;
+
+       nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
+                                               F2FS_BLKSIZE - 1);
+       nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
+                                               GFP_KERNEL);
+       if (!nm_i->nat_bits)
+               return -ENOMEM;
+
+       nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+                                               nm_i->nat_bits_blocks;
+       for (i = 0; i < nm_i->nat_bits_blocks; i++) {
+               struct page *page = get_meta_page(sbi, nat_bits_addr++);
+
+               memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+                                       page_address(page), F2FS_BLKSIZE);
+               f2fs_put_page(page, 1);
+       }
+
+       cp_ver |= (cur_cp_crc(ckpt) << 32);
+       if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
+               disable_nat_bits(sbi, true);
+               return 0;
+       }
+
+       nm_i->full_nat_bits = nm_i->nat_bits + 8;
+       nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+       f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+       return 0;
+}
+
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int i = 0;
+       nid_t nid, last_nid;
+
+       if (!enabled_nat_bits(sbi, NULL))
+               return;
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+               if (i >= nm_i->nat_blocks)
+                       break;
+
+               __set_bit_le(i, nm_i->nat_block_bitmap);
+
+               nid = i * NAT_ENTRY_PER_BLOCK;
+               last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+               spin_lock(&nm_i->free_nid_lock);
+               for (; nid < last_nid; nid++)
+                       update_free_nid_bitmap(sbi, nid, true, true, true);
+               spin_unlock(&nm_i->free_nid_lock);
+       }
+
+       for (i = 0; i < nm_i->nat_blocks; i++) {
+               i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+               if (i >= nm_i->nat_blocks)
+                       break;
+
+               __set_bit_le(i, nm_i->nat_block_bitmap);
+       }
+}
+
 static int init_node_manager(struct f2fs_sb_info *sbi)
 {
        struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        unsigned char *version_bitmap;
-       unsigned int nat_segs, nat_blocks;
+       unsigned int nat_segs;
+       int err;
 
        nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
 
        /* segment_count_nat includes pair segment so divide to 2. */
        nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
-       nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
-
-       nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+       nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+       nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
 
        /* not used nids: 0, node, meta, (and root counted as valid node) */
        nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2394,6 +2603,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        if (!nm_i->nat_bitmap)
                return -ENOMEM;
 
+       err = __get_nat_bitmaps(sbi);
+       if (err)
+               return err;
+
 #ifdef CONFIG_F2FS_CHECK_FS
        nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
                                        GFP_KERNEL);
@@ -2404,6 +2617,30 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        return 0;
 }
 
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+       nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
+                                       NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
+       if (!nm_i->free_nid_bitmap)
+               return -ENOMEM;
+
+       nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
+                                                               GFP_KERNEL);
+       if (!nm_i->nat_block_bitmap)
+               return -ENOMEM;
+
+       nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+                                       sizeof(unsigned short), GFP_KERNEL);
+       if (!nm_i->free_nid_count)
+               return -ENOMEM;
+
+       spin_lock_init(&nm_i->free_nid_lock);
+
+       return 0;
+}
+
 int build_node_manager(struct f2fs_sb_info *sbi)
 {
        int err;
@@ -2416,7 +2653,14 @@ int build_node_manager(struct f2fs_sb_info *sbi)
        if (err)
                return err;
 
-       build_free_nids(sbi, true);
+       err = init_free_nid_cache(sbi);
+       if (err)
+               return err;
+
+       /* load free nid status from nat_bits table */
+       load_free_nid_bitmap(sbi);
+
+       build_free_nids(sbi, true, true);
        return 0;
 }
 
@@ -2474,7 +2718,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
        }
        up_write(&nm_i->nat_tree_lock);
 
+       kvfree(nm_i->nat_block_bitmap);
+       kvfree(nm_i->free_nid_bitmap);
+       kvfree(nm_i->free_nid_count);
+
        kfree(nm_i->nat_bitmap);
+       kfree(nm_i->nat_bits);
 #ifdef CONFIG_F2FS_CHECK_FS
        kfree(nm_i->nat_bitmap_mir);
 #endif