]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/ext4/mballoc.c
ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize
[karo-tx-linux.git] / fs / ext4 / mballoc.c
index edf9730ba72e77af3a2e78978fc7cab1e8a68ba9..d2b1bcaf88ecb994ef8be9c1e91b1ca204fbbd94 100644 (file)
@@ -886,18 +886,20 @@ static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                                        struct ext4_buddy *e4b)
 {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct inode *inode = sbi->s_buddy_cache;
        int blocks_per_page;
        int block;
        int pnum;
        int poff;
        struct page *page;
        int ret;
+       struct ext4_group_info *grp;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode = sbi->s_buddy_cache;
 
        mb_debug("load group %u\n", group);
 
        blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       grp = ext4_get_group_info(sb, group);
 
        e4b->bd_blkbits = sb->s_blocksize_bits;
        e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
        e4b->bd_group = group;
        e4b->bd_buddy_page = NULL;
        e4b->bd_bitmap_page = NULL;
+       e4b->alloc_semp = &grp->alloc_sem;
+
+       /* Take the read lock on the group alloc
+        * sem. This would make sure a parallel
+        * ext4_mb_init_group happening on other
+        * groups mapped by the page is blocked
+        * till we are done with allocation
+        */
+       down_read(e4b->alloc_semp);
 
        /*
         * the buddy cache inode stores the block bitmap
@@ -920,6 +931,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
        page = find_get_page(inode->i_mapping, pnum);
        if (page == NULL || !PageUptodate(page)) {
                if (page)
+                       /*
+                        * drop the page reference and try
+                        * to get the page with lock. If we
+                        * are not uptodate that implies
+                        * somebody just created the page but
+                        * is yet to initialize the same. So
+                        * wait for it to initialize.
+                        */
                        page_cache_release(page);
                page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
                if (page) {
@@ -985,6 +1004,9 @@ err:
                page_cache_release(e4b->bd_buddy_page);
        e4b->bd_buddy = NULL;
        e4b->bd_bitmap = NULL;
+
+       /* Done with the buddy cache */
+       up_read(e4b->alloc_semp);
        return ret;
 }
 
@@ -994,6 +1016,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
                page_cache_release(e4b->bd_bitmap_page);
        if (e4b->bd_buddy_page)
                page_cache_release(e4b->bd_buddy_page);
+       /* Done with the buddy cache */
+       up_read(e4b->alloc_semp);
 }
 
 
@@ -1696,6 +1720,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
 
+/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen  whild holding the buddy cache
+ * lock
+ */
+int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
+{
+       int i;
+       int block, pnum;
+       int blocks_per_page;
+       int groups_per_page;
+       ext4_group_t first_group;
+       struct ext4_group_info *grp;
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       first_group = pnum * blocks_per_page / 2;
+
+       groups_per_page = blocks_per_page >> 1;
+       if (groups_per_page == 0)
+               groups_per_page = 1;
+       /* read all groups the page covers into the cache */
+       for (i = 0; i < groups_per_page; i++) {
+
+               if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+                       break;
+               grp = ext4_get_group_info(sb, first_group + i);
+               /* take all groups write allocation
+                * semaphore. This make sure there is
+                * no block allocation going on in any
+                * of that groups
+                */
+               down_write(&grp->alloc_sem);
+       }
+       return i;
+}
+
+void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+                                       ext4_group_t group, int locked_group)
+{
+       int i;
+       int block, pnum;
+       int blocks_per_page;
+       ext4_group_t first_group;
+       struct ext4_group_info *grp;
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       first_group = pnum * blocks_per_page / 2;
+       /* release locks on all the groups */
+       for (i = 0; i < locked_group; i++) {
+
+               grp = ext4_get_group_info(sb, first_group + i);
+               /* take all groups write allocation
+                * semaphore. This make sure there is
+                * no block allocation going on in any
+                * of that groups
+                */
+               up_write(&grp->alloc_sem);
+       }
+
+}
+
+static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+{
+
+       int ret;
+       void *bitmap;
+       int blocks_per_page;
+       int block, pnum, poff;
+       int num_grp_locked = 0;
+       struct ext4_group_info *this_grp;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode = sbi->s_buddy_cache;
+       struct page *page = NULL, *bitmap_page = NULL;
+
+       mb_debug("init group %lu\n", group);
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       this_grp = ext4_get_group_info(sb, group);
+       /*
+        * This ensures we don't add group
+        * to this buddy cache via resize
+        */
+       num_grp_locked =  ext4_mb_get_buddy_cache_lock(sb, group);
+       if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
+               /*
+                * somebody initialized the group
+                * return without doing anything
+                */
+               ret = 0;
+               goto err;
+       }
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       poff = block % blocks_per_page;
+       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       if (page) {
+               BUG_ON(page->mapping != inode->i_mapping);
+               ret = ext4_mb_init_cache(page, NULL);
+               if (ret) {
+                       unlock_page(page);
+                       goto err;
+               }
+               unlock_page(page);
+       }
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
+               goto err;
+       }
+       mark_page_accessed(page);
+       bitmap_page = page;
+       bitmap = page_address(page) + (poff * sb->s_blocksize);
+
+       /* init buddy cache */
+       block++;
+       pnum = block / blocks_per_page;
+       poff = block % blocks_per_page;
+       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       if (page == bitmap_page) {
+               /*
+                * If both the bitmap and buddy are in
+                * the same page we don't need to force
+                * init the buddy
+                */
+               unlock_page(page);
+       } else if (page) {
+               BUG_ON(page->mapping != inode->i_mapping);
+               ret = ext4_mb_init_cache(page, bitmap);
+               if (ret) {
+                       unlock_page(page);
+                       goto err;
+               }
+               unlock_page(page);
+       }
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
+               goto err;
+       }
+       mark_page_accessed(page);
+err:
+       ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
+       if (bitmap_page)
+               page_cache_release(bitmap_page);
+       if (page)
+               page_cache_release(page);
+       return ret;
+}
+
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
@@ -1779,7 +1970,7 @@ repeat:
                                group = 0;
 
                        /* quick check to skip empty groups */
-                       grp = ext4_get_group_info(ac->ac_sb, group);
+                       grp = ext4_get_group_info(sb, group);
                        if (grp->bb_free == 0)
                                continue;
 
@@ -1792,10 +1983,9 @@ repeat:
                                 * we need full data about the group
                                 * to make a good selection
                                 */
-                               err = ext4_mb_load_buddy(sb, group, &e4b);
+                               err = ext4_mb_init_group(sb, group);
                                if (err)
                                        goto out;
-                               ext4_mb_release_desc(&e4b);
                        }
 
                        /*
@@ -2246,7 +2436,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 
 
 /* Create and initialize ext4_group_info data for the given group. */
-static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                          struct ext4_group_desc *desc)
 {
        int i, len;
@@ -2304,6 +2494,7 @@ static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        }
 
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+       init_rwsem(&meta_group_info[i]->alloc_sem);
        meta_group_info[i]->bb_free_root.rb_node = NULL;;
 
 #ifdef DOUBLE_CHECK
@@ -2330,54 +2521,6 @@ exit_meta_group_info:
        return -ENOMEM;
 } /* ext4_mb_add_groupinfo */
 
-/*
- * Add a group to the existing groups.
- * This function is used for online resize
- */
-int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
-                              struct ext4_group_desc *desc)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct inode *inode = sbi->s_buddy_cache;
-       int blocks_per_page;
-       int block;
-       int pnum;
-       struct page *page;
-       int err;
-
-       /* Add group based on group descriptor*/
-       err = ext4_mb_add_groupinfo(sb, group, desc);
-       if (err)
-               return err;
-
-       /*
-        * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
-        * datas) are set not up to date so that they will be re-initilaized
-        * during the next call to ext4_mb_load_buddy
-        */
-
-       /* Set buddy page as not up to date */
-       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-       block = group * 2;
-       pnum = block / blocks_per_page;
-       page = find_get_page(inode->i_mapping, pnum);
-       if (page != NULL) {
-               ClearPageUptodate(page);
-               page_cache_release(page);
-       }
-
-       /* Set bitmap page as not up to date */
-       block++;
-       pnum = block / blocks_per_page;
-       page = find_get_page(inode->i_mapping, pnum);
-       if (page != NULL) {
-               ClearPageUptodate(page);
-               page_cache_release(page);
-       }
-
-       return 0;
-}
-
 /*
  * Update an existing group.
  * This function is used for online resize
@@ -4588,11 +4731,6 @@ do_more:
        err = ext4_journal_get_write_access(handle, gd_bh);
        if (err)
                goto error_return;
-
-       err = ext4_mb_load_buddy(sb, block_group, &e4b);
-       if (err)
-               goto error_return;
-
 #ifdef AGGRESSIVE_CHECK
        {
                int i;
@@ -4606,6 +4744,8 @@ do_more:
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+       if (err)
+               goto error_return;
 
        if (ac) {
                ac->ac_b_ex.fe_group = block_group;
@@ -4614,6 +4754,9 @@ do_more:
                ext4_mb_store_history(ac);
        }
 
+       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       if (err)
+               goto error_return;
        if (metadata && ext4_handle_valid(handle)) {
                /* blocks being freed are metadata. these blocks shouldn't
                 * be used until this transaction is committed */