git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/transaction.c
Btrfs: don't flush the delalloc inodes in the while loop if flushoncommit is set
[karo-tx-linux.git] / fs / btrfs / transaction.c
index 50767bbaad6c6bfeb40e4d0e815446effd5392b3..265db57b33417b229b886573fda8f8b8836e4b07 100644 (file)
@@ -34,7 +34,7 @@
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
-void put_transaction(struct btrfs_transaction *transaction)
+static void put_transaction(struct btrfs_transaction *transaction)
 {
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
@@ -51,17 +51,41 @@ static noinline void switch_commit_root(struct btrfs_root *root)
 }
 
 static inline int can_join_transaction(struct btrfs_transaction *trans,
-                                      int type)
+                                      unsigned int type)
 {
        return !(trans->in_commit &&
-                type != TRANS_JOIN &&
-                type != TRANS_JOIN_NOLOCK);
+                (type & TRANS_EXTWRITERS));
+}
+
+static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
+                                        unsigned int type)
+{
+       if (type & TRANS_EXTWRITERS)
+               atomic_inc(&trans->num_extwriters);
+}
+
+static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
+                                        unsigned int type)
+{
+       if (type & TRANS_EXTWRITERS)
+               atomic_dec(&trans->num_extwriters);
+}
+
+static inline void extwriter_counter_init(struct btrfs_transaction *trans,
+                                         unsigned int type)
+{
+       atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
+}
+
+static inline int extwriter_counter_read(struct btrfs_transaction *trans)
+{
+       return atomic_read(&trans->num_extwriters);
 }
 
 /*
  * either allocate a new transaction or hop into the existing one
  */
-static noinline int join_transaction(struct btrfs_root *root, int type)
+static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
 {
        struct btrfs_transaction *cur_trans;
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -99,6 +123,7 @@ loop:
                }
                atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
+               extwriter_counter_inc(cur_trans, type);
                cur_trans->num_joined++;
                spin_unlock(&fs_info->trans_lock);
                return 0;
@@ -131,6 +156,7 @@ loop:
        }
 
        atomic_set(&cur_trans->num_writers, 1);
+       extwriter_counter_init(cur_trans, type);
        cur_trans->num_joined = 0;
        init_waitqueue_head(&cur_trans->writer_wait);
        init_waitqueue_head(&cur_trans->commit_wait);
@@ -162,7 +188,7 @@ loop:
        if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
                WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
                        "creating a fresh transaction\n");
-       atomic_set(&fs_info->tree_mod_seq, 0);
+       atomic64_set(&fs_info->tree_mod_seq, 0);
 
        spin_lock_init(&cur_trans->commit_lock);
        spin_lock_init(&cur_trans->delayed_refs.lock);
@@ -307,7 +333,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
 }
 
 static struct btrfs_trans_handle *
-start_transaction(struct btrfs_root *root, u64 num_items, int type,
+start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
                  enum btrfs_reserve_flush_enum flush)
 {
        struct btrfs_trans_handle *h;
@@ -320,7 +346,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
                return ERR_PTR(-EROFS);
 
        if (current->journal_info) {
-               WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
+               WARN_ON(type & TRANS_EXTWRITERS);
                h = current->journal_info;
                h->use_count++;
                WARN_ON(h->use_count > 2);
@@ -366,7 +392,7 @@ again:
         * If we are ATTACH, it means we just want to catch the current
         * transaction and commit it, so we needn't do sb_start_intwrite(). 
         */
-       if (type < TRANS_JOIN_NOLOCK)
+       if (type & __TRANS_FREEZABLE)
                sb_start_intwrite(root->fs_info->sb);
 
        if (may_wait_transaction(root, type))
@@ -429,7 +455,7 @@ got_it:
        return h;
 
 join_fail:
-       if (type < TRANS_JOIN_NOLOCK)
+       if (type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
        kmem_cache_free(btrfs_trans_handle_cachep, h);
 alloc_fail:
@@ -677,12 +703,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                }
        }
 
-       if (trans->type < TRANS_JOIN_NOLOCK)
+       if (trans->type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
 
        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
        atomic_dec(&cur_trans->num_writers);
+       extwriter_counter_dec(cur_trans, trans->type);
 
        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
@@ -707,23 +734,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
 {
-       int ret;
-
-       ret = __btrfs_end_transaction(trans, root, 0);
-       if (ret)
-               return ret;
-       return 0;
+       return __btrfs_end_transaction(trans, root, 0);
 }
 
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-       int ret;
-
-       ret = __btrfs_end_transaction(trans, root, 1);
-       if (ret)
-               return ret;
-       return 0;
+       return __btrfs_end_transaction(trans, root, 1);
 }
 
 int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
@@ -948,7 +965,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 int btrfs_add_dead_root(struct btrfs_root *root)
 {
        spin_lock(&root->fs_info->trans_lock);
-       list_add(&root->root_list, &root->fs_info->dead_roots);
+       list_add_tail(&root->root_list, &root->fs_info->dead_roots);
        spin_unlock(&root->fs_info->trans_lock);
        return 0;
 }
@@ -1179,13 +1196,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
        memcpy(new_root_item->parent_uuid, root->root_item.uuid,
                        BTRFS_UUID_SIZE);
+       if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
+               memset(new_root_item->received_uuid, 0,
+                      sizeof(new_root_item->received_uuid));
+               memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
+               memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
+               btrfs_set_root_stransid(new_root_item, 0);
+               btrfs_set_root_rtransid(new_root_item, 0);
+       }
        new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
        new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
        btrfs_set_root_otransid(new_root_item, trans->transid);
-       memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
-       memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
-       btrfs_set_root_stransid(new_root_item, 0);
-       btrfs_set_root_rtransid(new_root_item, 0);
 
        old = btrfs_lock_root_node(root);
        ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
@@ -1456,11 +1477,12 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
 
        spin_lock(&root->fs_info->trans_lock);
 
-       if (list_empty(&cur_trans->list)) {
-               spin_unlock(&root->fs_info->trans_lock);
-               btrfs_end_transaction(trans, root);
-               return;
-       }
+       /*
+        * If the transaction is removed from the list, it means this
+        * transaction has been committed successfully, so it is impossible
+        * to call the cleanup function.
+        */
+       BUG_ON(list_empty(&cur_trans->list));
 
        list_del_init(&cur_trans->list);
        if (cur_trans == root->fs_info->running_transaction) {
@@ -1487,29 +1509,17 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
                current->journal_info = NULL;
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+       spin_lock(&root->fs_info->trans_lock);
+       root->fs_info->trans_no_join = 0;
+       spin_unlock(&root->fs_info->trans_lock);
 }
 
 static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root)
 {
-       int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
-       int snap_pending = 0;
        int ret;
 
-       if (!flush_on_commit) {
-               spin_lock(&root->fs_info->trans_lock);
-               if (!list_empty(&trans->transaction->pending_snapshots))
-                       snap_pending = 1;
-               spin_unlock(&root->fs_info->trans_lock);
-       }
-
-       if (flush_on_commit || snap_pending) {
-               ret = btrfs_start_delalloc_inodes(root, 1);
-               if (ret)
-                       return ret;
-               btrfs_wait_ordered_extents(root, 1);
-       }
-
        ret = btrfs_run_delayed_items(trans, root);
        if (ret)
                return ret;
@@ -1533,6 +1543,19 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+{
+       if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+               return btrfs_start_all_delalloc_inodes(fs_info, 1);
+       return 0;
+}
+
+static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+{
+       if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+               btrfs_wait_all_ordered_extents(fs_info, 1);
+}
+
 /*
  * btrfs_transaction state sequence:
  *    in_commit = 0, blocked = 0  (initial)
@@ -1634,6 +1657,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                spin_unlock(&root->fs_info->trans_lock);
        }
 
+       extwriter_counter_dec(cur_trans, trans->type);
+
+       ret = btrfs_start_delalloc_flush(root->fs_info);
+       if (ret)
+               goto cleanup_transaction;
+
        if (!btrfs_test_opt(root, SSD) &&
            (now < cur_trans->start_time || now - cur_trans->start_time < 1))
                should_grow = 1;
@@ -1650,19 +1679,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                prepare_to_wait(&cur_trans->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
 
-               if (atomic_read(&cur_trans->num_writers) > 1)
-                       schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+               if (extwriter_counter_read(cur_trans) > 0)
+                       schedule();
                else if (should_grow)
                        schedule_timeout(1);
 
                finish_wait(&cur_trans->writer_wait, &wait);
-       } while (atomic_read(&cur_trans->num_writers) > 1 ||
+       } while (extwriter_counter_read(cur_trans) > 0 ||
                 (should_grow && cur_trans->num_joined != joined));
 
        ret = btrfs_flush_all_pending_stuffs(trans, root);
        if (ret)
                goto cleanup_transaction;
 
+       btrfs_wait_delalloc_flush(root->fs_info);
        /*
         * Ok now we need to make sure to block out any other joins while we
         * commit the transaction.  We could have started a join before setting
@@ -1808,7 +1838,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = btrfs_write_and_wait_transaction(trans, root);
        if (ret) {
                btrfs_error(root->fs_info, ret,
-                           "Error while writing out transaction.");
+                           "Error while writing out transaction");
                mutex_unlock(&root->fs_info->tree_log_mutex);
                goto cleanup_transaction;
        }
@@ -1840,7 +1870,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
-       if (trans->type < TRANS_JOIN_NOLOCK)
+       if (trans->type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
 
        trace_btrfs_transaction_commit(root);
@@ -1864,8 +1894,7 @@ cleanup_transaction:
                btrfs_qgroup_free(root, trans->qgroup_reserved);
                trans->qgroup_reserved = 0;
        }
-       btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
-//     WARN_ON(1);
+       btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
        if (current->journal_info == trans)
                current->journal_info = NULL;
        cleanup_transaction(trans, root, ret);
@@ -1874,31 +1903,44 @@ cleanup_transaction:
 }
 
 /*
- * interface function to delete all the snapshots we have scheduled for deletion
+ * return < 0 if error
+ * 0 if there are no more dead_roots at the time of call
+ * 1 there are more to be processed, call me again
+ *
+ * The return value indicates there are certainly more snapshots to delete, but
+ * if there comes a new one during processing, it may return 0. We don't mind,
+ * because btrfs_commit_super will poke cleaner thread and it will process it a
+ * few seconds later.
  */
-int btrfs_clean_old_snapshots(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
 {
-       LIST_HEAD(list);
+       int ret;
        struct btrfs_fs_info *fs_info = root->fs_info;
 
        spin_lock(&fs_info->trans_lock);
-       list_splice_init(&fs_info->dead_roots, &list);
+       if (list_empty(&fs_info->dead_roots)) {
+               spin_unlock(&fs_info->trans_lock);
+               return 0;
+       }
+       root = list_first_entry(&fs_info->dead_roots,
+                       struct btrfs_root, root_list);
+       list_del(&root->root_list);
        spin_unlock(&fs_info->trans_lock);
 
-       while (!list_empty(&list)) {
-               int ret;
+       pr_debug("btrfs: cleaner removing %llu\n",
+                       (unsigned long long)root->objectid);
 
-               root = list_entry(list.next, struct btrfs_root, root_list);
-               list_del(&root->root_list);
+       btrfs_kill_all_delayed_nodes(root);
 
-               btrfs_kill_all_delayed_nodes(root);
-
-               if (btrfs_header_backref_rev(root->node) <
-                   BTRFS_MIXED_BACKREF_REV)
-                       ret = btrfs_drop_snapshot(root, NULL, 0, 0);
-               else
-                       ret =btrfs_drop_snapshot(root, NULL, 1, 0);
-               BUG_ON(ret < 0);
-       }
-       return 0;
+       if (btrfs_header_backref_rev(root->node) <
+                       BTRFS_MIXED_BACKREF_REV)
+               ret = btrfs_drop_snapshot(root, NULL, 0, 0);
+       else
+               ret = btrfs_drop_snapshot(root, NULL, 1, 0);
+       /*
+        * If we encounter a transaction abort during snapshot cleaning, we
+        * don't want to crash here
+        */
+       BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
+       return 1;
 }