Btrfs: Btree defrag on the extent-mapping tree as well

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c7e47e7..ee1ae00 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -99,7 +99,6 @@
 		if (ret)
 			return ret;
 	} else {
-		WARN_ON(!root->ref_cows);
 		clean_tree_block(trans, root, buf);
 	}
 
@@ -162,13 +161,14 @@
 
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct buffer_head *parent,
-		       int cache_only)
+		       int cache_only, u64 *last_ret)
 {
 	struct btrfs_node *parent_node;
 	struct buffer_head *cur_bh;
 	struct buffer_head *tmp_bh;
 	u64 blocknr;
-	u64 search_start = 0;
+	u64 search_start = *last_ret;
+	u64 last_block = 0;
 	u64 other;
 	u32 parent_nritems;
 	int start_slot;
@@ -198,6 +198,8 @@
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
 		blocknr = btrfs_node_blockptr(parent_node, i);
+		if (last_block == 0)
+			last_block = blocknr;
 		if (i > 0) {
 			other = btrfs_node_blockptr(parent_node, i - 1);
 			close = close_blocks(blocknr, other);
@@ -206,8 +208,10 @@
 			other = btrfs_node_blockptr(parent_node, i + 1);
 			close = close_blocks(blocknr, other);
 		}
-		if (close)
+		if (close) {
+			last_block = blocknr;
 			continue;
+		}
 
 		cur_bh = btrfs_find_tree_block(root, blocknr);
 		if (!cur_bh || !buffer_uptodate(cur_bh) ||
@@ -219,9 +223,9 @@
 			brelse(cur_bh);
 			cur_bh = read_tree_block(root, blocknr);
 		}
-		if (search_start == 0) {
-			search_start = bh_blocknr(cur_bh) & ~((u64)65535);
-		}
+		if (search_start == 0)
+			search_start = last_block & ~((u64)65535);
+
 		err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
 					&tmp_bh, search_start,
 					min(8, end_slot - i));
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 59e09e3..d3cd564 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1019,6 +1019,8 @@
 	btrfs_item_offset((leaf)->items + (slot))))
 
 /* extent-tree.c */
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root);
 int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
 							 btrfs_fs_info *info,
@@ -1066,7 +1068,7 @@
 		      ins_len, int cow);
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct buffer_head *parent,
-		       int cache_only);
+		       int cache_only, u64 *last_ret);
 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
 struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 120c448..3418bb6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -396,6 +396,14 @@
 	return 0;
 }
 
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root)
+{ /* always acts on fs_info->extent_root, whichever root was passed */
+	finish_current_insert(trans, root->fs_info->extent_root);
+	del_pending_extents(trans, root->fs_info->extent_root);
+	return 0; /* NOTE(review): results of both calls are discarded */
+}
+
 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 blocknr,
 			     u64 num_blocks, u32 *refs)
@@ -1374,7 +1382,25 @@
 			BUG_ON(ret);
 			continue;
 		}
-		next = read_tree_block(root, blocknr);
+		next = btrfs_find_tree_block(root, blocknr);
+		if (!next || !buffer_uptodate(next)) {
+			brelse(next);
+			mutex_unlock(&root->fs_info->fs_mutex);
+			next = read_tree_block(root, blocknr);
+			mutex_lock(&root->fs_info->fs_mutex);
+
+			/* we dropped the lock, check one more time */
+			ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
+			BUG_ON(ret);
+			if (refs != 1) {
+				path->slots[*level]++;
+				brelse(next);
+				ret = btrfs_free_extent(trans, root,
+							blocknr, 1, 1);
+				BUG_ON(ret);
+				continue;
+			}
+		}
 		WARN_ON(*level <= 0);
 		if (path->nodes[*level-1])
 			btrfs_block_release(root, path->nodes[*level-1]);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 12aa043..5c05ecb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2159,9 +2159,7 @@
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_ioctl_vol_args vol_args;
-	struct btrfs_trans_handle *trans;
 	int ret = 0;
-	int err;
 	struct btrfs_dir_item *di;
 	int namelen;
 	struct btrfs_path *path;
@@ -2201,25 +2199,8 @@
 
 	case BTRFS_IOC_DEFRAG:
 		mutex_lock(&root->fs_info->fs_mutex);
-		trans = btrfs_start_transaction(root, 1);
-		memset(&root->defrag_progress, 0,
-		       sizeof(root->defrag_progress));
-		while (1) {
-			root->defrag_running = 1;
-			err = btrfs_defrag_leaves(trans, root, 0);
-
-			btrfs_end_transaction(trans, root);
-			mutex_unlock(&root->fs_info->fs_mutex);
-
-			btrfs_btree_balance_dirty(root);
-
-			mutex_lock(&root->fs_info->fs_mutex);
-			trans = btrfs_start_transaction(root, 1);
-			if (err != -EAGAIN)
-				break;
-		}
-		root->defrag_running = 0;
-		btrfs_end_transaction(trans, root);
+		btrfs_defrag_root(root, 0);
+		btrfs_defrag_root(root->fs_info->extent_root, 0);
 		mutex_unlock(&root->fs_info->fs_mutex);
 		ret = 0;
 		break;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2e797d5..74f3de4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -121,7 +121,9 @@
 		filemap_flush(root->fs_info->btree_inode->i_mapping);
 		return 0;
 	}
+	btrfs_clean_old_snapshots(root);
 	mutex_lock(&root->fs_info->fs_mutex);
+	btrfs_defrag_dirty_roots(root->fs_info);
 	trans = btrfs_start_transaction(root, 1);
 	ret = btrfs_commit_transaction(trans, root);
 	sb->s_dirt = 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 204337c..c9d52dc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -317,18 +317,47 @@
 	return err;
 }
 
+/*
+ * Defrag one root's btree.  Called with fs_info->fs_mutex held; the mutex is
+ * dropped and retaken between passes.  Returns 0 or the negative error from
+ * btrfs_defrag_leaves(); does nothing if a defrag is already running.
+ */
+int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	if (root->defrag_running)
+		return 0;
+	trans = btrfs_start_transaction(root, 1);
+	do {
+		root->defrag_running = 1;
+		ret = btrfs_defrag_leaves(trans, root, cacheonly);
+		btrfs_end_transaction(trans, root);
+		mutex_unlock(&info->fs_mutex);
+		btrfs_btree_balance_dirty(root);
+		cond_resched();
+		mutex_lock(&info->fs_mutex);
+		trans = btrfs_start_transaction(root, 1);
+	} while (ret == -EAGAIN);	/* -EAGAIN asks for another pass */
+	root->defrag_running = 0;
+	radix_tree_tag_clear(&info->fs_roots_radix,
+			     (unsigned long)root->root_key.objectid,
+			     BTRFS_ROOT_DEFRAG_TAG);
+	btrfs_end_transaction(trans, root);
+	return ret < 0 ? ret : 0;
+}
+
 int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
 {
 	struct btrfs_root *gang[1];
 	struct btrfs_root *root;
-	struct btrfs_root *tree_root = info->tree_root;
-	struct btrfs_trans_handle *trans;
 	int i;
 	int ret;
 	int err = 0;
 	u64 last = 0;
 
-	trans = btrfs_start_transaction(tree_root, 1);
 	while(1) {
 		ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
 						 (void **)gang, last,
@@ -339,37 +368,10 @@
 		for (i = 0; i < ret; i++) {
 			root = gang[i];
 			last = root->root_key.objectid + 1;
-			radix_tree_tag_clear(&info->fs_roots_radix,
-				     (unsigned long)root->root_key.objectid,
-				     BTRFS_ROOT_DEFRAG_TAG);
-			if (root->defrag_running)
-				continue;
-
-			while (1) {
-				mutex_lock(&root->fs_info->trans_mutex);
-				record_root_in_trans(root);
-				mutex_unlock(&root->fs_info->trans_mutex);
-
-				root->defrag_running = 1;
-				err = btrfs_defrag_leaves(trans, root, 1);
-				btrfs_end_transaction(trans, tree_root);
-				mutex_unlock(&info->fs_mutex);
-
-				btrfs_btree_balance_dirty(root);
-				cond_resched();
-
-				mutex_lock(&info->fs_mutex);
-				trans = btrfs_start_transaction(tree_root, 1);
-				if (err != -EAGAIN)
-					break;
-			}
-			root->defrag_running = 0;
-			radix_tree_tag_clear(&info->fs_roots_radix,
-				     (unsigned long)root->root_key.objectid,
-				     BTRFS_ROOT_DEFRAG_TAG);
+			btrfs_defrag_root(root, 1);
 		}
 	}
-	btrfs_end_transaction(trans, tree_root);
+	btrfs_defrag_root(info->extent_root, 1);
 	return err;
 }
 
@@ -527,6 +529,20 @@
 	return ret;
 }
 
+/* splice off fs_info->dead_roots under trans_mutex and drop them unlocked */
+int btrfs_clean_old_snapshots(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	LIST_HEAD(dead);
+
+	mutex_lock(&info->trans_mutex);
+	list_splice_init(&info->dead_roots, &dead);
+	mutex_unlock(&info->trans_mutex);
+
+	if (!list_empty(&dead))
+		drop_dirty_roots(root, &dead);
+	return 0;
+}
 void btrfs_transaction_cleaner(struct work_struct *work)
 {
 	struct btrfs_fs_info *fs_info = container_of(work,
@@ -536,12 +552,10 @@
 	struct btrfs_root *root = fs_info->tree_root;
 	struct btrfs_transaction *cur;
 	struct btrfs_trans_handle *trans;
-	struct list_head dirty_roots;
 	unsigned long now;
 	unsigned long delay = HZ * 30;
 	int ret;
 
-	INIT_LIST_HEAD(&dirty_roots);
 	mutex_lock(&root->fs_info->fs_mutex);
 	mutex_lock(&root->fs_info->trans_mutex);
 	cur = root->fs_info->running_transaction;
@@ -561,14 +575,7 @@
 	ret = btrfs_commit_transaction(trans, root);
 out:
 	mutex_unlock(&root->fs_info->fs_mutex);
-
-	mutex_lock(&root->fs_info->trans_mutex);
-	list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
-	mutex_unlock(&root->fs_info->trans_mutex);
-
-	if (!list_empty(&dirty_roots)) {
-		drop_dirty_roots(root, &dirty_roots);
-	}
+	btrfs_clean_old_snapshots(root);
 	btrfs_transaction_queue_work(root, delay);
 }
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 65a395e..d5f491d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -73,5 +73,7 @@
 void btrfs_exit_transaction_sys(void);
 int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list);
 int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
+int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
+int btrfs_clean_old_snapshots(struct btrfs_root *root);
 
 #endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 7ea66b4..a09064a 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -42,16 +42,20 @@
 static int defrag_walk_down(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root,
 			    struct btrfs_path *path, int *level,
-			    int cache_only)
+			    int cache_only, u64 *last_ret)
 {
 	struct buffer_head *next;
 	struct buffer_head *cur;
 	u64 blocknr;
 	int ret = 0;
+	int is_extent = 0;
 
 	WARN_ON(*level < 0);
 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
+	if (root->fs_info->extent_root == root)
+		is_extent = 1;
+
 	while(*level > 0) {
 		WARN_ON(*level < 0);
 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
@@ -70,7 +74,10 @@
 		if (*level == 1) {
 			ret = btrfs_realloc_node(trans, root,
 						 path->nodes[*level],
-						 cache_only);
+						 cache_only, last_ret);
+			if (is_extent)
+				btrfs_extent_post_op(trans, root);
+
 			break;
 		}
 		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
@@ -90,8 +97,13 @@
 		ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
 				      path->slots[*level], &next);
 		BUG_ON(ret);
-		ret = btrfs_realloc_node(trans, root, next, cache_only);
+		ret = btrfs_realloc_node(trans, root, next, cache_only,
+					 last_ret);
 		BUG_ON(ret);
+
+		if (is_extent)
+			btrfs_extent_post_op(trans, root);
+
 		WARN_ON(*level <= 0);
 		if (path->nodes[*level-1])
 			btrfs_block_release(root, path->nodes[*level-1]);
@@ -148,10 +160,14 @@
 	int level;
 	int orig_level;
 	int i;
+	int is_extent = 0;
+	u64 last_ret = 0;
 
-	if (root->ref_cows == 0) {
+	if (root->fs_info->extent_root == root)
+		is_extent = 1;
+
+	if (root->ref_cows == 0 && !is_extent)
 		goto out;
-	}
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -165,16 +181,21 @@
 		get_bh(root->node);
 		ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
 		BUG_ON(ret);
-		ret = btrfs_realloc_node(trans, root, root->node, cache_only);
+		ret = btrfs_realloc_node(trans, root, root->node, cache_only,
+					 &last_ret);
 		BUG_ON(ret);
 		path->nodes[level] = root->node;
 		path->slots[level] = 0;
+		if (is_extent)
+			btrfs_extent_post_op(trans, root);
 	} else {
 		level = root->defrag_level;
 		path->lowest_level = level;
 		wret = btrfs_search_slot(trans, root, &root->defrag_progress,
 					 path, 0, 1);
 
+		if (is_extent)
+			btrfs_extent_post_op(trans, root);
 		if (wret < 0) {
 			ret = wret;
 			goto out;
@@ -188,7 +209,8 @@
 	}
 
 	while(1) {
-		wret = defrag_walk_down(trans, root, path, &level, cache_only);
+		wret = defrag_walk_down(trans, root, path, &level, cache_only,
+					&last_ret);
 		if (wret > 0)
 			break;
 		if (wret < 0)