Btrfs: extent_map and data=ordered fixes for space balancing
* Add an EXTENT_BOUNDARY state bit to keep the writepage code
from merging data extents that are in the process of being
relocated. This allows us to do accounting for them properly.
* The balancing code relocates data extents independently of the underlying
inode. The extent_map code was modified to properly account for
things moving around (invalidating extent_map caches in the inode).
* Don't take the drop_mutex in the create_subvol ioctl. It isn't
required.
* Fix walking of the ordered extent list to avoid races with sys_unlink.
* Change the lock ordering rules. Transaction start goes outside
the drop_mutex. This allows btrfs_commit_transaction to directly
drop the relocation trees.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8856570..1b7e51a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -294,7 +294,7 @@
last_pos_in_file,
0, 0, hole_size, 0);
btrfs_drop_extent_cache(inode, last_pos_in_file,
- last_pos_in_file + hole_size -1);
+ last_pos_in_file + hole_size - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
btrfs_check_file(root, inode);
}
@@ -337,7 +337,7 @@
inline_size -= start_pos;
err = insert_inline_extent(trans, root, inode, start_pos,
inline_size, pages, 0, num_pages);
- btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
+ btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0);
BUG_ON(err);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
@@ -362,7 +362,8 @@
return err;
}
-int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+ int skip_pinned)
{
struct extent_map *em;
struct extent_map *split = NULL;
@@ -371,6 +372,7 @@
u64 len = end - start + 1;
int ret;
int testend = 1;
+ unsigned long flags;
WARN_ON(end < start);
if (end == (u64)-1) {
@@ -389,6 +391,23 @@
spin_unlock(&em_tree->lock);
break;
}
+ flags = em->flags;
+ if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+ spin_unlock(&em_tree->lock);
+ if (em->start <= start &&
+ (!testend || em->start + em->len >= start + len)) {
+ free_extent_map(em);
+ break;
+ }
+ if (start < em->start) {
+ len = em->start - start;
+ } else {
+ len = start + len - (em->start + em->len);
+ start = em->start + em->len;
+ }
+ free_extent_map(em);
+ continue;
+ }
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
remove_extent_mapping(em_tree, em);
@@ -398,7 +417,7 @@
split->len = start - em->start;
split->block_start = em->block_start;
split->bdev = em->bdev;
- split->flags = em->flags;
+ split->flags = flags;
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
free_extent_map(split);
@@ -412,7 +431,7 @@
split->start = start + len;
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
- split->flags = em->flags;
+ split->flags = flags;
split->block_start = em->block_start + diff;
@@ -541,7 +560,7 @@
int recow;
int ret;
- btrfs_drop_extent_cache(inode, start, end - 1);
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
if (!path)