Btrfs: tree logging checksum fixes
This patch contains the following changes:
1) Limit the max size of btrfs_ordered_sum structure to PAGE_SIZE. This
struct is kmalloced so we want to keep it reasonable.
2) Replace copy_extent_csums with btrfs_lookup_csums_range.
copy_extent_csums was duplicated code in tree-log.c.
3) Remove replay_one_csum. csum items are replayed at the same time as
replaying file extents. This guarantees we only replay useful csums.
4) nbytes accounting fix.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 171ca30..293da65 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5579,7 +5579,7 @@
BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
- ret = btrfs_lookup_csums_range(root, disk_bytenr,
+ ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
disk_bytenr + len - 1, &list);
while (!list_empty(&list)) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b11abfa..9646524 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -27,6 +27,12 @@
#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
size) - 1))
+
+#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
+ sizeof(struct btrfs_ordered_sum)) / \
+ sizeof(struct btrfs_sector_sum) * \
+ (r)->sectorsize - (r)->sectorsize)
+
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
@@ -259,8 +265,7 @@
key.offset = start;
key.type = BTRFS_EXTENT_CSUM_KEY;
- ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
- &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
if (ret > 0 && path->slots[0] > 0) {
@@ -279,7 +284,7 @@
while (start <= end) {
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root->fs_info->csum_root, path);
+ ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto fail;
if (ret > 0)
@@ -306,33 +311,38 @@
continue;
}
- size = min(csum_end, end + 1) - start;
- sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS);
- BUG_ON(!sums);
-
- sector_sum = sums->sums;
- sums->bytenr = start;
- sums->len = size;
-
- offset = (start - key.offset) >>
- root->fs_info->sb->s_blocksize_bits;
- offset *= csum_size;
-
+ csum_end = min(csum_end, end + 1);
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_csum_item);
- while (size > 0) {
- read_extent_buffer(path->nodes[0], &sector_sum->sum,
- ((unsigned long)item) + offset,
- csum_size);
- sector_sum->bytenr = start;
+ while (start < csum_end) {
+ size = min_t(size_t, csum_end - start,
+ MAX_ORDERED_SUM_BYTES(root));
+ sums = kzalloc(btrfs_ordered_sum_size(root, size),
+ GFP_NOFS);
+ BUG_ON(!sums);
- size -= root->sectorsize;
- start += root->sectorsize;
- offset += csum_size;
- sector_sum++;
+ sector_sum = sums->sums;
+ sums->bytenr = start;
+ sums->len = size;
+
+ offset = (start - key.offset) >>
+ root->fs_info->sb->s_blocksize_bits;
+ offset *= csum_size;
+
+ while (size > 0) {
+ read_extent_buffer(path->nodes[0],
+ &sector_sum->sum,
+ ((unsigned long)item) +
+ offset, csum_size);
+ sector_sum->bytenr = start;
+
+ size -= root->sectorsize;
+ start += root->sectorsize;
+ offset += csum_size;
+ sector_sum++;
+ }
+ list_add_tail(&sums->list, list);
}
- list_add_tail(&sums->list, list);
-
path->slots[0]++;
}
ret = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c0ca9c3..4e57fe6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -157,7 +157,6 @@
key.objectid = inode->i_ino;
key.offset = start;
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- inode_add_bytes(inode, size);
datasize = btrfs_file_extent_calc_inline_size(cur_size);
inode_add_bytes(inode, size);
@@ -920,8 +919,8 @@
struct btrfs_ordered_sum *sums;
LIST_HEAD(list);
- ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1,
- &list);
+ ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
+ bytenr + num_bytes - 1, &list);
if (ret == 0 && list_empty(&list))
return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 3a72a1b..332ec35 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -433,49 +433,6 @@
trans->transid);
}
}
-
- if (overwrite_root &&
- key->type == BTRFS_EXTENT_DATA_KEY) {
- int extent_type;
- struct btrfs_file_extent_item *fi;
-
- fi = (struct btrfs_file_extent_item *)dst_ptr;
- extent_type = btrfs_file_extent_type(path->nodes[0], fi);
- if (extent_type == BTRFS_FILE_EXTENT_REG ||
- extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- struct btrfs_key ins;
- ins.objectid = btrfs_file_extent_disk_bytenr(
- path->nodes[0], fi);
- ins.offset = btrfs_file_extent_disk_num_bytes(
- path->nodes[0], fi);
- ins.type = BTRFS_EXTENT_ITEM_KEY;
-
- /*
- * is this extent already allocated in the extent
- * allocation tree? If so, just add a reference
- */
- ret = btrfs_lookup_extent(root, ins.objectid,
- ins.offset);
- if (ret == 0) {
- ret = btrfs_inc_extent_ref(trans, root,
- ins.objectid, ins.offset,
- path->nodes[0]->start,
- root->root_key.objectid,
- trans->transid, key->objectid);
- } else {
- /*
- * insert the extent pointer in the extent
- * allocation tree
- */
- ret = btrfs_alloc_logged_extent(trans, root,
- path->nodes[0]->start,
- root->root_key.objectid,
- trans->transid, key->objectid,
- &ins);
- BUG_ON(ret);
- }
- }
- }
no_copy:
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(root, path);
@@ -530,6 +487,7 @@
u64 extent_end;
u64 alloc_hint;
u64 start = key->offset;
+ u64 saved_nbytes;
struct btrfs_file_extent_item *item;
struct inode *inode = NULL;
unsigned long size;
@@ -591,17 +549,95 @@
}
btrfs_release_path(root, path);
+ saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
ret = btrfs_drop_extents(trans, root, inode,
start, extent_end, start, &alloc_hint);
BUG_ON(ret);
- /* insert the extent */
- ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
+ if (found_type == BTRFS_FILE_EXTENT_REG ||
+ found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ unsigned long dest_offset;
+ struct btrfs_key ins;
- /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */
- inode_add_bytes(inode, extent_end - start);
+ ret = btrfs_insert_empty_item(trans, root, path, key,
+ sizeof(*item));
+ BUG_ON(ret);
+ dest_offset = btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ copy_extent_buffer(path->nodes[0], eb, dest_offset,
+ (unsigned long)item, sizeof(*item));
+
+ ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
+ ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+ if (ins.objectid > 0) {
+ u64 csum_start;
+ u64 csum_end;
+ LIST_HEAD(ordered_sums);
+ /*
+ * is this extent already allocated in the extent
+ * allocation tree? If so, just add a reference
+ */
+ ret = btrfs_lookup_extent(root, ins.objectid,
+ ins.offset);
+ if (ret == 0) {
+ ret = btrfs_inc_extent_ref(trans, root,
+ ins.objectid, ins.offset,
+ path->nodes[0]->start,
+ root->root_key.objectid,
+ trans->transid, key->objectid);
+ } else {
+ /*
+ * insert the extent pointer in the extent
+ * allocation tree
+ */
+ ret = btrfs_alloc_logged_extent(trans, root,
+ path->nodes[0]->start,
+ root->root_key.objectid,
+ trans->transid, key->objectid,
+ &ins);
+ BUG_ON(ret);
+ }
+ btrfs_release_path(root, path);
+
+ if (btrfs_file_extent_compression(eb, item)) {
+ csum_start = ins.objectid;
+ csum_end = csum_start + ins.offset;
+ } else {
+ csum_start = ins.objectid +
+ btrfs_file_extent_offset(eb, item);
+ csum_end = csum_start +
+ btrfs_file_extent_num_bytes(eb, item);
+ }
+
+ ret = btrfs_lookup_csums_range(root->log_root,
+ csum_start, csum_end - 1,
+ &ordered_sums);
+ BUG_ON(ret);
+ while (!list_empty(&ordered_sums)) {
+ struct btrfs_ordered_sum *sums;
+ sums = list_entry(ordered_sums.next,
+ struct btrfs_ordered_sum,
+ list);
+ ret = btrfs_csum_file_blocks(trans,
+ root->fs_info->csum_root,
+ sums);
+ BUG_ON(ret);
+ list_del(&sums->list);
+ kfree(sums);
+ }
+ } else {
+ btrfs_release_path(root, path);
+ }
+ } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+ /* inline extents are easy, we just overwrite them */
+ ret = overwrite_item(trans, root, path, eb, slot, key);
+ BUG_ON(ret);
+ }
+
+ inode_set_bytes(inode, saved_nbytes);
btrfs_update_inode(trans, root, inode);
out:
if (inode)
@@ -903,70 +939,6 @@
}
/*
- * replay one csum item from the log tree into the subvolume 'root'
- * eb, slot and key all refer to the log tree
- * path is for temp use by this function and should be released on return
- *
- * This copies the checksums out of the log tree and inserts them into
- * the subvolume. Any existing checksums for this range in the file
- * are overwritten, and new items are added where required.
- *
- * We keep this simple by reusing the btrfs_ordered_sum code from
- * the data=ordered mode. This basically means making a copy
- * of all the checksums in ram, which we have to do anyway for kmap
- * rules.
- *
- * The copy is then sent down to btrfs_csum_file_blocks, which
- * does all the hard work of finding existing items in the file
- * or adding new ones.
- */
-static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
-{
- int ret;
- u32 item_size = btrfs_item_size_nr(eb, slot);
- u64 cur_offset;
- u16 csum_size =
- btrfs_super_csum_size(&root->fs_info->super_copy);
- unsigned long file_bytes;
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- unsigned long ptr;
-
- file_bytes = (item_size / csum_size) * root->sectorsize;
- sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
- if (!sums)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&sums->list);
- sums->len = file_bytes;
- sums->bytenr = key->offset;
-
- /*
- * copy all the sums into the ordered sum struct
- */
- sector_sum = sums->sums;
- cur_offset = key->offset;
- ptr = btrfs_item_ptr_offset(eb, slot);
- while (item_size > 0) {
- sector_sum->bytenr = cur_offset;
- read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
- sector_sum++;
- item_size -= csum_size;
- ptr += csum_size;
- cur_offset += root->sectorsize;
- }
-
- /* let btrfs_csum_file_blocks add them into the file */
- ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
- BUG_ON(ret);
- kfree(sums);
- return 0;
-}
-/*
* There are a few corners where the link count of the file can't
* be properly maintained during replay. So, instead of adding
* lots of complexity to the log code, we just scan the backrefs
@@ -1659,10 +1631,6 @@
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);
BUG_ON(ret);
- } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
- ret = replay_one_csum(wc->trans, root, path,
- eb, i, &key);
- BUG_ON(ret);
} else if (key.type == BTRFS_DIR_ITEM_KEY ||
key.type == BTRFS_DIR_INDEX_KEY) {
ret = replay_one_dir_item(wc->trans, root, path,
@@ -2021,7 +1989,7 @@
.process_func = process_one_buffer
};
- if (!root->log_root)
+ if (!root->log_root || root->fs_info->log_root_recovering)
return 0;
log = root->log_root;
@@ -2453,86 +2421,6 @@
return 0;
}
-static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
- struct list_head *list,
- struct btrfs_root *root,
- u64 disk_bytenr, u64 len)
-{
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- int ret;
- struct btrfs_path *path;
- struct btrfs_csum_item *item = NULL;
- u64 end = disk_bytenr + len;
- u64 item_start_offset = 0;
- u64 item_last_offset = 0;
- u32 diff;
- u32 sum;
- u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
-
- sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
-
- sector_sum = sums->sums;
- sums->bytenr = disk_bytenr;
- sums->len = len;
- list_add_tail(&sums->list, list);
-
- path = btrfs_alloc_path();
- while (disk_bytenr < end) {
- if (!item || disk_bytenr < item_start_offset ||
- disk_bytenr >= item_last_offset) {
- struct btrfs_key found_key;
- u32 item_size;
-
- if (item)
- btrfs_release_path(root, path);
- item = btrfs_lookup_csum(NULL, root, path,
- disk_bytenr, 0);
- if (IS_ERR(item)) {
- ret = PTR_ERR(item);
- if (ret == -ENOENT || ret == -EFBIG)
- ret = 0;
- sum = 0;
- printk(KERN_INFO "log no csum found for "
- "byte %llu\n",
- (unsigned long long)disk_bytenr);
- item = NULL;
- btrfs_release_path(root, path);
- goto found;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
-
- item_start_offset = found_key.offset;
- item_size = btrfs_item_size_nr(path->nodes[0],
- path->slots[0]);
- item_last_offset = item_start_offset +
- (item_size / csum_size) *
- root->sectorsize;
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_csum_item);
- }
- /*
- * this byte range must be able to fit inside
- * a single leaf so it will also fit inside a u32
- */
- diff = disk_bytenr - item_start_offset;
- diff = diff / root->sectorsize;
- diff = diff * csum_size;
-
- read_extent_buffer(path->nodes[0], &sum,
- ((unsigned long)item) + diff,
- csum_size);
-found:
- sector_sum->bytenr = disk_bytenr;
- sector_sum->sum = sum;
- disk_bytenr += root->sectorsize;
- sector_sum++;
- }
- btrfs_free_path(path);
- return 0;
-}
-
static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_root *log,
struct btrfs_path *dst_path,
@@ -2622,10 +2510,10 @@
trans->transid,
ins_keys[i].objectid);
BUG_ON(ret);
- ret = copy_extent_csums(trans,
- &ordered_sums,
- log->fs_info->csum_root,
- ds + cs, cl);
+ ret = btrfs_lookup_csums_range(
+ log->fs_info->csum_root,
+ ds + cs, ds + cs + cl - 1,
+ &ordered_sums);
BUG_ON(ret);
}
}
@@ -2942,9 +2830,9 @@
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
-
BUG_ON(!wc.replay_dest);
+ wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(wc.replay_dest);
ret = walk_log_tree(trans, log, &wc);
BUG_ON(ret);
@@ -2961,6 +2849,7 @@
}
key.offset = found_key.offset - 1;
+ wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
kfree(log);