Btrfs: Implement basic support for -ENOSPC

This is intended to prevent accidentally filling the drive.  A determined
user can still make things oops.

It includes some accounting of the current bytes under delayed allocation,
but this will change as things get optimized.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2559f5d..10129cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -346,6 +346,8 @@
 	int closing;
 
 	u64 total_pinned;
+	spinlock_t delalloc_lock;
+	u64 delalloc_bytes;
 };
 /*
  * in ram representation of the tree.  extent_root is used for all allocations
@@ -1115,6 +1117,8 @@
 			struct btrfs_root *root, struct btrfs_path *path,
 			u64 isize);
 /* inode.c */
+int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
+			   int for_del);
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ebb2db6..eebb4fb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -223,7 +223,8 @@
 		} else {
 			thresh = 8 * 1024 * 1024;
 		}
-		num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY);
+		num_dirty = count_range_bits(tree, &start, (u64)-1,
+					     thresh, EXTENT_DIRTY);
 		if (num_dirty < thresh) {
 			return 0;
 		}
@@ -559,6 +560,7 @@
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->hashers);
 	spin_lock_init(&fs_info->hash_lock);
+	spin_lock_init(&fs_info->delalloc_lock);
 
 	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
 	init_completion(&fs_info->kobj_unregister);
@@ -570,6 +572,7 @@
 	fs_info->sb = sb;
 	fs_info->mount_opt = 0;
 	fs_info->max_extent = (u64)-1;
+	fs_info->delalloc_bytes = 0;
 	fs_info->btree_inode = new_inode(sb);
 	fs_info->btree_inode->i_ino = 1;
 	fs_info->btree_inode->i_nlink = 1;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a0dff34..2b92f10 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1131,7 +1131,8 @@
 }
 
 u64 count_range_bits(struct extent_map_tree *tree,
-		     u64 *start, u64 max_bytes, unsigned long bits)
+		     u64 *start, u64 search_end, u64 max_bytes,
+		     unsigned long bits)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1139,9 +1140,14 @@
 	u64 total_bytes = 0;
 	int found = 0;
 
+	if (search_end <= cur_start) {
+		printk("search_end %Lu start %Lu\n", search_end, cur_start);
+		WARN_ON(1);
+		return 0;
+	}
+
 	write_lock_irq(&tree->lock);
-	if (bits == EXTENT_DIRTY) {
-		*start = 0;
+	if (cur_start == 0 && bits == EXTENT_DIRTY) {
 		total_bytes = tree->dirty_bytes;
 		goto out;
 	}
@@ -1156,8 +1162,11 @@
 
 	while(1) {
 		state = rb_entry(node, struct extent_state, rb_node);
-		if ((state->state & bits)) {
-			total_bytes += state->end - state->start + 1;
+		if (state->start > search_end)
+			break;
+		if (state->end >= cur_start && (state->state & bits)) {
+			total_bytes += min(search_end, state->end) + 1 -
+				       max(cur_start, state->start);
 			if (total_bytes >= max_bytes)
 				break;
 			if (!found) {
@@ -1173,7 +1182,6 @@
 	write_unlock_irq(&tree->lock);
 	return total_bytes;
 }
-
 /*
  * helper function to lock both pages and extents in the tree.
  * pages must be locked first.
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 6e572d3..ea60f54 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -115,7 +115,8 @@
 void extent_map_exit(void);
 
 u64 count_range_bits(struct extent_map_tree *tree,
-		     u64 *start, u64 max_bytes, unsigned long bits);
+		     u64 *start, u64 search_end,
+		     u64 max_bytes, unsigned long bits);
 
 int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
 		   int bits, int filled);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 461b096..71dc2d3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -307,6 +307,7 @@
 	    inline_size > 32768 ||
 	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
 		u64 last_end;
+		u64 existing_delalloc = 0;
 
 		for (i = 0; i < num_pages; i++) {
 			struct page *p = pages[i];
@@ -316,8 +317,19 @@
 		last_end = (u64)(pages[num_pages -1]->index) <<
 				PAGE_CACHE_SHIFT;
 		last_end += PAGE_CACHE_SIZE - 1;
+		if (start_pos < isize) {
+			u64 delalloc_start = start_pos;
+			existing_delalloc = count_range_bits(em_tree,
+					     &delalloc_start,
+					     end_of_last_block, (u64)-1,
+					     EXTENT_DELALLOC);
+		}
 		set_extent_delalloc(em_tree, start_pos, end_of_last_block,
 				 GFP_NOFS);
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->delalloc_bytes += (end_of_last_block + 1 -
+					  start_pos) - existing_delalloc;
+		spin_unlock(&root->fs_info->delalloc_lock);
 	} else {
 		u64 aligned_end;
 		/* step one, delete the existing extents in this range */
@@ -708,12 +720,12 @@
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
-		goto out;
+		goto out_nolock;
 	if (count == 0)
-		goto out;
+		goto out_nolock;
 	err = remove_suid(fdentry(file));
 	if (err)
-		goto out;
+		goto out_nolock;
 	file_update_time(file);
 
 	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
@@ -758,6 +770,13 @@
 
 		WARN_ON(num_pages > nrptrs);
 		memset(pages, 0, sizeof(pages));
+
+		mutex_lock(&root->fs_info->fs_mutex);
+		ret = btrfs_check_free_space(root, write_bytes, 0);
+		mutex_unlock(&root->fs_info->fs_mutex);
+		if (ret)
+			goto out;
+
 		ret = prepare_pages(root, file, pages, num_pages,
 				    pos, first_index, last_index,
 				    write_bytes);
@@ -787,8 +806,9 @@
 			btrfs_btree_balance_dirty(root, 1);
 		cond_resched();
 	}
-	mutex_unlock(&inode->i_mutex);
 out:
+	mutex_unlock(&inode->i_mutex);
+out_nolock:
 	kfree(pages);
 	if (pinned[0])
 		page_cache_release(pinned[0]);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c61c85..a9f5d6d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -72,6 +72,26 @@
 	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
 };
 
+/*
+ * Returns -ENOSPC if the superblock's bytes_used plus the bytes currently
+ * under delalloc plus num_required would go over 85% of the superblock's
+ * total_bytes (90% when for_del is non-zero); returns 0 otherwise.
+ */
+int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
+			   int for_del)
+{
+	u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	u64 wanted = btrfs_super_bytes_used(&root->fs_info->super_copy);
+	u64 limit = (total * (for_del ? 90 : 85)) / 100;
+	int over;
+
+	spin_lock(&root->fs_info->delalloc_lock);
+	wanted += root->fs_info->delalloc_bytes + num_required;
+	over = wanted > limit;
+	spin_unlock(&root->fs_info->delalloc_lock);
+	return over ? -ENOSPC : 0;
+}
+
 static int cow_file_range(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -124,6 +144,7 @@
 	u64 extent_end;
 	u64 bytenr;
 	u64 cow_end;
+	u64 loops = 0;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_buffer *leaf;
 	int found_type;
@@ -169,6 +190,9 @@
 		       btrfs_file_extent_num_bytes(leaf, item);
 		err = 0;
 
+		if (loops && start != extent_start)
+			goto not_found;
+
 		if (start < extent_start || start >= extent_end)
 			goto not_found;
 
@@ -191,6 +215,7 @@
 		return 0;
 	}
 	btrfs_release_path(root, path);
+	loops++;
 	goto again;
 
 not_found:
@@ -202,6 +227,7 @@
 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	u64 num_bytes;
 	int ret;
 
 	mutex_lock(&root->fs_info->fs_mutex);
@@ -209,6 +235,17 @@
 		ret = run_delalloc_nocow(inode, start, end);
 	else
 		ret = cow_file_range(inode, start, end);
+
+	spin_lock(&root->fs_info->delalloc_lock);
+	num_bytes = end + 1 - start;
+	if (root->fs_info->delalloc_bytes < num_bytes) {
+		printk("delalloc accounting error total %llu sub %llu\n",
+		       root->fs_info->delalloc_bytes, num_bytes);
+	} else {
+		root->fs_info->delalloc_bytes -= num_bytes;
+	}
+	spin_unlock(&root->fs_info->delalloc_lock);
+
 	mutex_unlock(&root->fs_info->fs_mutex);
 	return ret;
 }
@@ -547,10 +584,15 @@
 	struct btrfs_root *root;
 	struct btrfs_trans_handle *trans;
 	int ret;
-	unsigned long nr;
+	unsigned long nr = 0;
 
 	root = BTRFS_I(dir)->root;
 	mutex_lock(&root->fs_info->fs_mutex);
+
+	ret = btrfs_check_free_space(root, 1, 1);
+	if (ret)
+		goto fail;
+
 	trans = btrfs_start_transaction(root, 1);
 
 	btrfs_set_trans_block_group(trans, dir);
@@ -558,25 +600,29 @@
 	nr = trans->blocks_used;
 
 	btrfs_end_transaction(trans, root);
+fail:
 	mutex_unlock(&root->fs_info->fs_mutex);
 	btrfs_btree_balance_dirty(root, nr);
-
 	return ret;
 }
 
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	int err;
+	int err = 0;
 	int ret;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_trans_handle *trans;
-	unsigned long nr;
+	unsigned long nr = 0;
 
 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
 		return -ENOTEMPTY;
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_check_free_space(root, 1, 1);
+	if (ret)
+		goto fail;
+
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, dir);
 
@@ -588,6 +634,7 @@
 
 	nr = trans->blocks_used;
 	ret = btrfs_end_transaction(trans, root);
+fail:
 	mutex_unlock(&root->fs_info->fs_mutex);
 	btrfs_btree_balance_dirty(root, nr);
 
@@ -792,17 +839,29 @@
 			      size_t zero_start)
 {
 	char *kaddr;
-	int ret = 0;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+	u64 existing_delalloc;
+	u64 delalloc_start;
+	int ret = 0;
 
 	WARN_ON(!PageLocked(page));
 	set_page_extent_mapped(page);
 
 	lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+	delalloc_start = page_start;
+	existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree,
+					     &delalloc_start, page_end,
+					     PAGE_CACHE_SIZE, EXTENT_DELALLOC);
 	set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
 			    page_end, GFP_NOFS);
+
+	spin_lock(&root->fs_info->delalloc_lock);
+	root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc;
+	spin_unlock(&root->fs_info->delalloc_lock);
+
 	if (zero_start != PAGE_CACHE_SIZE) {
 		kaddr = kmap(page);
 		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
@@ -881,6 +940,12 @@
 		if (attr->ia_size <= pos)
 			goto out;
 
+		mutex_lock(&root->fs_info->fs_mutex);
+		err = btrfs_check_free_space(root, 1, 0);
+		mutex_unlock(&root->fs_info->fs_mutex);
+		if (err)
+			goto fail;
+
 		btrfs_truncate_page(inode->i_mapping, inode->i_size);
 
 		lock_extent(em_tree, pos, block_end, GFP_NOFS);
@@ -906,7 +971,7 @@
 	}
 out:
 	err = inode_setattr(inode, attr);
-
+fail:
 	return err;
 }
 void btrfs_delete_inode(struct inode *inode)
@@ -1440,16 +1505,20 @@
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	int err;
 	int drop_inode = 0;
 	u64 objectid;
-	unsigned long nr;
+	unsigned long nr = 0;
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto fail;
+
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, dir);
 
@@ -1480,6 +1549,7 @@
 out_unlock:
 	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
+fail:
 	mutex_unlock(&root->fs_info->fs_mutex);
 
 	if (drop_inode) {
@@ -1495,13 +1565,16 @@
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	int err;
 	int drop_inode = 0;
-	unsigned long nr;
+	unsigned long nr = 0;
 	u64 objectid;
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto fail;
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, dir);
 
@@ -1535,6 +1608,7 @@
 out_unlock:
 	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
+fail:
 	mutex_unlock(&root->fs_info->fs_mutex);
 
 	if (drop_inode) {
@@ -1551,7 +1625,7 @@
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct inode *inode = old_dentry->d_inode;
-	unsigned long nr;
+	unsigned long nr = 0;
 	int err;
 	int drop_inode = 0;
 
@@ -1564,6 +1638,9 @@
 	inc_nlink(inode);
 #endif
 	mutex_lock(&root->fs_info->fs_mutex);
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto fail;
 	trans = btrfs_start_transaction(root, 1);
 
 	btrfs_set_trans_block_group(trans, dir);
@@ -1582,6 +1659,7 @@
 
 	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
+fail:
 	mutex_unlock(&root->fs_info->fs_mutex);
 
 	if (drop_inode) {
@@ -1603,6 +1681,10 @@
 	unsigned long nr = 1;
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto out_unlock;
+
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, dir);
 
@@ -1869,6 +1951,20 @@
 static int btrfs_prepare_write(struct file *file, struct page *page,
 			       unsigned from, unsigned to)
 {
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	int err;
+
+	/*
+	 * Refuse the write with -ENOSPC when a page worth of space is not
+	 * available.  fs_mutex is taken only around the free space check,
+	 * so it has to be dropped again before any page work is done.
+	 */
+	mutex_lock(&root->fs_info->fs_mutex);
+	err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	if (err)
+		return -ENOSPC;
+
 	return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree,
 				    page->mapping->host, page, from, to,
 				    btrfs_get_extent);
@@ -1880,6 +1971,7 @@
 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
 	return extent_read_full_page(tree, page, btrfs_get_extent);
 }
+
 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_map_tree *tree;
@@ -1954,11 +2046,24 @@
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 {
 	struct inode *inode = fdentry(vma->vm_file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	unsigned long end;
 	loff_t size;
-	int ret = -EINVAL;
+	int ret;
 	u64 page_start;
 
+	/*
+	 * Check for a page worth of free space before the page is made
+	 * writable.  fs_mutex covers only this check and is released
+	 * before snap_sem and the page lock are taken below.
+	 */
+	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;
+
 	down_read(&BTRFS_I(inode)->root->snap_sem);
 	lock_page(page);
 	wait_on_page_writeback(page);
@@ -1982,6 +2083,7 @@
 out_unlock:
 	up_read(&BTRFS_I(inode)->root->snap_sem);
 	unlock_page(page);
+out:
 	return ret;
 }
 
@@ -2046,6 +2148,10 @@
 	unsigned long nr = 1;
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_check_free_space(root, 1, 0);
+	if (ret)
+		goto fail_commit;
+
 	trans = btrfs_start_transaction(root, 1);
 	BUG_ON(!trans);
 
@@ -2162,7 +2268,7 @@
 	int ret;
 	int err;
 	u64 objectid;
-	unsigned long nr;
+	unsigned long nr = 0;
 
 	if (!root->ref_cows)
 		return -EINVAL;
@@ -2172,6 +2278,10 @@
 	thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb);
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_check_free_space(root, 1, 0);
+	if (ret)
+		goto fail_unlock;
+
 	trans = btrfs_start_transaction(root, 1);
 	BUG_ON(!trans);
 
@@ -2229,7 +2339,7 @@
 
 	if (err && !ret)
 		ret = err;
-
+fail_unlock:
 	mutex_unlock(&root->fs_info->fs_mutex);
 	up_write(&root->snap_sem);
 	btrfs_btree_balance_dirty(root, nr);
@@ -2255,6 +2365,7 @@
 
 int btrfs_defrag_file(struct file *file) {
 	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct page *page;
 	unsigned long last_index;
@@ -2262,6 +2373,13 @@
 	u64 page_start;
 	u64 page_end;
 	unsigned long i;
+	int ret;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_check_free_space(root, inode->i_size, 0);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	if (ret)
+		return -ENOSPC;
 
 	mutex_lock(&inode->i_mutex);
 	last_index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -2522,6 +2640,10 @@
 	}
 
 	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_check_free_space(root, 1, 0);
+	if (ret)
+		goto out_unlock;
+
 	trans = btrfs_start_transaction(root, 1);
 
 	btrfs_set_trans_block_group(trans, new_dir);
@@ -2553,6 +2675,7 @@
 out_fail:
 	btrfs_free_path(path);
 	btrfs_end_transaction(trans, root);
+out_unlock:
 	mutex_unlock(&root->fs_info->fs_mutex);
 	return ret;
 }
@@ -2564,7 +2687,7 @@
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	int err;
 	int drop_inode = 0;
 	u64 objectid;
@@ -2573,12 +2696,17 @@
 	unsigned long ptr;
 	struct btrfs_file_extent_item *ei;
 	struct extent_buffer *leaf;
-	unsigned long nr;
+	unsigned long nr = 0;
 
 	name_len = strlen(symname) + 1;
 	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
 		return -ENAMETOOLONG;
+
 	mutex_lock(&root->fs_info->fs_mutex);
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto out_fail;
+
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, dir);
 
@@ -2645,6 +2773,7 @@
 out_unlock:
 	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
+out_fail:
 	mutex_unlock(&root->fs_info->fs_mutex);
 	if (drop_inode) {
 		inode_dec_link_count(inode);