From 406266ab9ac8ed8b085c58aacd9e3161480dc5d5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 9 Dec 2009 22:00:38 +0000 Subject: btrfs: fix missing last-entry in readdir(3) parent 49313cdac7b34c9f7ecbb1780cfc648b1c082cd7 (v2.6.32-1-g49313cd) commit ff48c08e1c05c67e8348ab6f8a24de8034e0e34d Author: Jan Engelhardt Date: Wed Dec 9 22:57:36 2009 +0100 Btrfs: fix missing last-entry in readdir(3) When one does a 32-bit readdir(3), the last entry of a directory is missing. This is however not due to passing a large value to filldir, but it seems to have to do with glibc doing telldir or something quirky. In any case, this patch fixes it in practice. Signed-off-by: Jan Engelhardt Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5440bab2363..d5aa9731094 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3995,7 +3995,11 @@ skip: /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(off_t); + /* + * 32-bit glibc will use getdents64, but then strtol - + * so the last number we can serve is this. + */ + filp->f_pos = 0x7fffffff; else filp->f_pos++; nopos: -- cgit v1.2.3 From a038fab0cb873c75d6675e2bcffce8a3935bdce7 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 28 Dec 2009 05:01:58 +0000 Subject: Btrfs: align offsets for btrfs_ordered_update_i_size Some callers of btrfs_ordered_update_i_size can now pass in a NULL for the ordered extent to update against. This makes sure we properly align the offset they pass in when deciding how much to bump the on disk i_size. Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b10a49d4bc6..5c2a9e78a94 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -626,6 +626,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, if (ordered) offset = entry_end(ordered); + else + offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); mutex_lock(&tree->mutex); disk_i_size = BTRFS_I(inode)->disk_i_size; -- cgit v1.2.3 From 2423fdfb96e3f9ff3baeb6c4c78d74145547891d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 6 Jan 2010 16:57:22 +0000 Subject: Btrfs, fix memory leaks in error paths Stanse found 2 memory leaks in relocate_block_group and __btrfs_map_block. cluster and multi are not freed/assigned on all paths. Fix that. Signed-off-by: Jiri Slaby Cc: linux-btrfs@vger.kernel.org Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 4 +++- fs/btrfs/volumes.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a9728680eca..ed3e4a2ec2c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3281,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) return -ENOMEM; path = btrfs_alloc_path(); - if (!path) + if (!path) { + kfree(cluster); return -ENOMEM; + } rc->extents_found = 0; rc->extents_skipped = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 198cff28766..220dad5db01 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2649,8 +2649,10 @@ again: em = lookup_extent_mapping(em_tree, logical, *length); read_unlock(&em_tree->lock); - if (!em && unplug_page) + if (!em && unplug_page) { + kfree(multi); return 0; + } if (!em) { printk(KERN_CRIT "unable to find logical %llu len %llu\n", -- cgit v1.2.3 From 6c7d54ac87f338c479d9729e8392eca3f76e11e1 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jan 2010 08:43:09 +0000 Subject: Btrfs: Fix race in btrfs_mark_extent_written Fix bug reported by Johannes Hirte. The reason of that bug is btrfs_del_items is called after btrfs_duplicate_item and btrfs_del_items triggers tree balance. The fix is check that case and call btrfs_search_slot when needed. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/file.c | 100 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 20 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3bfe9f03990..ae96fdae1f7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -506,7 +506,8 @@ next_slot: } static int extent_mergeable(struct extent_buffer *leaf, int slot, - u64 objectid, u64 bytenr, u64 *start, u64 *end) + u64 objectid, u64 bytenr, u64 orig_offset, + u64 *start, u64 *end) { struct btrfs_file_extent_item *fi; struct btrfs_key key; @@ -522,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || + btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset || btrfs_file_extent_compression(leaf, fi) || btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) @@ -561,6 +563,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, u64 split; int del_nr = 0; int del_slot = 0; + int recow; int ret; btrfs_drop_extent_cache(inode, start, end - 1, 0); @@ -568,6 +571,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); again: + recow = 0; split = start; key.objectid = inode->i_ino; key.type = BTRFS_EXTENT_DATA_KEY; @@ -591,12 +595,60 @@ again: bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); + memcpy(&new_key, &key, sizeof(new_key)); + + if (start == key.offset && end < extent_end) { + other_start = 0; + other_end = start; + if (extent_mergeable(leaf, path->slots[0] - 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + new_key.offset = end; + btrfs_set_item_key_safe(trans, root, path, &new_key); + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - end); + btrfs_set_file_extent_offset(leaf, fi, + end - orig_offset); + fi = btrfs_item_ptr(leaf, path->slots[0] - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + end - other_start); + btrfs_mark_buffer_dirty(leaf); + goto out; + } + } + + if (start > key.offset && end == extent_end) { + other_start = end; + other_end = 0; + if (extent_mergeable(leaf, path->slots[0] + 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + start - key.offset); + path->slots[0]++; + new_key.offset = start; + btrfs_set_item_key_safe(trans, root, path, &new_key); + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + other_end - start); + btrfs_set_file_extent_offset(leaf, fi, + start - orig_offset); + btrfs_mark_buffer_dirty(leaf); + goto out; + } + } while (start > key.offset || end < extent_end) { if (key.offset == start) split = end; - memcpy(&new_key, &key, sizeof(new_key)); new_key.offset = split; ret = btrfs_duplicate_item(trans, root, path, &new_key); if (ret == -EAGAIN) { @@ -631,15 +683,18 @@ again: path->slots[0]--; extent_end = end; } + recow = 1; } - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - other_start = end; other_end = 0; - if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, - bytenr, &other_start, &other_end)) { + if (extent_mergeable(leaf, path->slots[0] + 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + if (recow) { + btrfs_release_path(root, path); + goto again; + } extent_end = other_end; del_slot = path->slots[0] + 1; del_nr++; @@ -650,8 +705,13 @@ again: } other_start = 0; other_end = start; - if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, - bytenr, &other_start, &other_end)) { + if (extent_mergeable(leaf, path->slots[0] - 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + if (recow) { + btrfs_release_path(root, path); + goto again; + } key.offset = other_start; del_slot = path->slots[0]; del_nr++; @@ -660,22 +720,22 @@ again: inode->i_ino, orig_offset); BUG_ON(ret); } + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); if (del_nr == 0) { btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); - goto out; - } - - fi = btrfs_item_ptr(leaf, del_slot - 1, - struct btrfs_file_extent_item); - btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_num_bytes(leaf, fi, - extent_end - key.offset); - btrfs_mark_buffer_dirty(leaf); + } else { + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - key.offset); + btrfs_mark_buffer_dirty(leaf); - ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - BUG_ON(ret); + ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + BUG_ON(ret); + } out: btrfs_free_path(path); return 0; -- cgit v1.2.3 From 6c090a11e1c403b727a6a8eff0b97d5fb9e95cb5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Jan 2010 20:08:22 +0000 Subject: Btrfs: fix regression in orphan cleanup Currently orphan cleanup only ever gets triggered if we cross subvolumes during a lookup, which means that if we just mount a plain jane fs that has orphans in it, they will never get cleaned up. This results in panic's like these http://www.kerneloops.org/oops.php?number=1109085 where adding an orphan entry results in -EEXIST being returned and we panic. In order to fix this, we check to see on lookup if our root has had the orphan cleanup done, and if not go ahead and do it. This is easily reproduceable by running this testcase #include #include #include #include #include #include int main(int argc, char **argv) { char data[4096]; char newdata[4096]; int fd1, fd2; memset(data, 'a', 4096); memset(newdata, 'b', 4096); while (1) { int i; fd1 = creat("file1", 0666); if (fd1 < 0) break; for (i = 0; i < 512; i++) write(fd1, data, 4096); fsync(fd1); close(fd1); fd2 = creat("file2", 0666); if (fd2 < 0) break; ftruncate(fd2, 4096 * 512); for (i = 0; i < 512; i++) write(fd2, newdata, 4096); close(fd2); i = rename("file2", "file1"); unlink("file1"); } return 0; } and then pulling the power on the box, and then trying to run that test again when the box comes back up. I've tested this locally and it fixes the problem. Thanks to Tomas Carnecky for helping me track this down initially. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d5aa9731094..b330e27c2d8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3796,6 +3796,12 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (location.type == BTRFS_INODE_ITEM_KEY) { inode = btrfs_iget(dir->i_sb, &location, root); + if (unlikely(root->clean_orphans) && + !(inode->i_sb->s_flags & MS_RDONLY)) { + down_read(&root->fs_info->cleanup_work_sem); + btrfs_orphan_cleanup(root); + up_read(&root->fs_info->cleanup_work_sem); + } return inode; } -- cgit v1.2.3 From a9cc71a60c29a09174bee2fcef8f924c529fd4b7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 17 Jan 2010 20:36:18 -0500 Subject: Btrfs: deal with NULL acl sent to btrfs_set_acl It is legal for btrfs_set_acl to be sent a NULL acl. This makes sure we don't dereference it. A similar patch was sent by Johannes Hirte Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 1898f8555f0..fa44e92e9b8 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -112,12 +112,14 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, switch (type) { case ACL_TYPE_ACCESS: mode = inode->i_mode; - ret = posix_acl_equiv_mode(acl, &mode); - if (ret < 0) - return ret; - ret = 0; - inode->i_mode = mode; name = POSIX_ACL_XATTR_ACCESS; + if (acl) { + ret = posix_acl_equiv_mode(acl, &mode); + if (ret < 0) + return ret; + inode->i_mode = mode; + } + ret = 0; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) -- cgit v1.2.3 From 11dfe35a0108097f2df1f042c485fa7f758c2cdf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Nov 2009 20:12:59 +0000 Subject: Btrfs: fix possible panic on unmount We can race with the unmount of an fs and the stopping of a kthread where we will free the block group before we're done using it. The reason for this is because we do not hold a reference on the block group while its caching, since the allocator drops its reference once it exits or moves on to the next block group. This patch fixes the problem by taking a reference to the block group before we start caching and dropping it when we're done to make sure all accesses to the block group are safe. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 56e50137d0e..432a2da4641 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } +void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +{ + atomic_inc(&cache->count); +} + +void btrfs_put_block_group(struct btrfs_block_group_cache *cache) +{ + if (atomic_dec_and_test(&cache->count)) + kfree(cache); +} + /* * this adds the block group to the fs_info rb tree for the block group * cache @@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, } } if (ret) - atomic_inc(&ret->count); + btrfs_get_block_group(ret); spin_unlock(&info->block_group_cache_lock); return ret; @@ -407,6 +418,8 @@ err: put_caching_control(caching_ctl); atomic_dec(&block_group->space_info->caching_threads); + btrfs_put_block_group(block_group); + return 0; } @@ -447,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) up_write(&fs_info->extent_commit_sem); atomic_inc(&cache->space_info->caching_threads); + btrfs_get_block_group(cache); tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", cache->key.objectid); @@ -486,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( return cache; } -void btrfs_put_block_group(struct btrfs_block_group_cache *cache) -{ - if (atomic_dec_and_test(&cache->count)) - kfree(cache); -} - static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { @@ -2582,7 +2590,7 @@ next_block_group(struct btrfs_root *root, if (node) { cache = rb_entry(node, struct btrfs_block_group_cache, cache_node); - atomic_inc(&cache->count); + btrfs_get_block_group(cache); } else cache = NULL; spin_unlock(&root->fs_info->block_group_cache_lock); @@ -4227,7 +4235,7 @@ search: u64 offset; int cached; - atomic_inc(&block_group->count); + btrfs_get_block_group(block_group); search_start = block_group->key.objectid; have_block_group: @@ -4315,7 +4323,7 @@ have_block_group: btrfs_put_block_group(block_group); block_group = last_ptr->block_group; - atomic_inc(&block_group->count); + btrfs_get_block_group(block_group); spin_unlock(&last_ptr->lock); spin_unlock(&last_ptr->refill_lock); @@ -7395,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) wait_block_group_cache_done(block_group); btrfs_remove_free_space_cache(block_group); - - WARN_ON(atomic_read(&block_group->count) != 1); - kfree(block_group); + btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); } -- cgit v1.2.3 From a555f810af6d63ea5960abaed88e150ad95c3011 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Jan 2010 16:18:15 -0500 Subject: Btrfs: Add mount -o compress-force The default btrfs mount -o compress mode will quickly back off compressing a file if it notices that compression does not reduce the size of the data being written. This can save considerable CPU because all future writes to the file go through uncompressed. But some files are both very large and have mixed data stored in them. In that case, we want to add the ability to always try compressing data before writing it. This commit adds mount -o compress-force. A later commit will add a new inode flag that does the same thing. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/inode.c | 3 ++- fs/btrfs/super.c | 9 ++++++++- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9f806dd04c2..2aa8ec6a098 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1161,6 +1161,7 @@ struct btrfs_root { #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) #define BTRFS_MOUNT_NOSSD (1 << 9) #define BTRFS_MOUNT_DISCARD (1 << 10) +#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b330e27c2d8..f46c5727684 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -483,7 +483,8 @@ again: nr_pages_ret = 0; /* flag the file so we don't compress in the future */ - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + if (!btrfs_test_opt(root, FORCE_COMPRESS)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; } if (will_compress) { *num_added += 1; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3f9b45704fc..8a1ea6e6457 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,7 +66,8 @@ enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, - Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, + Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, + Opt_flushoncommit, Opt_discard, Opt_err, }; @@ -82,6 +83,7 @@ static match_table_t tokens = { {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, {Opt_compress, "compress"}, + {Opt_compress_force, "compress-force"}, {Opt_ssd, "ssd"}, {Opt_ssd_spread, "ssd_spread"}, {Opt_nossd, "nossd"}, @@ -173,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: use compression\n"); btrfs_set_opt(info->mount_opt, COMPRESS); break; + case Opt_compress_force: + printk(KERN_INFO "btrfs: forcing compression\n"); + btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); + btrfs_set_opt(info->mount_opt, COMPRESS); + break; case Opt_ssd: printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); btrfs_set_opt(info->mount_opt, SSD); -- cgit v1.2.3 From b8d9bfeb18f9af794020d96e9bee984d18a8d737 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 15 Dec 2009 06:54:17 +0000 Subject: Btrfs: remove tree_search() in extent_map.c This patch removes tree_search() in extent_map.c because it is not called by anything. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ccbdcb54ec5..5a4f73b79b7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, return NULL; } -/* - * look for an offset in the tree, and if it can't be found, return - * the first offset we can find smaller than 'offset'. - */ -static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) -{ - struct rb_node *prev; - struct rb_node *ret; - ret = __tree_search(root, offset, &prev, NULL); - if (!ret) - return prev; - return ret; -} - /* check to see if two extent_map structs are adjacent and safe to merge */ static int mergable_maps(struct extent_map *prev, struct extent_map *next) { -- cgit v1.2.3 From d1ea6a61454e7d7ff0873d0ad1ae27d5807da0d3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 20 Jan 2010 07:28:54 +0000 Subject: Btrfs: Use correct values when updating inode i_size on fallocate commit f2bc9dd07e3424c4ec5f3949961fe053d47bc825 Author: Aneesh Kumar K.V Date: Wed Jan 20 12:57:53 2010 +0530 Btrfs: Use correct values when updating inode i_size on fallocate Even though we allocate more, we should be updating inode i_size as per the arguments passed Signed-off-by: Aneesh Kumar K.V Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f46c5727684..5606361b5f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5800,7 +5800,7 @@ out_fail: } static int prealloc_file_range(struct inode *inode, u64 start, u64 end, - u64 alloc_hint, int mode) + u64 alloc_hint, int mode, loff_t actual_len) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -5809,6 +5809,7 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; + u64 i_size; while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); @@ -5847,8 +5848,12 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && cur_offset > inode->i_size) { - i_size_write(inode, cur_offset); - btrfs_ordered_update_i_size(inode, cur_offset, NULL); + if (cur_offset > actual_len) + i_size = actual_len; + else + i_size = cur_offset; + i_size_write(inode, i_size); + btrfs_ordered_update_i_size(inode, i_size, NULL); } ret = btrfs_update_inode(trans, root, inode); @@ -5941,7 +5946,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { ret = prealloc_file_range(inode, cur_offset, last_byte, - alloc_hint, mode); + alloc_hint, mode, offset+len); if (ret < 0) { free_extent_map(em); break; -- cgit v1.2.3 From f858153c367a397235d3e81136741e40e44faf1d Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 26 Jan 2010 00:48:23 +0000 Subject: Btrfs: fix a memory leak in btrfs_init_acl In btrfs_init_acl() cloned acl is not released Signed-off-by: Yang Hongyang Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index fa44e92e9b8..da3133c6983 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -269,6 +269,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, ACL_TYPE_ACCESS); } } + posix_acl_release(clone); } failed: posix_acl_release(acl); -- cgit v1.2.3 From e3acc2a6850efff647f1c5458524eb3a8bcba20a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Jan 2010 14:30:53 +0000 Subject: Btrfs: run orphan cleanup on default fs root This patch revert's commit 6c090a11e1c403b727a6a8eff0b97d5fb9e95cb5 Since it introduces this problem where we can run orphan cleanup on a volume that can have orphan entries re-added. Instead of my original fix, Yan Zheng pointed out that we can just revert my original fix and then run the orphan cleanup in open_ctree after we look up the fs_root. I have tested this with all the tests that gave me problems and this patch fixes both problems. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++++ fs/btrfs/inode.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 009e3bd18f2..87b25543d7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1993,6 +1993,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->fs_root) goto fail_trans_kthread; + if (!(sb->s_flags & MS_RDONLY)) { + down_read(&fs_info->cleanup_work_sem); + btrfs_orphan_cleanup(fs_info->fs_root); + up_read(&fs_info->cleanup_work_sem); + } + return tree_root; fail_trans_kthread: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5606361b5f0..8cd109972fa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3797,12 +3797,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (location.type == BTRFS_INODE_ITEM_KEY) { inode = btrfs_iget(dir->i_sb, &location, root); - if (unlikely(root->clean_orphans) && - !(inode->i_sb->s_flags & MS_RDONLY)) { - down_read(&root->fs_info->cleanup_work_sem); - btrfs_orphan_cleanup(root); - up_read(&root->fs_info->cleanup_work_sem); - } return inode; } -- cgit v1.2.3 From f48b90756bd834dda852ff514f2690d3175b1f44 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 27 Jan 2010 02:07:59 +0000 Subject: Btrfs: do not mark the chunk as readonly if in degraded mode If a RAID setup has chunks that span multiple disks, and one of those disks has failed, btrfs_chunk_readonly will return 1 since one of the disks in that chunk's stripes is dead and therefore not writeable. So instead if we are in degraded mode, return 0 so we can go ahead and allocate stuff. Without this patch all of the block groups in a RAID1 setup will end up read-only, which will mean we can't add new disks to the array since we won't be able to make allocations. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 220dad5db01..66122bdf8bb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) if (!em) return 1; + if (btrfs_test_opt(root, DEGRADED)) { + free_extent_map(em); + return 0; + } + map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { if (!map->stripes[i].dev->writeable) { -- cgit v1.2.3 From 7f59203abeaf18bf3497b308891f95a4489810ad Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 27 Jan 2010 02:09:00 +0000 Subject: Btrfs: check return value of open_bdev_exclusive properly Hit this problem while testing RAID1 failure stuff. open_bdev_exclusive returns ERR_PTR(), not NULL. So change the return value properly. This is important if you accidently specify a device that doesn't exist when trying to add a new device to an array, you will panic the box dereferencing bdev. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 66122bdf8bb..5eb7459e378 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EINVAL; bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); - if (!bdev) - return -EIO; + if (IS_ERR(bdev)) + return PTR_ERR(bdev); if (root->fs_info->fs_devices->seeding) { seeding_dev = 1; -- cgit v1.2.3 From 035fe03a7ad56982b30ab3a522b7b08d58feccd0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 27 Jan 2010 02:09:38 +0000 Subject: Btrfs: check total number of devices when removing missing If you have a disk failure in RAID1 and then add a new disk to the array, and then try to remove the missing volume, it will fail. The reason is the sanity check only looks at the total number of rw devices, which is just 2 because we have 2 good disks and 1 bad one. Instead check the total number of devices in the array to make sure we can actually remove the device. Tested this with a failed disk setup and with this test we can now run btrfs-vol -r missing /mount/point and it works fine. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5eb7459e378..41ecbb2347f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - root->fs_info->fs_devices->rw_devices <= 4) { + root->fs_info->fs_devices->num_devices <= 4) { printk(KERN_ERR "btrfs: unable to go below four devices " "on raid10\n"); ret = -EINVAL; @@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - root->fs_info->fs_devices->rw_devices <= 2) { + root->fs_info->fs_devices->num_devices <= 2) { printk(KERN_ERR "btrfs: unable to go below two " "devices on raid1\n"); ret = -EINVAL; -- cgit v1.2.3 From f044ba7835b84e69c68b620ca8fa27e5ef67759d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 4 Feb 2010 08:46:56 +0000 Subject: Btrfs: fix race between allocate and release extent buffer. Increase extent buffer's reference count while holding the lock. Otherwise it can race with try_release_extent_buffer. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96577e8bf9f..b177ed31961 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, spin_unlock(&tree->buffer_lock); goto free_eb; } - spin_unlock(&tree->buffer_lock); - /* add one reference for the tree */ atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); return eb; free_eb: -- cgit v1.2.3 From 014e4ac4f7d9c981750491fa40ea35efadc9ed49 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 29 Jan 2010 10:42:11 +0000 Subject: Btrfs: make error return negative in btrfs_sync_file() It appears the error return should be negative Signed-off-by: Roel Kluin Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ae96fdae1f7..413a30dafcd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1133,7 +1133,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_lock(&dentry->d_inode->i_mutex); out: - return ret > 0 ? EIO : ret; + return ret > 0 ? -EIO : ret; } static const struct vm_operations_struct btrfs_file_vm_ops = { -- cgit v1.2.3 From d7ce5843bb28ada6845ab2ae8510ba3f12d33154 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 2 Feb 2010 08:46:44 +0000 Subject: Btrfs: remove BUG_ON() due to mounting bad filesystem Mounting a bad filesystem caused a BUG_ON(). The following is steps to reproduce it. # mkfs.btrfs /dev/sda2 # mount /dev/sda2 /mnt # mkfs.btrfs /dev/sda1 /dev/sda2 (the program says that /dev/sda2 was mounted, and then exits. ) # umount /mnt # mount /dev/sda1 /mnt At the third step, mkfs.btrfs exited in the way of make filesystem. So the initialization of the filesystem didn't finish. So the filesystem was bad, and it caused BUG_ON() when mounting it. But BUG_ON() should be called by the wrong code, not user's operation, so I think it is a bug of btrfs. This patch fixes it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- fs/btrfs/relocation.c | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87b25543d7d..2b59201b955 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1982,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); - BUG_ON(ret); + if (ret < 0) { + printk(KERN_WARNING + "btrfs: failed to recover relocation\n"); + err = -EINVAL; + goto fail_trans_kthread; + } } location.objectid = BTRFS_FS_TREE_OBJECTID; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ed3e4a2ec2c..ab7ab531874 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3764,7 +3764,8 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); - btrfs_orphan_cleanup(fs_root); + else + btrfs_orphan_cleanup(fs_root); } return err; } -- cgit v1.2.3 From 7a7965f83e89f0be506a96769938a721e4e5ae50 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 1 Feb 2010 02:41:17 +0000 Subject: Btrfs: Fix oopsen when dropping empty tree. When dropping a empty tree, walk_down_tree() skips checking extent information for the tree root. This will triggers a BUG_ON in walk_up_proc(). Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 432a2da4641..559f72489b3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5402,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, int ret; while (level >= 0) { - if (path->slots[level] >= - btrfs_header_nritems(path->nodes[level])) - break; - ret = walk_down_proc(trans, root, path, wc, lookup_info); if (ret > 0) break; @@ -5413,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (level == 0) break; + if (path->slots[level] >= + btrfs_header_nritems(path->nodes[level])) + break; + ret = do_walk_down(trans, root, path, wc, &lookup_info); if (ret > 0) { path->slots[level]++; -- cgit v1.2.3 From efd049fb26a162c3830fd3cb1001fdc09b147f3b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 Feb 2010 20:50:10 +0000 Subject: Btrfs: do not try and lookup the file extent when finishing ordered io When running the following fio job [torrent] filename=torrent-test rw=randwrite size=4g filesize=4g bs=4k ioengine=sync you would see long stalls where no work was being done. That is because we were doing all this extra work to read in the file extent outside of the transaction, however in the random io case this ends up hurting us because the file extents are not there to begin with. So axe this logic, since we end up reading in the file extent when we go to update it anyway. This took the fio job from 11 mb/s with several ~10 second stalls to 24 mb/s to a couple of 1-2 second stalls. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8cd109972fa..6782aa19130 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1681,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * before we start the transaction. It limits the amount of btree * reads required while inside the transaction. */ -static noinline void reada_csum(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_ordered_extent *ordered_extent) -{ - struct btrfs_ordered_sum *sum; - u64 bytenr; - - sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, - list); - bytenr = sum->sums[0].bytenr; - - /* - * we don't care about the results, the point of this search is - * just to get the btree leaves into ram - */ - btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); -} - /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1709,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_path *path; int compressed = 0; int ret; @@ -1717,32 +1698,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (!ret) return 0; - /* - * before we join the transaction, try to do some of our IO. - * This will limit the amount of IO that we have to do with - * the transaction running. We're unlikely to need to do any - * IO if the file extents are new, the disk_i_size checks - * covers the most common case. - */ - if (start < BTRFS_I(inode)->disk_i_size) { - path = btrfs_alloc_path(); - if (path) { - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - start, 0); - ordered_extent = btrfs_lookup_ordered_extent(inode, - start); - if (!list_empty(&ordered_extent->list)) { - btrfs_release_path(root, path); - reada_csum(root, path, ordered_extent); - } - btrfs_free_path(path); - } - } - - if (!ordered_extent) - ordered_extent = btrfs_lookup_ordered_extent(inode, start); + ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); -- cgit v1.2.3 From 23b5c50945f2294add0137799400329c0ebba290 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 4 Feb 2010 11:33:03 -0500 Subject: Btrfs: apply updated fallocate i_size fix This version of the i_size fix for fallocate makes sure we only update the i_size when the current fallocate is really operating outside of i_size. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6782aa19130..4deb280f896 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5799,7 +5799,9 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > inode->i_size) { + (actual_len > inode->i_size) && + (cur_offset > inode->i_size)) { + if (cur_offset > actual_len) i_size = actual_len; else -- cgit v1.2.3 From 3f6fae9559225741c91f1320090b285da1413290 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Feb 2010 07:43:00 +0000 Subject: Btrfs: btrfs_mark_extent_written uses the wrong slot My test do: fallocate a big file and do write. The file is 512M, but after file write is done btrfs-debug-tree shows: item 6 key (257 EXTENT_DATA 0) itemoff 3516 itemsize 53 extent data disk byte 1103101952 nr 536870912 extent data offset 0 nr 399634432 ram 536870912 extent compression 0 Looks like a regression introducted by 6c7d54ac87f338c479d9729e8392eca3f76e11e1, where we set wrong slot. Signed-off-by: Shaohua Li Acked-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 413a30dafcd..a7fd9f3a750 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -720,13 +720,15 @@ again: inode->i_ino, orig_offset); BUG_ON(ret); } - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); if (del_nr == 0) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); } else { + fi = btrfs_item_ptr(leaf, del_slot - 1, + struct btrfs_file_extent_item); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_num_bytes(leaf, fi, -- cgit v1.2.3