From 98d377a0894e6bcca44eafd4d2eee74e8af4db83 Mon Sep 17 00:00:00 2001 From: TARUISI Hiroaki Date: Wed, 18 Nov 2009 05:42:14 +0000 Subject: Btrfs: add a function to lookup a directory path by following backrefs This will be used by the inode lookup ioctl. Signed-off-by: TARUISI Hiroaki Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 645a17927a8..ac2a28f4fa1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -48,6 +48,7 @@ #include "print-tree.h" #include "volumes.h" #include "locking.h" +#include "ctree.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -743,6 +744,97 @@ out: return ret; } +/* + Search INODE_REFs to identify path name of 'dirid' directory + in a 'tree_id' tree. and sets path name to 'name'. +*/ +static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, + u64 tree_id, u64 dirid, char *name) +{ + struct btrfs_root *root; + struct btrfs_key key; + char *name_stack, *ptr; + int ret = -1; + int slot; + int len; + int total_len = 0; + struct btrfs_inode_ref *iref; + struct extent_buffer *l; + struct btrfs_path *path; + + if (dirid == BTRFS_FIRST_FREE_OBJECTID) { + name[0]='\0'; + return 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + name_stack = kzalloc(BTRFS_PATH_NAME_MAX+1, GFP_NOFS); + if (!name_stack) { + btrfs_free_path(path); + return -ENOMEM; + } + + ptr = &name_stack[BTRFS_PATH_NAME_MAX]; + + key.objectid = tree_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(info, &key); + if (IS_ERR(root)) { + printk(KERN_ERR "could not find root %llu\n", tree_id); + return -ENOENT; + } + + key.objectid = dirid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + l = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(l, &key, slot); + + if (ret > 0 && (key.objectid != dirid || + key.type != BTRFS_INODE_REF_KEY)) + goto out; + + iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); + len = btrfs_inode_ref_name_len(l, iref); + ptr -= len + 1; + total_len += len + 1; + if (ptr < name_stack) + goto out; + + *(ptr + len) = '/'; + read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); + + if (key.offset == BTRFS_FIRST_FREE_OBJECTID) + break; + + btrfs_release_path(root, path); + key.objectid = key.offset; + key.offset = 0; + dirid = key.objectid; + + } + if (ptr < name_stack) + goto out; + strncpy(name, ptr, total_len); + name[total_len]='\0'; + ret = 0; +out: + btrfs_free_path(path); + kfree(name_stack); + return ret; +} + static noinline int btrfs_ioctl_snap_destroy(struct file *file, void __user *arg) { -- cgit v1.2.3 From ac8e9819d71f907a0532b01b22c26b56bbbcbd21 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 28 Feb 2010 15:39:26 -0500 Subject: Btrfs: add search and inode lookup ioctls The search ioctl is a generic tool for doing btree searches from userland applications. The first user of the search ioctl is a subvolume listing feature, but we'll also use it to find new files in a subvolume. The search ioctl allows you to specify min and max keys to search for, along with min and max transid. It returns the items along with a header that includes the item key. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 249 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 233 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac2a28f4fa1..c6044733198 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -744,16 +744,206 @@ out: return ret; } +static noinline int key_in_sk(struct btrfs_key *key, + struct btrfs_ioctl_search_key *sk) +{ + if (key->objectid < sk->min_objectid) + return 0; + if (key->offset < sk->min_offset) + return 0; + if (key->type < sk->min_type) + return 0; + if (key->objectid > sk->max_objectid) + return 0; + if (key->type > sk->max_type) + return 0; + if (key->offset > sk->max_offset) + return 0; + return 1; +} + +static noinline int copy_to_sk(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + struct btrfs_ioctl_search_key *sk, + char *buf, + unsigned long *sk_offset, + int *num_found) +{ + u64 found_transid; + struct extent_buffer *leaf; + struct btrfs_ioctl_search_header sh; + unsigned long item_off; + unsigned long item_len; + int nritems; + int i; + int slot; + int found = 0; + int ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(leaf); + + if (btrfs_header_generation(leaf) > sk->max_transid) { + i = nritems; + goto advance_key; + } + found_transid = btrfs_header_generation(leaf); + + for (i = slot; i < nritems; i++) { + item_off = btrfs_item_ptr_offset(leaf, i); + item_len = btrfs_item_size_nr(leaf, i); + + if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + item_len = 0; + + if (sizeof(sh) + item_len + *sk_offset > + BTRFS_SEARCH_ARGS_BUFSIZE) { + ret = 1; + goto overflow; + } + + btrfs_item_key_to_cpu(leaf, key, i); + if (!key_in_sk(key, sk)) + continue; + + sh.objectid = key->objectid; + sh.offset = key->offset; + sh.type = key->type; + sh.len = item_len; + sh.transid = found_transid; + + /* copy search result header */ + memcpy(buf + *sk_offset, &sh, sizeof(sh)); + *sk_offset += sizeof(sh); + + if (item_len) { + char *p = buf + *sk_offset; + /* copy the item */ + read_extent_buffer(leaf, p, + item_off, item_len); + *sk_offset += item_len; + found++; + } + + if (*num_found >= sk->nr_items) + break; + } +advance_key: + if (key->offset < (u64)-1) + key->offset++; + else if (key->type < (u64)-1) + key->type++; + else if (key->objectid < (u64)-1) + key->objectid++; + ret = 0; +overflow: + *num_found += found; + return ret; +} + +static noinline int search_ioctl(struct inode *inode, + struct btrfs_ioctl_search_args *args) +{ + struct btrfs_root *root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path; + struct btrfs_ioctl_search_key *sk = &args->key; + struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; + int ret; + int num_found = 0; + unsigned long sk_offset = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + if (sk->tree_id == 0) { + /* search the root of the inode that was passed */ + root = BTRFS_I(inode)->root; + } else { + key.objectid = sk->tree_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(info, &key); + if (IS_ERR(root)) { + printk(KERN_ERR "could not find root %llu\n", + sk->tree_id); + btrfs_free_path(path); + return -ENOENT; + } + } + + key.objectid = sk->min_objectid; + key.type = sk->min_type; + key.offset = sk->min_offset; + + max_key.objectid = sk->max_objectid; + max_key.type = sk->max_type; + max_key.offset = sk->max_offset; + + path->keep_locks = 1; + + while(1) { + ret = btrfs_search_forward(root, &key, &max_key, path, 0, + sk->min_transid); + if (ret != 0) { + if (ret > 0) + ret = 0; + goto err; + } + ret = copy_to_sk(root, path, &key, sk, args->buf, + &sk_offset, &num_found); + btrfs_release_path(root, path); + if (ret || num_found >= sk->nr_items) + break; + + } + ret = 0; +err: + sk->nr_items = num_found; + btrfs_free_path(path); + return ret; +} + +static noinline int btrfs_ioctl_tree_search(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_search_args *args; + struct inode *inode; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + args = kmalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + + if (copy_from_user(args, argp, sizeof(*args))) { + kfree(args); + return -EFAULT; + } + inode = fdentry(file)->d_inode; + ret = search_ioctl(inode, args); + if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = -EFAULT; + kfree(args); + return ret; +} + /* - Search INODE_REFs to identify path name of 'dirid' directory - in a 'tree_id' tree. and sets path name to 'name'. -*/ + * Search INODE_REFs to identify path name of 'dirid' directory + * in a 'tree_id' tree. and sets path name to 'name'. + */ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, u64 tree_id, u64 dirid, char *name) { struct btrfs_root *root; struct btrfs_key key; - char *name_stack, *ptr; + char *ptr; int ret = -1; int slot; int len; @@ -771,13 +961,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, if (!path) return -ENOMEM; - name_stack = kzalloc(BTRFS_PATH_NAME_MAX+1, GFP_NOFS); - if (!name_stack) { - btrfs_free_path(path); - return -ENOMEM; - } - - ptr = &name_stack[BTRFS_PATH_NAME_MAX]; + ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; key.objectid = tree_id; key.type = BTRFS_ROOT_ITEM_KEY; @@ -802,14 +986,16 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, btrfs_item_key_to_cpu(l, &key, slot); if (ret > 0 && (key.objectid != dirid || - key.type != BTRFS_INODE_REF_KEY)) + key.type != BTRFS_INODE_REF_KEY)) { + ret = -ENOENT; goto out; + } iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; - if (ptr < name_stack) + if (ptr < name) goto out; *(ptr + len) = '/'; @@ -824,14 +1010,41 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, dirid = key.objectid; } - if (ptr < name_stack) + if (ptr < name) goto out; - strncpy(name, ptr, total_len); + memcpy(name, ptr, total_len); name[total_len]='\0'; ret = 0; out: btrfs_free_path(path); - kfree(name_stack); + return ret; +} + +static noinline int btrfs_ioctl_ino_lookup(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_ino_lookup_args *args; + struct inode *inode; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + args = kmalloc(sizeof(*args), GFP_KERNEL); + if (copy_from_user(args, argp, sizeof(*args))) { + kfree(args); + return -EFAULT; + } + inode = fdentry(file)->d_inode; + + ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, + args->treeid, args->objectid, + args->name); + + if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = -EFAULT; + + kfree(args); return ret; } @@ -1430,6 +1643,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_trans_start(file); case BTRFS_IOC_TRANS_END: return btrfs_ioctl_trans_end(file); + case BTRFS_IOC_TREE_SEARCH: + return btrfs_ioctl_tree_search(file, argp); + case BTRFS_IOC_INO_LOOKUP: + return btrfs_ioctl_ino_lookup(file, argp); case BTRFS_IOC_SYNC: btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; -- cgit v1.2.3 From 6ef5ed0d386be5c43ec66d6f2999919c0893558b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 11 Dec 2009 21:11:29 +0000 Subject: Btrfs: add ioctl and incompat flag to set the default mount subvol This patch needs to go along with my previous patch. This lets us set the default dir item's location to whatever root we want to use as our default mounting subvol. With this we don't have to use mount -o subvol= anymore to mount a different subvol, we can just set the new one and it will just magically work. I've done some moderate testing with this, mostly just switching the default mount around, mounting subvols and the default mount at the same time and such, everything seems to work. Thanks, Older kernels would generally be able to still mount the filesystem with the default subvolume set, but it would result in a different volume being mounted, which could be an even more unpleasant suprise for users. So if you set your default subvolume, you can't go back to older kernels. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c6044733198..7875a75315d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1579,6 +1579,79 @@ out: return ret; } +static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *new_root; + struct btrfs_dir_item *di; + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + struct btrfs_key location; + struct btrfs_disk_key disk_key; + struct btrfs_super_block *disk_super; + u64 features; + u64 objectid = 0; + u64 dir_id; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&objectid, argp, sizeof(objectid))) + return -EFAULT; + + if (!objectid) + objectid = root->root_key.objectid; + + location.objectid = objectid; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + + new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); + if (IS_ERR(new_root)) + return PTR_ERR(new_root); + + if (btrfs_root_refs(&new_root->root_item) == 0) + return -ENOENT; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + trans = btrfs_start_transaction(root, 1); + if (!trans) { + btrfs_free_path(path); + return -ENOMEM; + } + + dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, + dir_id, "default", 7, 1); + if (!di) { + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + printk(KERN_ERR "Umm, you don't have the default dir item, " + "this isn't going to work\n"); + return -ENOENT; + } + + btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); + btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + + disk_super = &root->fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { + features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; + btrfs_set_super_incompat_flags(disk_super, features); + } + btrfs_end_transaction(trans, root); + + return 0; +} + /* * there are many ways the trans_start and trans_end ioctls can lead * to deadlocks. They should only be used by applications that @@ -1625,6 +1698,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); + case BTRFS_IOC_DEFAULT_SUBVOL: + return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: -- cgit v1.2.3 From 940100a4a7b78b27e60a3e72340fb9b5397dcdb2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 10 Mar 2010 10:52:59 -0500 Subject: Btrfs: be more selective in the defrag ioctl The btrfs defrag ioctl had some bugs around delalloc accounting, and it wasn't properly skipping pages that were not in the mapping. It wasn't properly clearing the page checked flag, which could make the writeback code ignore the page forever while pinning it as dirty. This commit fixes those problems and makes defrag a little smarter. It skips holes and it doesn't waste time defragging large extents. If a tiny extent comes before a very large extent, it will defrag both of them to make sure the tiny extent ends up next to something big. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7875a75315d..3a89cd77f30 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -475,6 +475,73 @@ out_unlock: return error; } +static int should_defrag_range(struct inode *inode, u64 start, u64 len, + u64 *last_len, u64 *skip, u64 *defrag_end) +{ + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map *em = NULL; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 1; + + /* + * make sure that once we start defragging and extent, we keep on + * defragging it + */ + if (start < *defrag_end) + return 1; + + *skip = 0; + + /* + * hopefully we have this extent in the tree already, try without + * the full extent lock + */ + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + read_unlock(&em_tree->lock); + + if (!em) { + /* get the big lock and read metadata off disk */ + lock_extent(io_tree, start, start + len - 1, GFP_NOFS); + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); + + if (!em) + return 0; + } + + /* this will cover holes, and inline extents */ + if (em->block_start >= EXTENT_MAP_LAST_BYTE) + ret = 0; + + /* + * we hit a real extent, if it is big don't bother defragging it again + */ + if ((*last_len == 0 || *last_len >= 256 * 1024) && + em->len >= 256 * 1024) + ret = 0; + + /* + * last_len ends up being a counter of how many bytes we've defragged. + * every time we choose not to defrag an extent, we reset *last_len + * so that the next tiny extent will force a defrag. + * + * The end result of this is that tiny extents before a single big + * extent will force at least part of that big extent to be defragged. + */ + if (ret) { + *last_len += len; + *defrag_end = extent_map_end(em); + } else { + *last_len = 0; + *skip = extent_map_end(em); + *defrag_end = 0; + } + + free_extent_map(em); + return ret; +} + static int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -487,37 +554,86 @@ static int btrfs_defrag_file(struct file *file) unsigned long total_read = 0; u64 page_start; u64 page_end; + u64 last_len = 0; + u64 skip = 0; + u64 defrag_end = 0; unsigned long i; int ret; - ret = btrfs_check_data_free_space(root, inode, inode->i_size); - if (ret) - return -ENOSPC; + if (inode->i_size == 0) + return 0; + + last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + i = 0; + while (i <= last_index) { + if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &last_len, &skip, + &defrag_end)) { + unsigned long next; + /* + * the should_defrag function tells us how much to skip + * bump our counter by the suggested amount + */ + next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + i = max(i + 1, next); + continue; + } - mutex_lock(&inode->i_mutex); - last_index = inode->i_size >> PAGE_CACHE_SHIFT; - for (i = 0; i <= last_index; i++) { if (total_read % ra_pages == 0) { btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, min(last_index, i + ra_pages - 1)); } total_read++; + mutex_lock(&inode->i_mutex); + + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); + if (ret) { + ret = -ENOSPC; + break; + } + + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) { + btrfs_free_reserved_data_space(root, inode, + PAGE_CACHE_SIZE); + ret = -ENOSPC; + break; + } again: + if (inode->i_size == 0 || + i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { + ret = 0; + goto err_reservations; + } + page = grab_cache_page(inode->i_mapping, i); if (!page) - goto out_unlock; + goto err_reservations; + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); - goto out_unlock; + goto err_reservations; } } + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + goto again; + } + wait_on_page_writeback(page); + if (PageDirty(page)) { + btrfs_free_reserved_data_space(root, inode, + PAGE_CACHE_SIZE); + goto loop_unlock; + } + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -538,18 +654,32 @@ again: * page if it is dirtied again later */ clear_page_dirty_for_io(page); + clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, + page_end, EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, GFP_NOFS); btrfs_set_extent_delalloc(inode, page_start, page_end); + ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + +loop_unlock: unlock_page(page); page_cache_release(page); + mutex_unlock(&inode->i_mutex); + + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); + i++; } -out_unlock: - mutex_unlock(&inode->i_mutex); return 0; + +err_reservations: + mutex_unlock(&inode->i_mutex); + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + return ret; } static noinline int btrfs_ioctl_resize(struct btrfs_root *root, -- cgit v1.2.3 From 1e701a3292e25a6c4939cad9f24951dc6b6ad853 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Mar 2010 09:42:04 -0500 Subject: Btrfs: add new defrag-range ioctl. The btrfs defrag ioctl was limited to doing the entire file. This commit adds a new interface that can defrag a specific range inside the file. It can also force compression on the file, allowing you to selectively compress individual files after they were created, even when mount -o compress isn't turned on. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3a89cd77f30..d866b460c26 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -476,13 +476,18 @@ out_unlock: } static int should_defrag_range(struct inode *inode, u64 start, u64 len, - u64 *last_len, u64 *skip, u64 *defrag_end) + int thresh, u64 *last_len, u64 *skip, + u64 *defrag_end) { struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 1; + + if (thresh == 0) + thresh = 256 * 1024; + /* * make sure that once we start defragging and extent, we keep on * defragging it @@ -517,8 +522,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, /* * we hit a real extent, if it is big don't bother defragging it again */ - if ((*last_len == 0 || *last_len >= 256 * 1024) && - em->len >= 256 * 1024) + if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) ret = 0; /* @@ -542,7 +546,8 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, return ret; } -static int btrfs_defrag_file(struct file *file) +static int btrfs_defrag_file(struct file *file, + struct btrfs_ioctl_defrag_range_args *range) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -563,11 +568,19 @@ static int btrfs_defrag_file(struct file *file) if (inode->i_size == 0) return 0; - last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; - i = 0; + if (range->start + range->len > range->start) { + last_index = min_t(u64, inode->i_size - 1, + range->start + range->len - 1) >> PAGE_CACHE_SHIFT; + } else { + last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + } + + i = range->start >> PAGE_CACHE_SHIFT; while (i <= last_index) { if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &last_len, &skip, + PAGE_CACHE_SIZE, + range->extent_thresh, + &last_len, &skip, &defrag_end)) { unsigned long next; /* @@ -585,6 +598,8 @@ static int btrfs_defrag_file(struct file *file) } total_read++; mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = 1; ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); if (ret) { @@ -673,6 +688,28 @@ loop_unlock: i++; } + if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) + filemap_flush(inode->i_mapping); + + if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + /* the filemap_flush will queue IO into the worker threads, but + * we have to make sure the IO is actually started and that + * ordered extents get created before we return + */ + atomic_inc(&root->fs_info->async_submit_draining); + while (atomic_read(&root->fs_info->nr_async_submits) || + atomic_read(&root->fs_info->async_delalloc_pages)) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->nr_async_submits) == 0 && + atomic_read(&root->fs_info->async_delalloc_pages) == 0)); + } + atomic_dec(&root->fs_info->async_submit_draining); + + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = 0; + mutex_unlock(&inode->i_mutex); + } + return 0; err_reservations: @@ -1284,10 +1321,11 @@ out: return err; } -static int btrfs_ioctl_defrag(struct file *file) +static int btrfs_ioctl_defrag(struct file *file, void __user *argp) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_defrag_range_args *range; int ret; ret = mnt_want_write(file->f_path.mnt); @@ -1308,7 +1346,30 @@ static int btrfs_ioctl_defrag(struct file *file) ret = -EINVAL; goto out; } - btrfs_defrag_file(file); + + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) { + ret = -ENOMEM; + goto out; + } + + if (argp) { + if (copy_from_user(range, argp, + sizeof(*range))) { + ret = -EFAULT; + kfree(range); + } + /* compression requires us to start the IO */ + if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + range->flags |= BTRFS_DEFRAG_RANGE_START_IO; + range->extent_thresh = (u32)-1; + } + } else { + /* the rest are all set to zero by kzalloc */ + range->len = (u64)-1; + } + btrfs_defrag_file(file, range); + kfree(range); break; } out: @@ -1831,7 +1892,9 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEFAULT_SUBVOL: return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: - return btrfs_ioctl_defrag(file); + return btrfs_ioctl_defrag(file, NULL); + case BTRFS_IOC_DEFRAG_RANGE: + return btrfs_ioctl_defrag(file, argp); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(root, argp); case BTRFS_IOC_ADD_DEV: -- cgit v1.2.3 From 2ac55d41b5d6bf49e76bc85db5431240617e2f8f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 3 Feb 2010 19:33:23 +0000 Subject: Btrfs: cache the extent state everywhere we possibly can V2 This patch just goes through and fixes everybody that does lock_extent() blah unlock_extent() to use lock_extent_bits() blah unlock_extent_cached() and pass around a extent_state so we only have to do the searches once per function. This gives me about a 3 mb/s boots on my random write test. I have not converted some things, like the relocation and ioctl's, since they aren't heavily used and the relocation stuff is in the middle of being re-written. I also changed the clear_extent_bit() to only unset the cached state if we are clearing EXTENT_LOCKED and related stuff, so we can do things like this lock_extent_bits() clear delalloc bits unlock_extent_cached() without losing our cached state. I tested this thoroughly and turned on LEAK_DEBUG to make sure we weren't leaking extent states, everything worked out fine. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d866b460c26..9aaba6e472d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -673,7 +673,7 @@ again: page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, GFP_NOFS); - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); -- cgit v1.2.3 From 1406e4327be3a533a2b18582f715ce2cfbcf6804 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Jan 2010 18:19:06 +0000 Subject: Btrfs: add a "df" ioctl for btrfs df is a very loaded question in btrfs. This gives us a way to get the per-space usage information so we can tell exactly what is in use where. This will help us figure out ENOSPC problems, and help users better understand where their disk space is going. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9aaba6e472d..9213d39d36c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1843,6 +1843,49 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) return 0; } +long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_space_args space_args; + struct btrfs_ioctl_space_info space; + struct btrfs_ioctl_space_info *dest; + struct btrfs_space_info *info; + int ret = 0; + + if (copy_from_user(&space_args, + (struct btrfs_ioctl_space_args __user *)arg, + sizeof(space_args))) + return -EFAULT; + + space_args.total_spaces = 0; + dest = (struct btrfs_ioctl_space_info *) + (arg + sizeof(struct btrfs_ioctl_space_args)); + + rcu_read_lock(); + list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { + if (!space_args.space_slots) { + space_args.total_spaces++; + continue; + } + if (space_args.total_spaces >= space_args.space_slots) + break; + space.flags = info->flags; + space.total_bytes = info->total_bytes; + space.used_bytes = info->bytes_used; + if (copy_to_user(dest, &space, sizeof(space))) { + ret = -EFAULT; + break; + } + dest++; + space_args.total_spaces++; + } + rcu_read_unlock(); + + if (copy_to_user(arg, &space_args, sizeof(space_args))) + ret = -EFAULT; + + return ret; +} + /* * there are many ways the trans_start and trans_end ioctls can lead * to deadlocks. They should only be used by applications that @@ -1915,6 +1958,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_tree_search(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); + case BTRFS_IOC_SPACE_INFO: + return btrfs_ioctl_space_info(root, argp); case BTRFS_IOC_SYNC: btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; -- cgit v1.2.3 From 91748467a5c5884e44ad5cf58630c0c28474f1f6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 28 Feb 2010 10:59:11 +0000 Subject: btrfs: use memparse Use memparse() instead of its own private implementation. Signed-off-by: Akinobu Mita Cc: Chris Mason Cc: linux-btrfs@vger.kernel.org Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9213d39d36c..363e209679b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -776,7 +776,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, mod = 1; sizestr++; } - new_size = btrfs_parse_size(sizestr); + new_size = memparse(sizestr, NULL); if (new_size == 0) { ret = -EINVAL; goto out_unlock; -- cgit v1.2.3 From 854d2c3531e6d32e76b94ca5e096ea54c7497e40 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 16 Mar 2010 00:02:25 +0000 Subject: Btrfs: fix search_ioctl key advance key->type is u8, not u64. fs/btrfs/ioctl.c: In function 'copy_to_sk': fs/btrfs/ioctl.c:1024: warning: comparison is always true due to limited range of data type Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 363e209679b..38a68863390 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1000,7 +1000,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, advance_key: if (key->offset < (u64)-1) key->offset++; - else if (key->type < (u64)-1) + else if (key->type < (u8)-1) key->type++; else if (key->objectid < (u64)-1) key->objectid++; -- cgit v1.2.3 From 7fde62bffb576d384ea49a3aed3403d5609ee5bc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 16 Mar 2010 15:40:10 -0400 Subject: Btrfs: buffer results in the space_info ioctl The space_info ioctl was using copy_to_user inside rcu_read_lock. This commit changes things to copy into a buffer first and then dump the result down to userland. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 57 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 38a68863390..4329610b141 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1848,39 +1848,74 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) struct btrfs_ioctl_space_args space_args; struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; + struct btrfs_ioctl_space_info *dest_orig; + struct btrfs_ioctl_space_info *user_dest; struct btrfs_space_info *info; + int alloc_size; int ret = 0; + int slot_count = 0; if (copy_from_user(&space_args, (struct btrfs_ioctl_space_args __user *)arg, sizeof(space_args))) return -EFAULT; + /* first we count slots */ + rcu_read_lock(); + list_for_each_entry_rcu(info, &root->fs_info->space_info, list) + slot_count++; + rcu_read_unlock(); + + /* space_slots == 0 means they are asking for a count */ + if (space_args.space_slots == 0) { + space_args.total_spaces = slot_count; + goto out; + } + alloc_size = sizeof(*dest) * slot_count; + /* we generally have at most 6 or so space infos, one for each raid + * level. So, a whole page should be more than enough for everyone + */ + if (alloc_size > PAGE_CACHE_SIZE) + return -ENOMEM; + space_args.total_spaces = 0; - dest = (struct btrfs_ioctl_space_info *) - (arg + sizeof(struct btrfs_ioctl_space_args)); + dest = kmalloc(alloc_size, GFP_NOFS); + if (!dest) + return -ENOMEM; + dest_orig = dest; + /* now we have a buffer to copy into */ rcu_read_lock(); list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { - if (!space_args.space_slots) { - space_args.total_spaces++; - continue; - } + /* make sure we don't copy more than we allocated + * in our buffer + */ + if (slot_count == 0) + break; + slot_count--; + + /* make sure userland has enough room in their buffer */ if (space_args.total_spaces >= space_args.space_slots) break; + space.flags = info->flags; space.total_bytes = info->total_bytes; space.used_bytes = info->bytes_used; - if (copy_to_user(dest, &space, sizeof(space))) { - ret = -EFAULT; - break; - } + memcpy(dest, &space, sizeof(space)); dest++; space_args.total_spaces++; } rcu_read_unlock(); - if (copy_to_user(arg, &space_args, sizeof(space_args))) + user_dest = (struct btrfs_ioctl_space_info *) + (arg + sizeof(struct btrfs_ioctl_space_args)); + + if (copy_to_user(user_dest, dest_orig, alloc_size)) + ret = -EFAULT; + + kfree(dest_orig); +out: + if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) ret = -EFAULT; return ret; -- cgit v1.2.3 From abc6e1341bda974e2d0eddb75f57a20ac18e9b33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:10:08 -0400 Subject: Btrfs: fix key checks and advance in the search ioctl The search ioctl was working well for finding tree roots, but using it for generic searches requires a few changes to how the keys are advanced. This treats the search control min fields for objectid, type and offset more like a key, where we drop the offset to zero once we bump the type, etc. The downside of this is that we are changing the min_type and min_offset fields during the search, and so the ioctl caller needs extra checks to make sure the keys in the result are the ones it wanted. This also changes key_in_sk to use btrfs_comp_cpu_keys, just to make things more readable. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4329610b141..291aa51ff42 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -914,17 +914,23 @@ out: static noinline int key_in_sk(struct btrfs_key *key, struct btrfs_ioctl_search_key *sk) { - if (key->objectid < sk->min_objectid) - return 0; - if (key->offset < sk->min_offset) - return 0; - if (key->type < sk->min_type) - return 0; - if (key->objectid > sk->max_objectid) - return 0; - if (key->type > sk->max_type) + struct btrfs_key test; + int ret; + + test.objectid = sk->min_objectid; + test.type = sk->min_type; + test.offset = sk->min_offset; + + ret = btrfs_comp_cpu_keys(key, &test); + if (ret < 0) return 0; - if (key->offset > sk->max_offset) + + test.objectid = sk->max_objectid; + test.type = sk->max_type; + test.offset = sk->max_offset; + + ret = btrfs_comp_cpu_keys(key, &test); + if (ret > 0) return 0; return 1; } @@ -998,13 +1004,18 @@ static noinline int copy_to_sk(struct btrfs_root *root, break; } advance_key: - if (key->offset < (u64)-1) + ret = 0; + if (key->offset < (u64)-1 && key->offset < sk->max_offset) key->offset++; - else if (key->type < (u8)-1) + else if (key->type < (u8)-1 && key->type < sk->max_type) { + key->offset = 0; key->type++; - else if (key->objectid < (u64)-1) + } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { + key->offset = 0; + key->type = 0; key->objectid++; - ret = 0; + } else + ret = 1; overflow: *num_found += found; return ret; -- cgit v1.2.3 From 90fdde147fd32d18a20be5b498d5f26e56cca8a3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:14:54 -0400 Subject: Btrfs: return keys for large items to the search ioctl The search ioctl was skipping large items entirely (ones that are too big for the results buffer). This changes things to at least copy the item header so that we can send information about the item back to userland. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 291aa51ff42..fd757f57695 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -997,8 +997,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, read_extent_buffer(leaf, p, item_off, item_len); *sk_offset += item_len; - found++; } + found++; if (*num_found >= sk->nr_items) break; -- cgit v1.2.3 From 1b53ac4d1b75b23bdc2b54ace787b8f718a987ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:17:05 -0400 Subject: Btrfs: allow treeid==0 in the inode lookup ioctl When a root id of 0 is sent to the inode lookup ioctl, it will use the root of the file we're ioctling and pass the root id back to userland along with the results. This allows userland to do searches based on that root later on. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd757f57695..1e462de6556 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1215,6 +1215,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, } inode = fdentry(file)->d_inode; + if (args->treeid == 0) + args->treeid = BTRFS_I(inode)->root->root_key.objectid; + ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, args->treeid, args->objectid, args->name); -- cgit v1.2.3 From 8ad6fcab564c5bc956bdc3dfa440ab152b6e780f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:23:10 -0400 Subject: Btrfs: fix the inode ref searches done by btrfs_search_path_in_tree This is used by the inode lookup ioctl to follow all the backrefs up to the subvol root. But the search being done would sometimes land one past the last item in the leaf instead of finding the backref. This changes the search to look for the highest possible backref and hop back one item. It also fixes a leaked path on failure to find the root. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1e462de6556..2845c6ceecd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1147,12 +1147,13 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, root = btrfs_read_fs_root_no_name(info, &key); if (IS_ERR(root)) { printk(KERN_ERR "could not find root %llu\n", tree_id); - return -ENOENT; + ret = -ENOENT; + goto out; } key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; - key.offset = 0; + key.offset = (u64)-1; while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -1161,6 +1162,8 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, l = path->nodes[0]; slot = path->slots[0]; + if (ret > 0 && slot > 0) + slot--; btrfs_item_key_to_cpu(l, &key, slot); if (ret > 0 && (key.objectid != dirid || @@ -1184,7 +1187,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, btrfs_release_path(root, path); key.objectid = key.offset; - key.offset = 0; + key.offset = (u64)-1; dirid = key.objectid; } -- cgit v1.2.3