aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/bio.c4
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c196
-rw-r--r--fs/configfs/inode.c38
-rw-r--r--fs/dlm/dir.c7
-rw-r--r--fs/dlm/lockspace.c17
-rw-r--r--fs/dlm/lowcomms.c22
-rw-r--r--fs/dlm/lowcomms.h3
-rw-r--r--fs/dlm/member.c19
-rw-r--r--fs/dlm/requestqueue.c2
-rw-r--r--fs/eventfd.c3
-rw-r--r--fs/exofs/common.h6
-rw-r--r--fs/exofs/inode.c8
-rw-r--r--fs/exofs/osd.c26
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/fuse/Makefile1
-rw-r--r--fs/fuse/cuse.c610
-rw-r--r--fs/fuse/dev.c15
-rw-r--r--fs/fuse/dir.c33
-rw-r--r--fs/fuse/file.c346
-rw-r--r--fs/fuse/fuse_i.h47
-rw-r--r--fs/fuse/inode.c118
-rw-r--r--fs/gfs2/Makefile1
-rw-r--r--fs/gfs2/bmap.c3
-rw-r--r--fs/gfs2/glock.c12
-rw-r--r--fs/gfs2/log.c9
-rw-r--r--fs/gfs2/lops.c3
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/rgrp.c11
-rw-r--r--fs/gfs2/super.c4
-rw-r--r--fs/gfs2/trace_gfs2.h407
-rw-r--r--fs/inode.c2
-rw-r--r--fs/partitions/check.c42
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c523
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c25
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c53
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c49
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c479
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c67
-rw-r--r--fs/xfs/quota/xfs_dquot.c5
-rw-r--r--fs/xfs/quota/xfs_dquot.h1
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c1
-rw-r--r--fs/xfs/quota/xfs_qm.c168
-rw-r--r--fs/xfs/quota/xfs_qm.h21
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c77
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c1
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c113
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c66
-rw-r--r--fs/xfs/xfs_acl.c874
-rw-r--r--fs/xfs/xfs_acl.h97
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_arch.h32
-rw-r--r--fs/xfs/xfs_attr.c13
-rw-r--r--fs/xfs/xfs_bmap.c34
-rw-r--r--fs/xfs/xfs_bmap_btree.c4
-rw-r--r--fs/xfs/xfs_filestream.c6
-rw-r--r--fs/xfs/xfs_fs.h11
-rw-r--r--fs/xfs/xfs_iget.c8
-rw-r--r--fs/xfs/xfs_inode.c1
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_iomap.c13
-rw-r--r--fs/xfs/xfs_log_recover.c38
-rw-r--r--fs/xfs/xfs_mount.c105
-rw-r--r--fs/xfs/xfs_mount.h84
-rw-r--r--fs/xfs/xfs_qmops.c152
-rw-r--r--fs/xfs/xfs_quota.h129
-rw-r--r--fs/xfs/xfs_rename.c3
-rw-r--r--fs/xfs/xfs_rw.c1
-rw-r--r--fs/xfs/xfs_trans.c15
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c114
-rw-r--r--fs/xfs/xfs_vnodeops.h1
80 files changed, 3001 insertions, 2459 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f36b2..525da2e8f73 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@ source "fs/autofs/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
+config CUSE
+ tristate "Character device in Userspace support"
+ depends on FUSE_FS
+ help
+ This FUSE extension allows character devices to be
+ implemented in userspace.
+
+ If you want to develop or use a userspace character device
+ based on CUSE, answer Y or M.
+
config GENERIC_ACL
bool
select FS_POSIX_ACL
diff --git a/fs/bio.c b/fs/bio.c
index 59000215e59..5f80848c320 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -358,9 +358,9 @@ static void bio_kmalloc_destructor(struct bio *bio)
*
* If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
* a bio. This is due to the mempool guarantees. To make this work, callers
- * must never allocate more than 1 bio at the time from this pool. Callers
+ * must never allocate more than 1 bio at a time from this pool. Callers
* that need to allocate more than 1 bio must always submit the previously
- * allocate bio for IO before attempting to allocate a new one. Failure to
+ * allocated bio for IO before attempting to allocate a new one. Failure to
* do so can cause livelocks under memory pressure.
*
**/
diff --git a/fs/compat.c b/fs/compat.c
index 6aefb776dfe..cdd51a3a7c5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -471,7 +471,7 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
ret = sys_fcntl(fd, cmd, (unsigned long)&f);
set_fs(old_fs);
if (cmd == F_GETLK && ret == 0) {
- /* GETLK was successfule and we need to return the data...
+ /* GETLK was successful and we need to return the data...
* but it needs to fit in the compat structure.
* l_start shouldn't be too big, unless the original
* start + end is greater than COMPAT_OFF_T_MAX, in which
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index b83f6bcfa51..0aac371bff0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1765,7 +1765,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
* for some operations; this forces use of the newer bridge-utils that
- * use compatiable ioctls
+ * use compatible ioctls
*/
static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 762d287123c..da6061a6df4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -39,6 +39,9 @@ struct configfs_dirent {
umode_t s_mode;
struct dentry * s_dentry;
struct iattr * s_iattr;
+#ifdef CONFIG_LOCKDEP
+ int s_depth;
+#endif
};
#define CONFIGFS_ROOT 0x0001
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 05373db21a4..8e48b52205a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -78,11 +78,97 @@ static const struct dentry_operations configfs_dentry_ops = {
.d_delete = configfs_d_delete,
};
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Helpers to make lockdep happy with our recursive locking of default groups'
+ * inodes (see configfs_attach_group() and configfs_detach_group()).
+ * We put default groups i_mutexes in separate classes according to their depth
+ * from the youngest non-default group ancestor.
+ *
+ * For a non-default group A having default groups A/B, A/C, and A/C/D, default
+ * groups A/B and A/C will have their inode's mutex in class
+ * default_group_class[0], and default group A/C/D will be in
+ * default_group_class[1].
+ *
+ * The lock classes are declared and assigned in inode.c, according to the
+ * s_depth value.
+ * The s_depth value is initialized to -1, adjusted to >= 0 when attaching
+ * default groups, and reset to -1 when all default groups are attached. During
+ * attachment, if configfs_create() sees s_depth > 0, the lock class of the new
+ * inode's mutex is set to default_group_class[s_depth - 1].
+ */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+ sd->s_depth = -1;
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+ struct configfs_dirent *sd)
+{
+ int parent_depth = parent_sd->s_depth;
+
+ if (parent_depth >= 0)
+ sd->s_depth = parent_depth + 1;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+ /*
+ * item's i_mutex class is already set up, so s_depth is now only
+ * used to set new sub-directories s_depth, which is always done
+ * with item's i_mutex locked.
+ */
+ /*
+ * sd->s_depth == -1 iff we are a non default group.
+ * else (we are a default group) sd->s_depth > 0 (see
+ * create_dir()).
+ */
+ if (sd->s_depth == -1)
+ /*
+ * We are a non default group and we are going to create
+ * default groups.
+ */
+ sd->s_depth = 0;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+ /* We will not create default groups anymore. */
+ sd->s_depth = -1;
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+ struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
/*
* Allocates a new configfs_dirent and links it to the parent configfs_dirent
*/
-static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd,
- void * element)
+static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
+ void *element, int type)
{
struct configfs_dirent * sd;
@@ -94,6 +180,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
INIT_LIST_HEAD(&sd->s_links);
INIT_LIST_HEAD(&sd->s_children);
sd->s_element = element;
+ sd->s_type = type;
+ configfs_init_dirent_depth(sd);
spin_lock(&configfs_dirent_lock);
if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
@@ -138,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
{
struct configfs_dirent * sd;
- sd = configfs_new_dirent(parent_sd, element);
+ sd = configfs_new_dirent(parent_sd, element, type);
if (IS_ERR(sd))
return PTR_ERR(sd);
sd->s_mode = mode;
- sd->s_type = type;
sd->s_dentry = dentry;
if (dentry) {
dentry->d_fsdata = configfs_get(sd);
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
error = configfs_make_dirent(p->d_fsdata, d, k, mode,
CONFIGFS_DIR | CONFIGFS_USET_CREATING);
if (!error) {
+ configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
error = configfs_create(d, mode, init_dir);
if (!error) {
inc_nlink(p->d_inode);
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item,
* error, as rmdir() would.
*/
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+ configfs_adjust_dir_dirent_depth_before_populate(sd);
ret = populate_groups(to_config_group(item));
if (ret) {
configfs_detach_item(item);
dentry->d_inode->i_flags |= S_DEAD;
}
+ configfs_adjust_dir_dirent_depth_after_populate(sd);
mutex_unlock(&dentry->d_inode->i_mutex);
if (ret)
d_delete(dentry);
@@ -916,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
* Note, btw, that this can be called at *any* time, even when a configfs
* subsystem isn't registered, or when configfs is loading or unloading.
* Just like configfs_register_subsystem(). So we take the same
- * precautions. We pin the filesystem. We lock each i_mutex _in_order_
- * on our way down the tree. If we can find the target item in the
+ * precautions. We pin the filesystem. We lock configfs_dirent_lock.
+ * If we can find the target item in the
* configfs tree, it must be part of the subsystem tree as well, so we
- * do not need the subsystem semaphore. Holding the i_mutex chain locks
- * out mkdir() and rmdir(), who might be racing us.
+ * do not need the subsystem semaphore. Holding configfs_dirent_lock helps
+ * lock out mkdir() and rmdir(), who might be racing us.
*/
/*
@@ -933,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
* do that so we can unlock it if we find nothing.
*
* Here we do a depth-first search of the dentry hierarchy looking for
- * our object. We take i_mutex on each step of the way down. IT IS
- * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch,
- * we'll drop the i_mutex.
+ * our object.
+ * We deliberately ignore items tagged as dropping since they are virtually
+ * dead, as well as items in the middle of attachment since they virtually
+ * do not exist yet. This completes the locking out of racing mkdir() and
+ * rmdir().
+ * Note: subdirectories in the middle of attachment start with s_type =
+ * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When
+ * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of
+ * s_type is in configfs_new_dirent(), which has configfs_dirent_lock.
*
- * If the target is not found, -ENOENT is bubbled up and we have released
- * all locks. If the target was found, the locks will be cleared by
- * configfs_depend_rollback().
+ * If the target is not found, -ENOENT is bubbled up.
*
* This adds a requirement that all config_items be unique!
*
- * This is recursive because the locking traversal is tricky. There isn't
+ * This is recursive. There isn't
* much on the stack, though, so folks that need this function - be careful
* about your stack! Patches will be accepted to make it iterative.
*/
@@ -955,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin,
BUG_ON(!origin || !sd);
- /* Lock this guy on the way down */
- mutex_lock(&sd->s_dentry->d_inode->i_mutex);
if (sd->s_element == target) /* Boo-yah */
goto out;
list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
- if (child_sd->s_type & CONFIGFS_DIR) {
+ if ((child_sd->s_type & CONFIGFS_DIR) &&
+ !(child_sd->s_type & CONFIGFS_USET_DROPPING) &&
+ !(child_sd->s_type & CONFIGFS_USET_CREATING)) {
ret = configfs_depend_prep(child_sd->s_dentry,
target);
if (!ret)
@@ -970,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin,
}
/* We looped all our children and didn't find target */
- mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
ret = -ENOENT;
out:
return ret;
}
-/*
- * This is ONLY called if configfs_depend_prep() did its job. So we can
- * trust the entire path from item back up to origin.
- *
- * We walk backwards from item, unlocking each i_mutex. We finish by
- * unlocking origin.
- */
-static void configfs_depend_rollback(struct dentry *origin,
- struct config_item *item)
-{
- struct dentry *dentry = item->ci_dentry;
-
- while (dentry != origin) {
- mutex_unlock(&dentry->d_inode->i_mutex);
- dentry = dentry->d_parent;
- }
-
- mutex_unlock(&origin->d_inode->i_mutex);
-}
-
int configfs_depend_item(struct configfs_subsystem *subsys,
struct config_item *target)
{
@@ -1037,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
/* Ok, now we can trust subsys/s_item */
- /* Scan the tree, locking i_mutex recursively, return 0 if found */
+ spin_lock(&configfs_dirent_lock);
+ /* Scan the tree, return 0 if found */
ret = configfs_depend_prep(subsys_sd->s_dentry, target);
if (ret)
- goto out_unlock_fs;
+ goto out_unlock_dirent_lock;
- /* We hold all i_mutexes from the subsystem down to the target */
+ /*
+ * We are sure that the item is not about to be removed by rmdir(), and
+ * not in the middle of attachment by mkdir().
+ */
p = target->ci_dentry->d_fsdata;
p->s_dependent_count += 1;
- configfs_depend_rollback(subsys_sd->s_dentry, target);
-
+out_unlock_dirent_lock:
+ spin_unlock(&configfs_dirent_lock);
out_unlock_fs:
mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
@@ -1072,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
struct configfs_dirent *sd;
/*
- * Since we can trust everything is pinned, we just need i_mutex
- * on the item.
+ * Since we can trust everything is pinned, we just need
+ * configfs_dirent_lock.
*/
- mutex_lock(&target->ci_dentry->d_inode->i_mutex);
+ spin_lock(&configfs_dirent_lock);
sd = target->ci_dentry->d_fsdata;
BUG_ON(sd->s_dependent_count < 1);
@@ -1086,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
* After this unlock, we cannot trust the item to stay alive!
* DO NOT REFERENCE item after this unlock.
*/
- mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
+ spin_unlock(&configfs_dirent_lock);
}
EXPORT_SYMBOL(configfs_undepend_item);
@@ -1286,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
if (sd->s_type & CONFIGFS_USET_DEFAULT)
return -EPERM;
- /*
- * Here's where we check for dependents. We're protected by
- * i_mutex.
- */
- if (sd->s_dependent_count)
- return -EBUSY;
-
/* Get a working ref until we have the child */
parent_item = configfs_get_config_item(dentry->d_parent);
subsys = to_config_group(parent_item)->cg_subsys;
@@ -1316,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
- ret = configfs_detach_prep(dentry, &wait_mutex);
- if (ret)
- configfs_detach_rollback(dentry);
+ /*
+ * Here's where we check for dependents. We're protected by
+ * configfs_dirent_lock.
+ * If no dependent, atomically tag the item as dropping.
+ */
+ ret = sd->s_dependent_count ? -EBUSY : 0;
+ if (!ret) {
+ ret = configfs_detach_prep(dentry, &wait_mutex);
+ if (ret)
+ configfs_detach_rollback(dentry);
+ }
spin_unlock(&configfs_dirent_lock);
mutex_unlock(&configfs_symlink_mutex);
@@ -1429,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
*/
err = -ENOENT;
if (configfs_dirent_is_ready(parent_sd)) {
- file->private_data = configfs_new_dirent(parent_sd, NULL);
+ file->private_data = configfs_new_dirent(parent_sd, NULL, 0);
if (IS_ERR(file->private_data))
err = PTR_ERR(file->private_data);
else
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5d349d38e05..4921e7426d9 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -33,10 +33,15 @@
#include <linux/backing-dev.h>
#include <linux/capability.h>
#include <linux/sched.h>
+#include <linux/lockdep.h>
#include <linux/configfs.h>
#include "configfs_internal.h"
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
+#endif
+
extern struct super_block * configfs_sb;
static const struct address_space_operations configfs_aops = {
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
return inode;
}
+#ifdef CONFIG_LOCKDEP
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+ struct inode *inode)
+{
+ int depth = sd->s_depth;
+
+ if (depth > 0) {
+ if (depth <= ARRAY_SIZE(default_group_class)) {
+ lockdep_set_class(&inode->i_mutex,
+ &default_group_class[depth - 1]);
+ } else {
+ /*
+ * In practice the maximum level of locking depth is
+ * already reached. Just inform about possible reasons.
+ */
+ printk(KERN_INFO "configfs: Too many levels of inodes"
+ " for the locking correctness validator.\n");
+ printk(KERN_INFO "Spurious warnings may appear.\n");
+ }
+ }
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+ struct inode *inode)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
{
int error = 0;
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
struct inode *p_inode = dentry->d_parent->d_inode;
p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
}
+ configfs_set_inode_lock_class(sd, inode);
goto Proceed;
}
else
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 858fba14aaa..c4dfa1dcc86 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
spin_unlock(&ls->ls_recover_list_lock);
if (!found)
- de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL);
+ de = kzalloc(sizeof(struct dlm_direntry) + len,
+ ls->ls_allocation);
return de;
}
@@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
dlm_dir_clear(ls);
- last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
+ last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation);
if (!last_name)
goto out;
@@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
if (namelen > DLM_RESNAME_MAXLEN)
return -EINVAL;
- de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL);
+ de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
if (!de)
return -ENOMEM;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index cd8e2df3c29..d489fcc8671 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -384,7 +384,7 @@ static void threads_stop(void)
dlm_astd_stop();
}
-static int new_lockspace(char *name, int namelen, void **lockspace,
+static int new_lockspace(const char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
struct dlm_ls *ls;
@@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
break;
}
ls->ls_create_count++;
- module_put(THIS_MODULE);
- error = 1; /* not an error, return 0 */
+ *lockspace = ls;
+ error = 1;
break;
}
spin_unlock(&lslist_lock);
- if (error < 0)
- goto out;
if (error)
- goto ret_zero;
+ goto out;
error = -ENOMEM;
@@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
dlm_create_debug_file(ls);
log_debug(ls, "join complete");
- ret_zero:
*lockspace = ls;
return 0;
@@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
return error;
}
-int dlm_new_lockspace(char *name, int namelen, void **lockspace,
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
int error = 0;
@@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
error = new_lockspace(name, namelen, lockspace, flags, lvblen);
if (!error)
ls_count++;
- else if (!ls_count)
+ if (error > 0)
+ error = 0;
+ if (!ls_count)
threads_stop();
out:
mutex_unlock(&ls_lock);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 609108a8326..cdb580a9c7a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk)
lowcomms_write_space(sk);
}
+int dlm_lowcomms_connect_node(int nodeid)
+{
+ struct connection *con;
+
+ if (nodeid == dlm_our_nodeid())
+ return 0;
+
+ con = nodeid2con(nodeid, GFP_NOFS);
+ if (!con)
+ return -ENOMEM;
+ lowcomms_connect_sock(con);
+ return 0;
+}
+
/* Make a socket active */
static int add_sock(struct socket *sock, struct connection *con)
{
@@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con,
return;
}
- new_con = nodeid2con(nodeid, GFP_KERNEL);
+ new_con = nodeid2con(nodeid, GFP_NOFS);
if (!new_con)
return;
@@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con)
* the same time and the connections cross on the wire.
* In this case we store the incoming one in "othercon"
*/
- newcon = nodeid2con(nodeid, GFP_KERNEL);
+ newcon = nodeid2con(nodeid, GFP_NOFS);
if (!newcon) {
result = -ENOMEM;
goto accept_err;
@@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con)
struct connection *othercon = newcon->othercon;
if (!othercon) {
- othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
+ othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
if (!othercon) {
log_print("failed to allocate incoming socket");
mutex_unlock(&newcon->sock_mutex);
@@ -1421,7 +1435,7 @@ static int work_start(void)
static void stop_conn(struct connection *con)
{
con->flags |= 0x0F;
- if (con->sock)
+ if (con->sock && con->sock->sk)
con->sock->sk->sk_user_data = NULL;
}
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index a9a9618c0d3..1311e642628 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void);
int dlm_lowcomms_close(int nodeid);
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh);
+int dlm_lowcomms_connect_node(int nodeid);
#endif /* __LOWCOMMS_DOT_H__ */
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 26133f05ae3..b128775913b 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
/******************************************************************************
*******************************************************************************
**
-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
#include "recover.h"
#include "rcom.h"
#include "config.h"
+#include "lowcomms.h"
static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
{
@@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
static int dlm_add_member(struct dlm_ls *ls, int nodeid)
{
struct dlm_member *memb;
- int w;
+ int w, error;
- memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+ memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
if (!memb)
return -ENOMEM;
@@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
return w;
}
+ error = dlm_lowcomms_connect_node(nodeid);
+ if (error < 0) {
+ kfree(memb);
+ return error;
+ }
+
memb->nodeid = nodeid;
memb->weight = w;
add_ordered_member(ls, memb);
@@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
ls->ls_total_weight = total;
- array = kmalloc(sizeof(int) * total, GFP_KERNEL);
+ array = kmalloc(sizeof(int) * total, ls->ls_allocation);
if (!array)
return;
@@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
continue;
log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
- memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+ memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
if (!memb)
return -ENOMEM;
memb->nodeid = rv->new[i];
@@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
int *ids = NULL, *new = NULL;
int error, ids_count = 0, new_count = 0;
- rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
+ rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation);
if (!rv)
return -ENOMEM;
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index daa4183fbb8..7a2307c0891 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
struct rq_entry *e;
int length = ms->m_header.h_length - sizeof(struct dlm_message);
- e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
+ e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation);
if (!e) {
log_print("dlm_add_requestqueue: out of memory len %d", length);
return;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 2a701d593d3..3f0e1974abd 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
#include <linux/anon_inodes.h>
#include <linux/eventfd.h>
#include <linux/syscalls.h>
+#include <linux/module.h>
struct eventfd_ctx {
wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
return n;
}
+EXPORT_SYMBOL_GPL(eventfd_signal);
static int eventfd_release(struct inode *inode, struct file *file)
{
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
return file;
}
+EXPORT_SYMBOL_GPL(eventfd_fget);
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1512c4bb8c..24667eedc02 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or,
int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
-int osd_req_read_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-int osd_req_write_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ba8d9fab469..77d0a295eb1 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
struct inode *inode)
{
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
- struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
pcol->sbi = sbi;
- pcol->req_q = req_q;
+ pcol->req_q = osd_request_queue(sbi->s_dev);
pcol->inode = inode;
pcol->expected_pages = expected_pages;
@@ -266,7 +265,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
goto err;
}
- osd_req_read(or, &obj, pcol->bio, i_start);
+ osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
if (is_sync) {
exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
@@ -522,7 +521,8 @@ static int write_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
- osd_req_write(or, &obj, pcol_copy->bio, i_start);
+ pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+ osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
if (unlikely(ret)) {
EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index 06ca92672eb..b3d2ccb87aa 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
return -EIO;
}
-
-int osd_req_read_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
- struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
- struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
- if (!bio)
- return -ENOMEM;
-
- osd_req_read(or, obj, bio, offset);
- return 0;
-}
-
-int osd_req_write_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
- struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
- struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
- if (!bio)
- return -ENOMEM;
-
- osd_req_write(or, obj, bio, offset);
- return 0;
-}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index b2bbf45039e..f2e5811936d 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -27,7 +27,7 @@ struct ext2_inode_info {
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
- * it is ued for making block allocation decisions - we try to
+ * it is used for making block allocation decisions - we try to
* place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode.
*/
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 72437065f6a..e95eeb445e5 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -3,5 +3,6 @@
#
obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 00000000000..de792dcf327
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,610 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009 SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems. On initialization /dev/cuse is
+ * created. By opening the file and replying to the CUSE_INIT request
+ * userland CUSE server can create a character device. After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn : contains fuse_conn and serves as bonding structure
+ * channel : file handle connected to the userland CUSE server
+ * cdev : the implemented character device
+ * dev : generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies. When channel is
+ * closed, everything begins to destruct. The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN 64
+
+struct cuse_conn {
+ struct list_head list; /* linked on cuse_conntbl */
+ struct fuse_conn fc; /* fuse connection */
+ struct cdev *cdev; /* associated character device */
+ struct device *dev; /* device representing @cdev */
+
+ /* init parameters, set once during initialization */
+ bool unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+ return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+ return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file. All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ loff_t pos = 0;
+
+ return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ loff_t pos = 0;
+ /*
+ * No locking or generic_write_checks(), the server is
+ * responsible for locking and sanity checks.
+ */
+ return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+ dev_t devt = inode->i_cdev->dev;
+ struct cuse_conn *cc = NULL, *pos;
+ int rc;
+
+ /* look up and get the connection */
+ spin_lock(&cuse_lock);
+ list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+ if (pos->dev->devt == devt) {
+ fuse_conn_get(&pos->fc);
+ cc = pos;
+ break;
+ }
+ spin_unlock(&cuse_lock);
+
+ /* dead? */
+ if (!cc)
+ return -ENODEV;
+
+ /*
+ * Generic permission check is already done against the chrdev
+ * file, proceed to open.
+ */
+ rc = fuse_do_open(&cc->fc, 0, file, 0);
+ if (rc)
+ fuse_conn_put(&cc->fc);
+ return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+
+ fuse_sync_release(ff, file->f_flags);
+ fuse_conn_put(fc);
+
+ return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_file *ff = file->private_data;
+ struct cuse_conn *cc = fc_to_cc(ff->fc);
+ unsigned int flags = 0;
+
+ if (cc->unrestricted_ioctl)
+ flags |= FUSE_IOCTL_UNRESTRICTED;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_file *ff = file->private_data;
+ struct cuse_conn *cc = fc_to_cc(ff->fc);
+ unsigned int flags = FUSE_IOCTL_COMPAT;
+
+ if (cc->unrestricted_ioctl)
+ flags |= FUSE_IOCTL_UNRESTRICTED;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+ .owner = THIS_MODULE,
+ .read = cuse_read,
+ .write = cuse_write,
+ .open = cuse_open,
+ .release = cuse_release,
+ .unlocked_ioctl = cuse_file_ioctl,
+ .compat_ioctl = cuse_file_compat_ioctl,
+ .poll = fuse_file_poll,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+ const char *name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1. This function parses one pair and sets *@keyp to the
+ * start of the key and *@valp to the start of the value. Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'. *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+ char *p = *pp;
+ char *key, *val;
+
+ while (p < end && *p == '\0')
+ p++;
+ if (p == end)
+ return 0;
+
+ if (end[-1] != '\0') {
+ printk(KERN_ERR "CUSE: info not properly terminated\n");
+ return -EINVAL;
+ }
+
+ key = val = p;
+ p += strlen(p);
+
+ if (valp) {
+ strsep(&val, "=");
+ if (!val)
+ val = key + strlen(key);
+ key = strstrip(key);
+ val = strstrip(val);
+ } else
+ key = strstrip(key);
+
+ if (!strlen(key)) {
+ printk(KERN_ERR "CUSE: zero length info key specified\n");
+ return -EINVAL;
+ }
+
+ *pp = p;
+ *keyp = key;
+ if (valp)
+ *valp = val;
+
+ return 1;
+}
+
+/**
+ * cuse_parse_devinfo - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo. String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * it, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+ char *end = p + len;
+ char *key, *val;
+ int rc;
+
+ while (true) {
+ rc = cuse_parse_one(&p, end, &key, &val);
+ if (rc < 0)
+ return rc;
+ if (!rc)
+ break;
+ if (strcmp(key, "DEVNAME") == 0)
+ devinfo->name = val;
+ else
+ printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+ key);
+ }
+
+ if (!devinfo->name || !strlen(devinfo->name)) {
+ printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it. Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct cuse_conn *cc = fc_to_cc(fc);
+ struct cuse_init_out *arg = &req->misc.cuse_init_out;
+ struct page *page = req->pages[0];
+ struct cuse_devinfo devinfo = { };
+ struct device *dev;
+ struct cdev *cdev;
+ dev_t devt;
+ int rc;
+
+ if (req->out.h.error ||
+ arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+ goto err;
+ }
+
+ fc->minor = arg->minor;
+ fc->max_read = max_t(unsigned, arg->max_read, 4096);
+ fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+ /* parse init reply */
+ cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+ rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+ &devinfo);
+ if (rc)
+ goto err;
+
+ /* determine and reserve devt */
+ devt = MKDEV(arg->dev_major, arg->dev_minor);
+ if (!MAJOR(devt))
+ rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+ else
+ rc = register_chrdev_region(devt, 1, devinfo.name);
+ if (rc) {
+ printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+ goto err;
+ }
+
+ /* devt determined, create device */
+ rc = -ENOMEM;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ goto err_region;
+
+ device_initialize(dev);
+ dev_set_uevent_suppress(dev, 1);
+ dev->class = cuse_class;
+ dev->devt = devt;
+ dev->release = cuse_gendev_release;
+ dev_set_drvdata(dev, cc);
+ dev_set_name(dev, "%s", devinfo.name);
+
+ rc = device_add(dev);
+ if (rc)
+ goto err_device;
+
+ /* register cdev */
+ rc = -ENOMEM;
+ cdev = cdev_alloc();
+ if (!cdev)
+ goto err_device;
+
+ cdev->owner = THIS_MODULE;
+ cdev->ops = &cuse_frontend_fops;
+
+ rc = cdev_add(cdev, devt, 1);
+ if (rc)
+ goto err_cdev;
+
+ cc->dev = dev;
+ cc->cdev = cdev;
+
+ /* make the device available */
+ spin_lock(&cuse_lock);
+ list_add(&cc->list, cuse_conntbl_head(devt));
+ spin_unlock(&cuse_lock);
+
+ /* announce device availability */
+ dev_set_uevent_suppress(dev, 0);
+ kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+ __free_page(page);
+ return;
+
+err_cdev:
+ cdev_del(cdev);
+err_device:
+ put_device(dev);
+err_region:
+ unregister_chrdev_region(devt, 1);
+err:
+ fc->conn_error = 1;
+ goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+ int rc;
+ struct fuse_req *req;
+ struct page *page;
+ struct fuse_conn *fc = &cc->fc;
+ struct cuse_init_in *arg;
+
+ BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ rc = PTR_ERR(req);
+ goto err;
+ }
+
+ rc = -ENOMEM;
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto err_put_req;
+
+ arg = &req->misc.cuse_init_in;
+ arg->major = FUSE_KERNEL_VERSION;
+ arg->minor = FUSE_KERNEL_MINOR_VERSION;
+ arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+ req->in.h.opcode = CUSE_INIT;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct cuse_init_in);
+ req->in.args[0].value = arg;
+ req->out.numargs = 2;
+ req->out.args[0].size = sizeof(struct cuse_init_out);
+ req->out.args[0].value = &req->misc.cuse_init_out;
+ req->out.args[1].size = CUSE_INIT_INFO_MAX;
+ req->out.argvar = 1;
+ req->out.argpages = 1;
+ req->pages[0] = page;
+ req->num_pages = 1;
+ req->end = cuse_process_init_reply;
+ fuse_request_send_background(fc, req);
+
+ return 0;
+
+err_put_req:
+ fuse_put_request(fc, req);
+err:
+ return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+ struct cuse_conn *cc = fc_to_cc(fc);
+ kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * Userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initialization request kernel sends. This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init. The rest is done in cuse_process_init_reply().
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+ struct cuse_conn *cc;
+ int rc;
+
+ /* set up cuse_conn */
+ cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+ if (!cc)
+ return -ENOMEM;
+
+ fuse_conn_init(&cc->fc);
+
+ INIT_LIST_HEAD(&cc->list);
+ cc->fc.release = cuse_fc_release;
+
+ cc->fc.connected = 1;
+ cc->fc.blocked = 0;
+ rc = cuse_send_init(cc);
+ if (rc) {
+ fuse_conn_put(&cc->fc);
+ return rc;
+ }
+ file->private_data = &cc->fc; /* channel owns base reference to cc */
+
+ return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+ struct cuse_conn *cc = fc_to_cc(file->private_data);
+ int rc;
+
+ /* remove from the conntbl, no more access from this point on */
+ spin_lock(&cuse_lock);
+ list_del_init(&cc->list);
+ spin_unlock(&cuse_lock);
+
+ /* remove device */
+ if (cc->dev)
+ device_unregister(cc->dev);
+ if (cc->cdev) {
+ unregister_chrdev_region(cc->cdev->dev, 1);
+ cdev_del(cc->cdev);
+ }
+
+ /* kill connection and shutdown channel */
+ fuse_conn_kill(&cc->fc);
+ rc = fuse_dev_release(inode, file); /* puts the base reference */
+
+ return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initialization
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cuse_conn *cc = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cuse_conn *cc = dev_get_drvdata(dev);
+
+ fuse_abort_conn(&cc->fc);
+ return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+ __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+ __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+ { }
+};
+
+static struct miscdevice cuse_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "cuse",
+ .fops = &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+ int i, rc;
+
+ /* init conntbl */
+ for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+ INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+ /* inherit and extend fuse_dev_operations */
+ cuse_channel_fops = fuse_dev_operations;
+ cuse_channel_fops.owner = THIS_MODULE;
+ cuse_channel_fops.open = cuse_channel_open;
+ cuse_channel_fops.release = cuse_channel_release;
+
+ cuse_class = class_create(THIS_MODULE, "cuse");
+ if (IS_ERR(cuse_class))
+ return PTR_ERR(cuse_class);
+
+ cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+ rc = misc_register(&cuse_miscdev);
+ if (rc) {
+ class_destroy(cuse_class);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+ misc_deregister(&cuse_miscdev);
+ class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba76b68c52f..8fed2ed12f3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void)
fuse_request_init(req);
return req;
}
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
struct fuse_req *fuse_request_alloc_nofs(void)
{
@@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
atomic_dec(&fc->num_waiting);
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(fuse_get_req);
/*
* Return request in fuse_file->reserved_req. However that may
@@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
fuse_request_free(req);
}
}
+EXPORT_SYMBOL_GPL(fuse_put_request);
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
@@ -282,7 +285,7 @@ __releases(&fc->lock)
wake_up_all(&fc->blocked_waitq);
}
if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
- fc->connected) {
+ fc->connected && fc->bdi_initialized) {
clear_bdi_congested(&fc->bdi, READ);
clear_bdi_congested(&fc->bdi, WRITE);
}
@@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
spin_unlock(&fc->lock);
}
+EXPORT_SYMBOL_GPL(fuse_request_send);
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
struct fuse_req *req)
@@ -408,7 +412,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
fc->num_background++;
if (fc->num_background == FUSE_MAX_BACKGROUND)
fc->blocked = 1;
- if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
+ if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
+ fc->bdi_initialized) {
set_bdi_congested(&fc->bdi, READ);
set_bdi_congested(&fc->bdi, WRITE);
}
@@ -439,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
req->isreply = 1;
fuse_request_send_nowait(fc, req);
}
+EXPORT_SYMBOL_GPL(fuse_request_send_background);
/*
* Called under fc->lock
@@ -1105,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
}
spin_unlock(&fc->lock);
}
+EXPORT_SYMBOL_GPL(fuse_abort_conn);
-static int fuse_dev_release(struct inode *inode, struct file *file)
+int fuse_dev_release(struct inode *inode, struct file *file)
{
struct fuse_conn *fc = fuse_get_conn(file);
if (fc) {
@@ -1120,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
return 0;
}
+EXPORT_SYMBOL_GPL(fuse_dev_release);
static int fuse_dev_fasync(int fd, struct file *file, int on)
{
@@ -1142,6 +1150,7 @@ const struct file_operations fuse_dev_operations = {
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
};
+EXPORT_SYMBOL_GPL(fuse_dev_operations);
static struct miscdevice fuse_miscdevice = {
.minor = FUSE_MINOR,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8b8eebc5614..b3089a083d3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
}
/*
- * Synchronous release for the case when something goes wrong in CREATE_OPEN
- */
-static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
- u64 nodeid, int flags)
-{
- fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
- ff->reserved_req->force = 1;
- fuse_request_send(fc, ff->reserved_req);
- fuse_put_request(fc, ff->reserved_req);
- kfree(ff);
-}
-
-/*
* Atomic create+open operation
*
* If the filesystem doesn't support this, then fall back to separate
@@ -445,12 +432,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
goto out_free_ff;
fuse_put_request(fc, req);
+ ff->fh = outopen.fh;
+ ff->nodeid = outentry.nodeid;
+ ff->open_flags = outopen.open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
- ff->fh = outopen.fh;
- fuse_sync_release(fc, ff, outentry.nodeid, flags);
+ fuse_sync_release(ff, flags);
fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
return -ENOMEM;
}
@@ -460,11 +449,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
fuse_invalidate_attr(dir);
file = lookup_instantiate_filp(nd, entry, generic_file_open);
if (IS_ERR(file)) {
- ff->fh = outopen.fh;
- fuse_sync_release(fc, ff, outentry.nodeid, flags);
+ fuse_sync_release(ff, flags);
return PTR_ERR(file);
}
- fuse_finish_open(inode, file, ff, &outopen);
+ file->private_data = fuse_file_get(ff);
+ fuse_finish_open(inode, file);
return 0;
out_free_ff:
@@ -1035,7 +1024,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+ fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
fuse_request_send(fc, req);
nbytes = req->out.args[0].size;
err = req->out.h.error;
@@ -1101,12 +1090,14 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
static int fuse_dir_open(struct inode *inode, struct file *file)
{
- return fuse_open_common(inode, file, 1);
+ return fuse_open_common(inode, file, true);
}
static int fuse_dir_release(struct inode *inode, struct file *file)
{
- return fuse_release_common(inode, file, 1);
+ fuse_release_common(file, FUSE_RELEASEDIR);
+
+ return 0;
}
static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06f30e96567..fce6ce694fd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,13 +12,13 @@
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/module.h>
static const struct file_operations fuse_direct_io_file_operations;
-static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
- struct fuse_open_out *outargp)
+static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ int opcode, struct fuse_open_out *outargp)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_open_in inarg;
struct fuse_req *req;
int err;
@@ -31,8 +31,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
- req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -49,22 +49,27 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
struct fuse_file *ff;
+
ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
- if (ff) {
- ff->reserved_req = fuse_request_alloc();
- if (!ff->reserved_req) {
- kfree(ff);
- return NULL;
- } else {
- INIT_LIST_HEAD(&ff->write_entry);
- atomic_set(&ff->count, 0);
- spin_lock(&fc->lock);
- ff->kh = ++fc->khctr;
- spin_unlock(&fc->lock);
- }
- RB_CLEAR_NODE(&ff->polled_node);
- init_waitqueue_head(&ff->poll_wait);
+ if (unlikely(!ff))
+ return NULL;
+
+ ff->fc = fc;
+ ff->reserved_req = fuse_request_alloc();
+ if (unlikely(!ff->reserved_req)) {
+ kfree(ff);
+ return NULL;
}
+
+ INIT_LIST_HEAD(&ff->write_entry);
+ atomic_set(&ff->count, 0);
+ RB_CLEAR_NODE(&ff->polled_node);
+ init_waitqueue_head(&ff->poll_wait);
+
+ spin_lock(&fc->lock);
+ ff->kh = ++fc->khctr;
+ spin_unlock(&fc->lock);
+
return ff;
}
@@ -74,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff)
kfree(ff);
}
-static struct fuse_file *fuse_file_get(struct fuse_file *ff)
+struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
atomic_inc(&ff->count);
return ff;
@@ -82,40 +87,65 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- dput(req->misc.release.dentry);
- mntput(req->misc.release.vfsmount);
+ path_put(&req->misc.release.path);
}
static void fuse_file_put(struct fuse_file *ff)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- struct inode *inode = req->misc.release.dentry->d_inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
+
req->end = fuse_release_end;
- fuse_request_send_background(fc, req);
+ fuse_request_send_background(ff->fc, req);
kfree(ff);
}
}
-void fuse_finish_open(struct inode *inode, struct file *file,
- struct fuse_file *ff, struct fuse_open_out *outarg)
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ bool isdir)
{
- if (outarg->open_flags & FOPEN_DIRECT_IO)
+ struct fuse_open_out outarg;
+ struct fuse_file *ff;
+ int err;
+ int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+
+ ff = fuse_file_alloc(fc);
+ if (!ff)
+ return -ENOMEM;
+
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ if (err) {
+ fuse_file_free(ff);
+ return err;
+ }
+
+ if (isdir)
+ outarg.open_flags &= ~FOPEN_DIRECT_IO;
+
+ ff->fh = outarg.fh;
+ ff->nodeid = nodeid;
+ ff->open_flags = outarg.open_flags;
+ file->private_data = fuse_file_get(ff);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fuse_do_open);
+
+void fuse_finish_open(struct inode *inode, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+
+ if (ff->open_flags & FOPEN_DIRECT_IO)
file->f_op = &fuse_direct_io_file_operations;
- if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
+ if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
- if (outarg->open_flags & FOPEN_NONSEEKABLE)
+ if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
- ff->fh = outarg->fh;
- file->private_data = fuse_file_get(ff);
}
-int fuse_open_common(struct inode *inode, struct file *file, int isdir)
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_open_out outarg;
- struct fuse_file *ff;
int err;
/* VFS checks this, but only _after_ ->open() */
@@ -126,78 +156,85 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
if (err)
return err;
- ff = fuse_file_alloc(fc);
- if (!ff)
- return -ENOMEM;
-
- err = fuse_send_open(inode, file, isdir, &outarg);
+ err = fuse_do_open(fc, get_node_id(inode), file, isdir);
if (err)
- fuse_file_free(ff);
- else {
- if (isdir)
- outarg.open_flags &= ~FOPEN_DIRECT_IO;
- fuse_finish_open(inode, file, ff, &outarg);
- }
+ return err;
- return err;
+ fuse_finish_open(inode, file);
+
+ return 0;
}
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
+static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
+ struct fuse_conn *fc = ff->fc;
struct fuse_req *req = ff->reserved_req;
struct fuse_release_in *inarg = &req->misc.release.in;
+ spin_lock(&fc->lock);
+ list_del(&ff->write_entry);
+ if (!RB_EMPTY_NODE(&ff->polled_node))
+ rb_erase(&ff->polled_node, &fc->polled_files);
+ spin_unlock(&fc->lock);
+
+ wake_up_interruptible_sync(&ff->poll_wait);
+
inarg->fh = ff->fh;
inarg->flags = flags;
req->in.h.opcode = opcode;
- req->in.h.nodeid = nodeid;
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_release_in);
req->in.args[0].value = inarg;
}
-int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+void fuse_release_common(struct file *file, int opcode)
{
- struct fuse_file *ff = file->private_data;
- if (ff) {
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = ff->reserved_req;
-
- fuse_release_fill(ff, get_node_id(inode), file->f_flags,
- isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
+ struct fuse_file *ff;
+ struct fuse_req *req;
- /* Hold vfsmount and dentry until release is finished */
- req->misc.release.vfsmount = mntget(file->f_path.mnt);
- req->misc.release.dentry = dget(file->f_path.dentry);
+ ff = file->private_data;
+ if (unlikely(!ff))
+ return;
- spin_lock(&fc->lock);
- list_del(&ff->write_entry);
- if (!RB_EMPTY_NODE(&ff->polled_node))
- rb_erase(&ff->polled_node, &fc->polled_files);
- spin_unlock(&fc->lock);
+ req = ff->reserved_req;
+ fuse_prepare_release(ff, file->f_flags, opcode);
- wake_up_interruptible_sync(&ff->poll_wait);
- /*
- * Normally this will send the RELEASE request,
- * however if some asynchronous READ or WRITE requests
- * are outstanding, the sending will be delayed
- */
- fuse_file_put(ff);
- }
+ /* Hold vfsmount and dentry until release is finished */
+ path_get(&file->f_path);
+ req->misc.release.path = file->f_path;
- /* Return value is ignored by VFS */
- return 0;
+ /*
+ * Normally this will send the RELEASE request, however if
+ * some asynchronous READ or WRITE requests are outstanding,
+ * the sending will be delayed.
+ */
+ fuse_file_put(ff);
}
static int fuse_open(struct inode *inode, struct file *file)
{
- return fuse_open_common(inode, file, 0);
+ return fuse_open_common(inode, file, false);
}
static int fuse_release(struct inode *inode, struct file *file)
{
- return fuse_release_common(inode, file, 0);
+ fuse_release_common(file, FUSE_RELEASE);
+
+ /* return value is ignored by VFS */
+ return 0;
+}
+
+void fuse_sync_release(struct fuse_file *ff, int flags)
+{
+ WARN_ON(atomic_read(&ff->count) > 1);
+ fuse_prepare_release(ff, flags, FUSE_RELEASE);
+ ff->reserved_req->force = 1;
+ fuse_request_send(ff->fc, ff->reserved_req);
+ fuse_put_request(ff->fc, ff->reserved_req);
+ kfree(ff);
}
+EXPORT_SYMBOL_GPL(fuse_sync_release);
/*
* Scramble the ID space with XTEA, so that the value of the files_struct
@@ -371,8 +408,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
return fuse_fsync_common(file, de, datasync, 0);
}
-void fuse_read_fill(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count, int opcode)
+void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
+ size_t count, int opcode)
{
struct fuse_read_in *inarg = &req->misc.read.in;
struct fuse_file *ff = file->private_data;
@@ -382,7 +419,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
inarg->size = count;
inarg->flags = file->f_flags;
req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_read_in);
req->in.args[0].value = inarg;
@@ -392,12 +429,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
}
static size_t fuse_send_read(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count,
- fl_owner_t owner)
+ loff_t pos, size_t count, fl_owner_t owner)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
- fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+ fuse_read_fill(req, file, pos, count, FUSE_READ);
if (owner != NULL) {
struct fuse_read_in *inarg = &req->misc.read.in;
@@ -455,7 +492,7 @@ static int fuse_readpage(struct file *file, struct page *page)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- num_read = fuse_send_read(req, file, inode, pos, count, NULL);
+ num_read = fuse_send_read(req, file, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
@@ -504,19 +541,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
fuse_file_put(req->ff);
}
-static void fuse_send_readpages(struct fuse_req *req, struct file *file,
- struct inode *inode)
+static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
loff_t pos = page_offset(req->pages[0]);
size_t count = req->num_pages << PAGE_CACHE_SHIFT;
req->out.argpages = 1;
req->out.page_zeroing = 1;
- fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+ fuse_read_fill(req, file, pos, count, FUSE_READ);
req->misc.read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
- struct fuse_file *ff = file->private_data;
req->ff = fuse_file_get(ff);
req->end = fuse_readpages_end;
fuse_request_send_background(fc, req);
@@ -546,7 +582,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
- fuse_send_readpages(req, data->file, inode);
+ fuse_send_readpages(req, data->file);
data->req = req = fuse_get_req(fc);
if (IS_ERR(req)) {
unlock_page(page);
@@ -580,7 +616,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
if (!err) {
if (data.req->num_pages)
- fuse_send_readpages(data.req, file, inode);
+ fuse_send_readpages(data.req, file);
else
fuse_put_request(fc, data.req);
}
@@ -607,24 +643,19 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_read(iocb, iov, nr_segs, pos);
}
-static void fuse_write_fill(struct fuse_req *req, struct file *file,
- struct fuse_file *ff, struct inode *inode,
- loff_t pos, size_t count, int writepage)
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
+ loff_t pos, size_t count)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_write_out *outarg = &req->misc.write.out;
- memset(inarg, 0, sizeof(struct fuse_write_in));
inarg->fh = ff->fh;
inarg->offset = pos;
inarg->size = count;
- inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
- inarg->flags = file ? file->f_flags : 0;
req->in.h.opcode = FUSE_WRITE;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 2;
- if (fc->minor < 9)
+ if (ff->fc->minor < 9)
req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
else
req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -636,13 +667,15 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
}
static size_t fuse_send_write(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count,
- fl_owner_t owner)
+ loff_t pos, size_t count, fl_owner_t owner)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
- fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_write_in *inarg = &req->misc.write.in;
+
+ fuse_write_fill(req, ff, pos, count);
+ inarg->flags = file->f_flags;
if (owner != NULL) {
- struct fuse_write_in *inarg = &req->misc.write.in;
inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
}
@@ -700,7 +733,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
req->num_pages = 1;
req->pages[0] = page;
req->page_offset = offset;
- nres = fuse_send_write(req, file, inode, pos, count, NULL);
+ nres = fuse_send_write(req, file, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err && !nres)
@@ -741,7 +774,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
for (i = 0; i < req->num_pages; i++)
fuse_wait_on_page_writeback(inode, req->pages[i]->index);
- res = fuse_send_write(req, file, inode, pos, count, NULL);
+ res = fuse_send_write(req, file, pos, count, NULL);
offset = req->page_offset;
count = res;
@@ -979,25 +1012,23 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
return 0;
}
-static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, int write)
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos;
ssize_t res = 0;
struct fuse_req *req;
- if (is_bad_inode(inode))
- return -EIO;
-
req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
while (count) {
size_t nres;
+ fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
int err = fuse_get_user_pages(req, buf, &nbytes, write);
if (err) {
@@ -1006,11 +1037,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
if (write)
- nres = fuse_send_write(req, file, inode, pos, nbytes,
- current->files);
+ nres = fuse_send_write(req, file, pos, nbytes, owner);
else
- nres = fuse_send_read(req, file, inode, pos, nbytes,
- current->files);
+ nres = fuse_send_read(req, file, pos, nbytes, owner);
+
fuse_release_user_pages(req, !write);
if (req->out.h.error) {
if (!res)
@@ -1034,20 +1064,27 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
}
fuse_put_request(fc, req);
- if (res > 0) {
- if (write)
- fuse_write_update_size(inode, pos);
+ if (res > 0)
*ppos = pos;
- }
- fuse_invalidate_attr(inode);
return res;
}
+EXPORT_SYMBOL_GPL(fuse_direct_io);
static ssize_t fuse_direct_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- return fuse_direct_io(file, buf, count, ppos, 0);
+ ssize_t res;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ res = fuse_direct_io(file, buf, count, ppos, 0);
+
+ fuse_invalidate_attr(inode);
+
+ return res;
}
static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
@@ -1055,12 +1092,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
{
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t res;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
res = generic_write_checks(file, ppos, &count, 0);
- if (!res)
+ if (!res) {
res = fuse_direct_io(file, buf, count, ppos, 1);
+ if (res > 0)
+ fuse_write_update_size(inode, *ppos);
+ }
mutex_unlock(&inode->i_mutex);
+
+ fuse_invalidate_attr(inode);
+
return res;
}
@@ -1177,9 +1224,10 @@ static int fuse_writepage_locked(struct page *page)
req->ff = fuse_file_get(ff);
spin_unlock(&fc->lock);
- fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
+ fuse_write_fill(req, ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
+ req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
@@ -1603,12 +1651,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
* limits ioctl data transfers to well-formed ioctls and is the forced
* behavior for all FUSE servers.
*/
-static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int flags)
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags)
{
- struct inode *inode = file->f_dentry->d_inode;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_conn *fc = ff->fc;
struct fuse_ioctl_in inarg = {
.fh = ff->fh,
.cmd = cmd,
@@ -1627,13 +1674,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
/* assume all the iovs returned by client always fits in a page */
BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
- if (!fuse_allow_task(fc, current))
- return -EACCES;
-
- err = -EIO;
- if (is_bad_inode(inode))
- goto out;
-
err = -ENOMEM;
pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
iov_page = alloc_page(GFP_KERNEL);
@@ -1694,7 +1734,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
/* okay, let's send it to the client */
req->in.h.opcode = FUSE_IOCTL;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1777,17 +1817,33 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
return err ? err : outarg.result;
}
+EXPORT_SYMBOL_GPL(fuse_do_ioctl);
+
+static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int flags)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
static long fuse_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return fuse_file_do_ioctl(file, cmd, arg, 0);
+ return fuse_file_ioctl_common(file, cmd, arg, 0);
}
static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
+ return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
}
/*
@@ -1841,11 +1897,10 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
spin_unlock(&fc->lock);
}
-static unsigned fuse_file_poll(struct file *file, poll_table *wait)
+unsigned fuse_file_poll(struct file *file, poll_table *wait)
{
- struct inode *inode = file->f_dentry->d_inode;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_conn *fc = ff->fc;
struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
struct fuse_poll_out outarg;
struct fuse_req *req;
@@ -1870,7 +1925,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
return PTR_ERR(req);
req->in.h.opcode = FUSE_POLL;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1889,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
}
return POLLERR;
}
+EXPORT_SYMBOL_GPL(fuse_file_poll);
/*
* This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6fc5aedaa0d..aaf2f9ff970 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -97,8 +97,13 @@ struct fuse_inode {
struct list_head writepages;
};
+struct fuse_conn;
+
/** FUSE specific file data */
struct fuse_file {
+ /** Fuse connection for this file */
+ struct fuse_conn *fc;
+
/** Request reserved for flush and release */
struct fuse_req *reserved_req;
@@ -108,9 +113,15 @@ struct fuse_file {
/** File handle used by userspace */
u64 fh;
+ /** Node id of this file */
+ u64 nodeid;
+
/** Refcount */
atomic_t count;
+ /** FOPEN_* flags returned by open */
+ u32 open_flags;
+
/** Entry on inode's write_files list */
struct list_head write_entry;
@@ -185,8 +196,6 @@ enum fuse_req_state {
FUSE_REQ_FINISHED
};
-struct fuse_conn;
-
/**
* A request to the client
*/
@@ -248,11 +257,12 @@ struct fuse_req {
struct fuse_forget_in forget_in;
struct {
struct fuse_release_in in;
- struct vfsmount *vfsmount;
- struct dentry *dentry;
+ struct path path;
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
+ struct cuse_init_in cuse_init_in;
+ struct cuse_init_out cuse_init_out;
struct {
struct fuse_read_in in;
u64 attr_ver;
@@ -386,6 +396,9 @@ struct fuse_conn {
/** Filesystem supports NFS exporting. Only set in INIT */
unsigned export_support:1;
+ /** Set if bdi is valid */
+ unsigned bdi_initialized:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -515,25 +528,24 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
* Initialize READ or READDIR request
*/
void fuse_read_fill(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count, int opcode);
+ loff_t pos, size_t count, int opcode);
/**
* Send OPEN or OPENDIR request
*/
-int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
+struct fuse_file *fuse_file_get(struct fuse_file *ff);
void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file,
- struct fuse_file *ff, struct fuse_open_out *outarg);
+void fuse_finish_open(struct inode *inode, struct file *file);
-/** Fill in ff->reserved_req with a RELEASE request */
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode);
+void fuse_sync_release(struct fuse_file *ff, int flags);
/**
* Send RELEASE or RELEASEDIR request
*/
-int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+void fuse_release_common(struct file *file, int opcode);
/**
* Send FSYNC or FSYNCDIR request
@@ -652,10 +664,12 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
*/
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
+void fuse_conn_kill(struct fuse_conn *fc);
+
/**
* Initialize fuse_conn
*/
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
+void fuse_conn_init(struct fuse_conn *fc);
/**
* Release reference to fuse_conn
@@ -694,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode);
u64 fuse_get_attr_version(struct fuse_conn *fc);
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ bool isdir);
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write);
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags);
+unsigned fuse_file_poll(struct file *file, poll_table *wait);
+int fuse_dev_release(struct inode *inode, struct file *file);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 91f7c85f1ff..f0df55a5292 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -277,11 +277,14 @@ static void fuse_send_destroy(struct fuse_conn *fc)
}
}
-static void fuse_put_super(struct super_block *sb)
+static void fuse_bdi_destroy(struct fuse_conn *fc)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ if (fc->bdi_initialized)
+ bdi_destroy(&fc->bdi);
+}
- fuse_send_destroy(fc);
+void fuse_conn_kill(struct fuse_conn *fc)
+{
spin_lock(&fc->lock);
fc->connected = 0;
fc->blocked = 0;
@@ -295,7 +298,16 @@ static void fuse_put_super(struct super_block *sb)
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
mutex_unlock(&fuse_mutex);
- bdi_destroy(&fc->bdi);
+ fuse_bdi_destroy(fc);
+}
+EXPORT_SYMBOL_GPL(fuse_conn_kill);
+
+static void fuse_put_super(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ fuse_send_destroy(fc);
+ fuse_conn_kill(fc);
fuse_conn_put(fc);
}
@@ -466,10 +478,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
+void fuse_conn_init(struct fuse_conn *fc)
{
- int err;
-
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
mutex_init(&fc->inst_mutex);
@@ -484,49 +494,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
INIT_LIST_HEAD(&fc->bg_queue);
INIT_LIST_HEAD(&fc->entry);
atomic_set(&fc->num_waiting, 0);
- fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
- fc->bdi.unplug_io_fn = default_unplug_io_fn;
- /* fuse does it's own writeback accounting */
- fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
fc->khctr = 0;
fc->polled_files = RB_ROOT;
- fc->dev = sb->s_dev;
- err = bdi_init(&fc->bdi);
- if (err)
- goto error_mutex_destroy;
- if (sb->s_bdev) {
- err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
- MAJOR(fc->dev), MINOR(fc->dev));
- } else {
- err = bdi_register_dev(&fc->bdi, fc->dev);
- }
- if (err)
- goto error_bdi_destroy;
- /*
- * For a single fuse filesystem use max 1% of dirty +
- * writeback threshold.
- *
- * This gives about 1M of write buffer for memory maps on a
- * machine with 1G and 10% dirty_ratio, which should be more
- * than enough.
- *
- * Privileged users can raise it by writing to
- *
- * /sys/class/bdi/<bdi>/max_ratio
- */
- bdi_set_max_ratio(&fc->bdi, 1);
fc->reqctr = 0;
fc->blocked = 1;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
-
- return 0;
-
- error_bdi_destroy:
- bdi_destroy(&fc->bdi);
- error_mutex_destroy:
- mutex_destroy(&fc->inst_mutex);
- return err;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
@@ -539,12 +512,14 @@ void fuse_conn_put(struct fuse_conn *fc)
fc->release(fc);
}
}
+EXPORT_SYMBOL_GPL(fuse_conn_put);
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
atomic_inc(&fc->count);
return fc;
}
+EXPORT_SYMBOL_GPL(fuse_conn_get);
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
{
@@ -797,6 +772,48 @@ static void fuse_free_conn(struct fuse_conn *fc)
kfree(fc);
}
+static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
+{
+ int err;
+
+ fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ fc->bdi.unplug_io_fn = default_unplug_io_fn;
+ /* fuse does it's own writeback accounting */
+ fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+
+ err = bdi_init(&fc->bdi);
+ if (err)
+ return err;
+
+ fc->bdi_initialized = 1;
+
+ if (sb->s_bdev) {
+ err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+ MAJOR(fc->dev), MINOR(fc->dev));
+ } else {
+ err = bdi_register_dev(&fc->bdi, fc->dev);
+ }
+
+ if (err)
+ return err;
+
+ /*
+ * For a single fuse filesystem use max 1% of dirty +
+ * writeback threshold.
+ *
+ * This gives about 1M of write buffer for memory maps on a
+ * machine with 1G and 10% dirty_ratio, which should be more
+ * than enough.
+ *
+ * Privileged users can raise it by writing to
+ *
+ * /sys/class/bdi/<bdi>/max_ratio
+ */
+ bdi_set_max_ratio(&fc->bdi, 1);
+
+ return 0;
+}
+
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
struct fuse_conn *fc;
@@ -843,11 +860,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!fc)
goto err_fput;
- err = fuse_conn_init(fc, sb);
- if (err) {
- kfree(fc);
- goto err_fput;
- }
+ fuse_conn_init(fc);
+
+ fc->dev = sb->s_dev;
+ err = fuse_bdi_init(fc, sb);
+ if (err)
+ goto err_put_conn;
fc->release = fuse_free_conn;
fc->flags = d.flags;
@@ -911,7 +929,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
err_put_root:
dput(root_dentry);
err_put_conn:
- bdi_destroy(&fc->bdi);
+ fuse_bdi_destroy(fc);
fuse_conn_put(fc);
err_fput:
fput(file);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index d53a9bea1c2..3da2f1f4f73 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,3 +1,4 @@
+EXTRA_CFLAGS := -I$(src)
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 329763530dc..6d47379e794 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -25,6 +25,7 @@
#include "trans.h"
#include "dir.h"
#include "util.h"
+#include "trace_gfs2.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
* block is 512, so __u16 is fine for that. It saves stack space to
@@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
clear_buffer_boundary(bh_map);
+ trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
if (gfs2_is_dir(ip)) {
bsize = sdp->sd_jbsize;
arr = sdp->sd_jheightsize;
@@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
ret = 0;
out:
release_metapath(&mp);
+ trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
bmap_unlock(ip, create);
return ret;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2bf62bcc518..297421c0427 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
#include "super.h"
#include "util.h"
#include "bmap.h"
+#define CREATE_TRACE_POINTS
+#include "trace_gfs2.h"
struct gfs2_gl_hash_bucket {
struct hlist_head hb_list;
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl)
if (aspace)
gfs2_aspace_put(aspace);
-
+ trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
}
@@ -317,14 +319,17 @@ restart:
return 2;
gh->gh_error = ret;
list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
gfs2_holder_wake(gh);
goto restart;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, 1);
gfs2_holder_wake(gh);
goto restart;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, 0);
gfs2_holder_wake(gh);
continue;
}
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret)
else
continue;
list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
gfs2_holder_wake(gh);
}
}
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
int rv;
spin_lock(&gl->gl_spin);
+ trace_gfs2_glock_state_change(gl, state);
state_change(gl, state);
gh = find_first_waiter(gl);
@@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
gl->gl_demote_state != state) {
gl->gl_demote_state = LM_ST_UNLOCKED;
}
+ trace_gfs2_demote_rq(gl);
}
/**
@@ -936,6 +944,7 @@ fail:
goto do_cancel;
return;
}
+ trace_gfs2_glock_queue(gh, 1);
list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
+ trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f2e449c595b..13c6237c5f6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
#include "meta_io.h"
#include "util.h"
#include "dir.h"
+#include "trace_gfs2.h"
#define PULL 1
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
gfs2_log_lock(sdp);
}
atomic_sub(blks, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, -blks);
gfs2_log_unlock(sdp);
mutex_unlock(&sdp->sd_log_reserve_mutex);
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
gfs2_log_lock(sdp);
atomic_add(blks, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, blks);
gfs2_assert_withdraw(sdp,
atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
gfs2_log_lock(sdp);
atomic_add(dist, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, dist);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
up_write(&sdp->sd_log_flush_lock);
return;
}
+ trace_gfs2_log_flush(sdp, 1);
ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
gfs2_log_lock(sdp);
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
+ trace_gfs2_log_blocks(sdp, -1);
gfs2_log_unlock(sdp);
log_write_header(sdp, 0, PULL);
}
@@ -763,7 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
ai = NULL;
}
gfs2_log_unlock(sdp);
-
+ trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);
kfree(ai);
@@ -787,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
atomic_add(unused, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, unused);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
sdp->sd_log_blks_reserved = reserved;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 00315f50fa4..9969ff062c5 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,6 +27,7 @@
#include "rgrp.h"
#include "trans.h"
#include "util.h"
+#include "trace_gfs2.h"
/**
* gfs2_pin - Pin a buffer in memory
@@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
get_bh(bh);
+ trace_gfs2_pin(bd, 1);
}
/**
@@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+ trace_gfs2_pin(bd, 0);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cc34f271b3e..7bc3c45cd67 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
#include "log.h"
#include "quota.h"
#include "dir.h"
+#include "trace_gfs2.h"
#define DO 0
#define UNDO 1
@@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
/* Map the extents for this journal's blocks */
map_journal_extents(sdp);
}
+ trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index de3239731db..daa4ae341a2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -29,6 +29,7 @@
#include "util.h"
#include "log.h"
#include "inode.h"
+#include "trace_gfs2.h"
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
@@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone -= *n;
spin_unlock(&sdp->sd_rindex_spin);
-
+ trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone--;
spin_unlock(&sdp->sd_rindex_spin);
-
+ trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
return block;
}
@@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
-
+ trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
-
+ trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
+ trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
@@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
+ trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c8930b31cdf..0a680133647 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -719,8 +719,6 @@ static void gfs2_put_super(struct super_block *sb)
int error;
struct gfs2_jdesc *jd;
- lock_kernel();
-
/* Unfreeze the filesystem, if we need to */
mutex_lock(&sdp->sd_freeze_lock);
@@ -787,8 +785,6 @@ restart:
/* At this point, we're through participating in the lockspace */
gfs2_sys_fs_del(sdp);
-
- unlock_kernel();
}
/**
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
new file mode 100644
index 00000000000..98d6ef1c1dc
--- /dev/null
+++ b/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,407 @@
+#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GFS2_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+#define TRACE_INCLUDE_FILE trace_gfs2
+
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/dlmconstants.h>
+#include <linux/gfs2_ondisk.h>
+#include "incore.h"
+#include "glock.h"
+
+/* Expand a DLM lock-mode suffix into one { value, "name" } table entry. */
+#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
+/* Print a DLM lock mode value symbolically (IV, NL, CR, CW, PR, PW or EX). */
+#define glock_trace_name(x) __print_symbolic(x, \
+ dlm_state_name(IV), \
+ dlm_state_name(NL), \
+ dlm_state_name(CR), \
+ dlm_state_name(CW), \
+ dlm_state_name(PR), \
+ dlm_state_name(PW), \
+ dlm_state_name(EX))
+
+/* Print a GFS2_BLKST_* block state as "free", "used", "dinode" or "unlinked". */
+#define block_state_name(x) __print_symbolic(x, \
+ { GFS2_BLKST_FREE, "free" }, \
+ { GFS2_BLKST_USED, "used" }, \
+ { GFS2_BLKST_DINODE, "dinode" }, \
+ { GFS2_BLKST_UNLINKED, "unlinked" })
+
+/*
+ * Print a glock flags word as a string of single-letter codes, one letter
+ * per GLF_* bit that is set, with no separator between letters.
+ */
+#define show_glock_flags(flags) __print_flags(flags, "", \
+ {(1UL << GLF_LOCK), "l" }, \
+ {(1UL << GLF_DEMOTE), "D" }, \
+ {(1UL << GLF_PENDING_DEMOTE), "d" }, \
+ {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
+ {(1UL << GLF_DIRTY), "y" }, \
+ {(1UL << GLF_LFLUSH), "f" }, \
+ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \
+ {(1UL << GLF_REPLY_PENDING), "r" }, \
+ {(1UL << GLF_INITIAL), "I" }, \
+ {(1UL << GLF_FROZEN), "F" })
+
+#ifndef NUMPTY
+#define NUMPTY
+/*
+ * Translate a glock state (LM_ST_*) into the DLM lock mode constant that
+ * the trace events store and print.  Any state other than shared, deferred
+ * or exclusive is reported as DLM_LOCK_NL.
+ */
+static inline u8 glock_trace_state(unsigned int state)
+{
+	if (state == LM_ST_SHARED)
+		return DLM_LOCK_PR;
+	if (state == LM_ST_DEFERRED)
+		return DLM_LOCK_CW;
+	if (state == LM_ST_EXCLUSIVE)
+		return DLM_LOCK_EX;
+	return DLM_LOCK_NL;
+}
+#endif
+
+/* Section 1 - Locking
+ *
+ * Objectives:
+ * Latency: Remote demote request to state change
+ * Latency: Local lock request to state change
+ * Latency: State change to lock grant
+ * Correctness: Ordering of local lock state vs. I/O requests
+ * Correctness: Responses to remote demote requests
+ */
+
+/*
+ * General glock state change (DLM lock request completes).
+ *
+ * Records the device, the glock number and type, the glock's current
+ * state, the new state passed by the caller, the target and demote states
+ * (all mapped through glock_trace_state()) and the raw gl_flags word.
+ */
+TRACE_EVENT(gfs2_glock_state_change,
+
+ TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
+
+ TP_ARGS(gl, new_state),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( u8, new_state )
+ __field( u8, dmt_state )
+ __field( u8, tgt_state )
+ __field( unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->new_state = glock_trace_state(new_state);
+ __entry->tgt_state = glock_trace_state(gl->gl_target);
+ __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
+ __entry->flags = gl->gl_flags;
+ ),
+
+ TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(__entry->new_state),
+ glock_trace_name(__entry->tgt_state),
+ glock_trace_name(__entry->dmt_state),
+ show_glock_flags(__entry->flags))
+);
+
+/*
+ * State change -> unlocked, glock is being deallocated.
+ *
+ * Records the device, glock number and type, the current state and the
+ * flags.  The destination state is not stored: the output always prints
+ * DLM_LOCK_IV as the state being moved to.
+ */
+TRACE_EVENT(gfs2_glock_put,
+
+ TP_PROTO(const struct gfs2_glock *gl),
+
+ TP_ARGS(gl),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->flags = gl->gl_flags;
+ ),
+
+ TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->gltype, (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(DLM_LOCK_IV),
+ show_glock_flags(__entry->flags))
+
+);
+
+/*
+ * Callback (local or remote) requesting lock demotion.
+ *
+ * Records the device, glock number and type, the glock's current state,
+ * the requested demote state (gl_demote_state) and the flags.
+ */
+TRACE_EVENT(gfs2_demote_rq,
+
+ TP_PROTO(const struct gfs2_glock *gl),
+
+ TP_ARGS(gl),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( u8, dmt_state )
+ __field( unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
+ __entry->flags = gl->gl_flags;
+ ),
+
+ TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(__entry->dmt_state),
+ show_glock_flags(__entry->flags))
+
+);
+
+/*
+ * Promotion/grant of a glock.
+ *
+ * Records the device, the number and type of the holder's glock, the
+ * holder's requested state (gh_state) and the caller-supplied "first"
+ * flag, which is printed as "first" when non-zero and "other" otherwise.
+ */
+TRACE_EVENT(gfs2_promote,
+
+ TP_PROTO(const struct gfs2_holder *gh, int first),
+
+ TP_ARGS(gh, first),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( int, first )
+ __field( u8, state )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->glnum = gh->gh_gl->gl_name.ln_number;
+ __entry->gltype = gh->gh_gl->gl_name.ln_type;
+ __entry->first = first;
+ __entry->state = glock_trace_state(gh->gh_state);
+ ),
+
+ TP_printk("%u,%u glock %u:%llu promote %s %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ __entry->first ? "first": "other",
+ glock_trace_name(__entry->state))
+);
+
+/*
+ * Queue/dequeue a lock request.
+ *
+ * Records the device, the number and type of the holder's glock and the
+ * holder's requested state.  A non-zero "queue" argument is printed as
+ * "queue", zero as "dequeue".
+ */
+TRACE_EVENT(gfs2_glock_queue,
+
+ TP_PROTO(const struct gfs2_holder *gh, int queue),
+
+ TP_ARGS(gh, queue),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( int, queue )
+ __field( u8, state )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->glnum = gh->gh_gl->gl_name.ln_number;
+ __entry->gltype = gh->gh_gl->gl_name.ln_type;
+ __entry->queue = queue;
+ __entry->state = glock_trace_state(gh->gh_state);
+ ),
+
+ TP_printk("%u,%u glock %u:%llu %squeue %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ __entry->queue ? "" : "de",
+ glock_trace_name(__entry->state))
+);
+
+/* Section 2 - Log/journal
+ *
+ * Objectives:
+ * Latency: Log flush time
+ * Correctness: pin/unpin vs. disk I/O ordering
+ * Performance: Log usage stats
+ */
+
+/*
+ * Pin/unpin a block in the log.
+ *
+ * Records the device, the buffer's block number (b_blocknr) and size
+ * (b_size), and the number of the glock the buffer data belongs to, which
+ * is printed under the label "inode".  A non-zero "pin" argument is
+ * printed as "pin", zero as "unpin".
+ */
+TRACE_EVENT(gfs2_pin,
+
+ TP_PROTO(const struct gfs2_bufdata *bd, int pin),
+
+ TP_ARGS(bd, pin),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, pin )
+ __field( u32, len )
+ __field( sector_t, block )
+ __field( u64, ino )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->pin = pin;
+ __entry->len = bd->bd_bh->b_size;
+ __entry->block = bd->bd_bh->b_blocknr;
+ __entry->ino = bd->bd_gl->gl_name.ln_number;
+ ),
+
+ TP_printk("%u,%u log %s %llu/%lu inode %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->pin ? "pin" : "unpin",
+ (unsigned long long)__entry->block,
+ (unsigned long)__entry->len,
+ (unsigned long long)__entry->ino)
+);
+
+/*
+ * Flushing the log.
+ *
+ * Records the device and the superblock's current log sequence number
+ * (sd_log_sequence).  A non-zero "start" argument is printed as "start",
+ * zero as "end".
+ */
+TRACE_EVENT(gfs2_log_flush,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, int start),
+
+ TP_ARGS(sdp, start),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, start )
+ __field( u64, log_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sdp->sd_vfs->s_dev;
+ __entry->start = start;
+ __entry->log_seq = sdp->sd_log_sequence;
+ ),
+
+ TP_printk("%u,%u log flush %s %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->start ? "start" : "end",
+ (unsigned long long)__entry->log_seq)
+);
+
+/*
+ * Reserving/releasing blocks in the log.
+ *
+ * Records the device and a signed block count.  The output always reads
+ * "log reserve <n>"; presumably a negative count denotes a release --
+ * confirm against the call sites.
+ */
+TRACE_EVENT(gfs2_log_blocks,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
+
+ TP_ARGS(sdp, blocks),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, blocks )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sdp->sd_vfs->s_dev;
+ __entry->blocks = blocks;
+ ),
+
+ TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->blocks)
+);
+
+/* Section 3 - bmap
+ *
+ * Objectives:
+ * Latency: Bmap request time
+ * Performance: Block allocator tracing
+ * Correctness: Test of discard generation vs. blocks allocated
+ */
+
+/*
+ * Map an extent of blocks, possibly a new allocation.
+ *
+ * Records the device, the inode's disk address (i_no_addr), the logical
+ * block being mapped, the buffer head's size and state bits, the "create"
+ * flag and the caller-supplied error code.  The physical block is taken
+ * from bh->b_blocknr only when the buffer is mapped; otherwise 0 is stored.
+ */
+TRACE_EVENT(gfs2_bmap,
+
+ TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
+ sector_t lblock, int create, int errno),
+
+ TP_ARGS(ip, bh, lblock, create, errno),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, lblock )
+ __field( sector_t, pblock )
+ __field( u64, inum )
+ __field( unsigned long, state )
+ __field( u32, len )
+ __field( int, create )
+ __field( int, errno )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->lblock = lblock;
+ __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0;
+ __entry->inum = ip->i_no_addr;
+ __entry->state = bh->b_state;
+ __entry->len = bh->b_size;
+ __entry->create = create;
+ __entry->errno = errno;
+ ),
+
+ TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->inum,
+ (unsigned long long)__entry->lblock,
+ (unsigned long)__entry->len,
+ (unsigned long long)__entry->pblock,
+ __entry->state, __entry->create ? "create " : "nocreate",
+ __entry->errno)
+);
+
+/*
+ * Keep track of blocks as they are allocated/freed.
+ *
+ * Records the device, the inode's disk address (i_no_addr), the first
+ * block and length of the extent, and the block state the extent is being
+ * changed to, printed by name via block_state_name().
+ */
+TRACE_EVENT(gfs2_block_alloc,
+
+ TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
+ u8 block_state),
+
+ TP_ARGS(ip, block, len, block_state),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, start )
+ __field( u64, inum )
+ __field( u32, len )
+ __field( u8, block_state )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->start = block;
+ __entry->inum = ip->i_no_addr;
+ __entry->len = len;
+ __entry->block_state = block_state;
+ ),
+
+ TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->inum,
+ (unsigned long long)__entry->start,
+ (unsigned long)__entry->len,
+ block_state_name(__entry->block_state))
+);
+
+#endif /* _TRACE_GFS2_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
diff --git a/fs/inode.c b/fs/inode.c
index a88baebf77c..f643be565df 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(touch_atime);
* for writeback. Note that this function is meant exclusively for
* usage in the file write path of filesystems, and filesystems may
* choose to explicitly ignore update via this function with the
- * S_NOCTIME inode flag, e.g. for network filesystem where these
+ * S_NOCMTIME inode flag, e.g. for network filesystem where these
* timestamps are handled by the server.
*/
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0af36085eb2..1a9c7878f86 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -556,27 +556,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
/* add partitions */
for (p = 1; p < state->limit; p++) {
- sector_t size = state->parts[p].size;
- sector_t from = state->parts[p].from;
+ sector_t size, from;
+try_scan:
+ size = state->parts[p].size;
if (!size)
continue;
+
+ from = state->parts[p].from;
if (from >= get_capacity(disk)) {
printk(KERN_WARNING
"%s: p%d ignored, start %llu is behind the end of the disk\n",
disk->disk_name, p, (unsigned long long) from);
continue;
}
+
if (from + size > get_capacity(disk)) {
- /*
- * we can not ignore partitions of broken tables
- * created by for example camera firmware, but we
- * limit them to the end of the disk to avoid
- * creating invalid block devices
- */
+ struct block_device_operations *bdops = disk->fops;
+ unsigned long long capacity;
+
printk(KERN_WARNING
- "%s: p%d size %llu limited to end of disk\n",
+ "%s: p%d size %llu exceeds device capacity, ",
disk->disk_name, p, (unsigned long long) size);
- size = get_capacity(disk) - from;
+
+ if (bdops->set_capacity &&
+ (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
+ printk(KERN_CONT "enabling native capacity\n");
+ capacity = bdops->set_capacity(disk, ~0ULL);
+ disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+ if (capacity > get_capacity(disk)) {
+ set_capacity(disk, capacity);
+ check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
+ }
+ goto try_scan;
+ } else {
+ /*
+ * we can not ignore partitions of broken tables
+ * created by for example camera firmware, but
+ * we limit them to the end of the disk to avoid
+ * creating invalid block devices
+ */
+ printk(KERN_CONT "limited to end of disk\n");
+ size = get_capacity(disk) - from;
+ }
}
part = add_partition(disk, p, from, size,
state->parts[p].flags);
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 29228f5899c..480f28127f0 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -39,6 +39,7 @@ config XFS_QUOTA
config XFS_POSIX_ACL
bool "XFS POSIX ACL support"
depends on XFS_FS
+ select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 60f107e47fe..7a59daed178 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
endif
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
+xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o
xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
@@ -88,8 +88,7 @@ xfs-y += xfs_alloc.o \
xfs_utils.o \
xfs_vnodeops.o \
xfs_rw.o \
- xfs_dmops.o \
- xfs_qmops.o
+ xfs_dmops.o
xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
xfs_dir2_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
new file mode 100644
index 00000000000..1e9d1246eeb
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+#define XFS_ACL_NOT_CACHED ((void *)-1)
+
+/*
+ * Locking scheme:
+ * - all ACL updates are protected by inode->i_mutex, which is taken before
+ * calling into this file.
+ * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
+ * protected by inode->i_lock.
+ */
+
+/*
+ * Convert an on-disk XFS ACL into a newly allocated in-core POSIX ACL.
+ *
+ * Returns the new ACL, which the caller must posix_acl_release(), or an
+ * ERR_PTR: -EINVAL if the entry count is out of range or an entry carries
+ * an unknown tag, -ENOMEM if the allocation fails.
+ */
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+	struct posix_acl_entry *acl_e;
+	struct posix_acl *acl;
+	struct xfs_acl_entry *ace;
+	int count, i;
+
+	/*
+	 * The count comes straight from disk and cannot be trusted.  Reject
+	 * anything that would make the loop below walk off the end of the
+	 * XFS_ACL_MAX_ENTRIES-sized aclp->acl_entry[] array.
+	 */
+	count = be32_to_cpu(aclp->acl_cnt);
+	if (count < 0 || count > XFS_ACL_MAX_ENTRIES)
+		return ERR_PTR(-EINVAL);
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		acl_e = &acl->a_entries[i];
+		ace = &aclp->acl_entry[i];
+
+		/*
+		 * The tag is 32 bits on disk and 16 bits in core.
+		 *
+		 * Because every access to it goes through the core
+		 * format first this is not a problem.
+		 */
+		acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+		acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+		switch (acl_e->e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			/* Only named user/group entries carry an id. */
+			acl_e->e_id = be32_to_cpu(ace->ae_id);
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			acl_e->e_id = ACL_UNDEFINED_ID;
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Fill the on-disk ACL @aclp from the in-core POSIX ACL @acl, converting
+ * the count and every entry's tag, id and permissions to big-endian.
+ * The caller must make sure @aclp has room for acl->a_count entries.
+ */
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+	int idx = 0;
+
+	aclp->acl_cnt = cpu_to_be32(acl->a_count);
+
+	while (idx < acl->a_count) {
+		const struct posix_acl_entry *src = &acl->a_entries[idx];
+		struct xfs_acl_entry *dst = &aclp->acl_entry[idx];
+
+		dst->ae_tag = cpu_to_be32(src->e_tag);
+		dst->ae_id = cpu_to_be32(src->e_id);
+		dst->ae_perm = cpu_to_be16(src->e_perm);
+		idx++;
+	}
+}
+
+/*
+ * Replace the ACL cached in the inode slot @p_acl with @acl.
+ *
+ * The attribute is read from or written to disk without any lock held, so
+ * another thread may already have installed a cached value by the time we
+ * get here.  Whatever real ACL is currently in the slot is released and a
+ * new reference to @acl (which may be NULL) is stored in its place, all
+ * under inode->i_lock.
+ */
+STATIC void
+xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
+		struct posix_acl *acl)
+{
+	struct posix_acl *old;
+
+	spin_lock(&inode->i_lock);
+	old = *p_acl;
+	if (old != NULL && old != XFS_ACL_NOT_CACHED)
+		posix_acl_release(old);
+	*p_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Look up the POSIX ACL of the given type on an inode, using the per-inode
+ * cache when it is populated and falling back to the on-disk attribute.
+ *
+ * @inode: inode to query
+ * @type:  ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT
+ *
+ * Returns a referenced ACL that the caller must posix_acl_release(), NULL
+ * when there is no ACL, or an ERR_PTR (-EINVAL for an unknown type,
+ * -ENOMEM on allocation failure, or whatever xfs_acl_from_disk() reports).
+ *
+ * NOTE(review): an xfs_attr_get() failure other than ENOATTR is still
+ * handed back to the caller as NULL, exactly as before; only the cache is
+ * left untouched in that case.
+ */
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl = NULL, **p_acl;
+	struct xfs_acl *xfs_acl;
+	int len = sizeof(struct xfs_acl);
+	int cached = 0;
+	char *ea_name;
+	int error;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		p_acl = &ip->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		ea_name = SGI_ACL_DEFAULT;
+		p_acl = &ip->i_default_acl;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock(&inode->i_lock);
+	if (*p_acl != XFS_ACL_NOT_CACHED) {
+		acl = posix_acl_dup(*p_acl);
+		cached = 1;
+	}
+	spin_unlock(&inode->i_lock);
+
+	/*
+	 * If we have a cached ACL value just return it, no need to go out
+	 * to the disk.  A cached NULL is a valid negative entry ("this
+	 * inode has no ACL"), so the decision must be based on whether the
+	 * slot was populated, not on whether the pointer is non-NULL.
+	 */
+	if (cached)
+		return acl;
+
+	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+	if (!xfs_acl)
+		return ERR_PTR(-ENOMEM);
+
+	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	if (error) {
+		/*
+		 * If the attribute doesn't exist make sure we have a negative
+		 * cache entry, for any other error assume it is transient and
+		 * leave the cache entry as XFS_ACL_NOT_CACHED.
+		 */
+		if (error == -ENOATTR) {
+			acl = NULL;
+			goto out_update_cache;
+		}
+		goto out;
+	}
+
+	acl = xfs_acl_from_disk(xfs_acl);
+	if (IS_ERR(acl))
+		goto out;
+
+ out_update_cache:
+	xfs_update_cached_acl(inode, p_acl, acl);
+ out:
+	kfree(xfs_acl);
+	return acl;
+}
+
+/*
+ * Store the ACL of the given type on an inode, or remove it when @acl is
+ * NULL, and update the cached copy on success.
+ *
+ * @inode: inode to modify
+ * @type:  ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT
+ * @acl:   ACL to store, or NULL to remove the existing one
+ *
+ * Returns 0 on success or a negative errno: -EOPNOTSUPP for symlinks,
+ * -EACCES when setting a default ACL on a non-directory, -EINVAL for an
+ * unknown type or an ACL with more than XFS_ACL_MAX_ENTRIES entries,
+ * -ENOMEM on allocation failure, or the negated attribute-layer error.
+ */
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl **p_acl;
+	char *ea_name;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		p_acl = &ip->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		/* Removing a default ACL from a non-directory is a no-op. */
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		ea_name = SGI_ACL_DEFAULT;
+		p_acl = &ip->i_default_acl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		struct xfs_acl *xfs_acl;
+		int len;
+
+		/*
+		 * The on-disk buffer holds at most XFS_ACL_MAX_ENTRIES
+		 * entries; refuse anything larger before xfs_acl_to_disk()
+		 * would write past the end of it.
+		 */
+		if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+			return -EINVAL;
+
+		xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+		if (!xfs_acl)
+			return -ENOMEM;
+
+		xfs_acl_to_disk(xfs_acl, acl);
+
+		/* Only write the entries actually in use. */
+		len = sizeof(struct xfs_acl) -
+			(sizeof(struct xfs_acl_entry) *
+			 (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+		error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+				len, ATTR_ROOT);
+
+		kfree(xfs_acl);
+	} else {
+		/*
+		 * A NULL ACL argument means we want to remove the ACL.
+		 */
+		error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+		/*
+		 * If the attribute didn't exist to start with that's fine.
+		 */
+		if (error == -ENOATTR)
+			error = 0;
+	}
+
+	if (!error)
+		xfs_update_cached_acl(inode, p_acl, acl);
+	return error;
+}
+
+/*
+ * ACL permission check callback.
+ *
+ * Returns the result of posix_acl_permission() when the inode has an
+ * access ACL, the lookup error if fetching the ACL failed, and -EAGAIN
+ * when there is no ACL to consult.
+ */
+int
+xfs_check_acl(struct inode *inode, int mask)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl;
+	int ret;
+
+	xfs_itrace_entry(ip);
+
+	/*
+	 * Without an attribute fork there cannot be an ACL on this inode,
+	 * so there is nothing to look up.
+	 */
+	if (!XFS_IFORK_Q(ip))
+		return -EAGAIN;
+
+	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -EAGAIN;
+
+	ret = posix_acl_permission(inode, acl, mask);
+	posix_acl_release(acl);
+	return ret;
+}
+
+/*
+ * Change the inode's mode to @mode through xfs_setattr(), passing
+ * XFS_ATTR_NOACL.  Does nothing when the mode is already @mode.
+ * Returns 0 or the negated xfs_setattr() error.
+ */
+static int
+xfs_set_mode(struct inode *inode, mode_t mode)
+{
+	struct iattr iattr;
+
+	if (mode == inode->i_mode)
+		return 0;
+
+	iattr.ia_valid = ATTR_MODE;
+	iattr.ia_mode = mode;
+
+	return -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+}
+
+/*
+ * Return non-zero if the root-namespace attribute @name can be looked up
+ * on the inode.  ATTR_KERNOVAL is passed together with a NULL value
+ * buffer, so no attribute data is copied out.
+ */
+static int
+xfs_acl_exists(struct inode *inode, char *name)
+{
+	int len = sizeof(struct xfs_acl);
+	int error;
+
+	error = xfs_attr_get(XFS_I(inode), name, NULL, &len,
+			ATTR_ROOT|ATTR_KERNOVAL);
+	return error == 0;
+}
+
+/*
+ * Return non-zero if the inode has an access ACL attribute (SGI_ACL_FILE).
+ */
+int
+posix_acl_access_exists(struct inode *inode)
+{
+ return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+/*
+ * Return non-zero if the inode is a directory that has a default ACL
+ * attribute (SGI_ACL_DEFAULT).  Non-directories always report 0.
+ */
+int
+posix_acl_default_exists(struct inode *inode)
+{
+	return S_ISDIR(inode->i_mode) ?
+		xfs_acl_exists(inode, SGI_ACL_DEFAULT) : 0;
+}
+
+/*
+ * Apply a parent directory's default ACL to a newly created inode.
+ *
+ * Directories first receive @default_acl as their own default ACL.  A
+ * clone of it is then run through posix_acl_create_masq() to derive the
+ * new mode, the mode is applied, and the clone is stored as the access
+ * ACL when it cannot be expressed by mode bits alone.
+ *
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
+{
+	struct posix_acl *clone;
+	mode_t mode = inode->i_mode;
+	int rc;
+
+	if (S_ISDIR(inode->i_mode)) {
+		rc = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
+		if (rc)
+			return rc;
+	}
+
+	clone = posix_acl_clone(default_acl, GFP_KERNEL);
+	if (!clone)
+		return -ENOMEM;
+
+	rc = posix_acl_create_masq(clone, &mode);
+	if (rc >= 0) {
+		/*
+		 * A positive return means a permission is inherited that
+		 * the Unix mode bits cannot represent, so an ACL really has
+		 * to be set in addition to the mode.
+		 */
+		int need_acl = rc > 0;
+
+		rc = xfs_set_mode(inode, mode);
+		if (!rc && need_acl)
+			rc = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+	}
+
+	posix_acl_release(clone);
+	return rc;
+}
+
+/*
+ * Bring the inode's access ACL in line with its current mode.
+ *
+ * Returns 0 when there is no access ACL or the update succeeded,
+ * -EOPNOTSUPP for symlinks, or a negative errno from the lookup, the
+ * clone, posix_acl_chmod_masq() or the store.
+ */
+int
+xfs_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *orig, *copy;
+	int rc;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	orig = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(orig))
+		return PTR_ERR(orig);
+	if (!orig)
+		return 0;
+
+	/* Work on a private copy; the original may be the cached ACL. */
+	copy = posix_acl_clone(orig, GFP_KERNEL);
+	posix_acl_release(orig);
+	if (!copy)
+		return -ENOMEM;
+
+	rc = posix_acl_chmod_masq(copy, inode->i_mode);
+	if (rc == 0)
+		rc = xfs_set_acl(inode, ACL_TYPE_ACCESS, copy);
+
+	posix_acl_release(copy);
+	return rc;
+}
+
+/*
+ * Mark both ACL cache slots of a new inode as "not cached yet".
+ * The inode is not live at this point, so no locking is required.
+ */
+void
+xfs_inode_init_acls(struct xfs_inode *ip)
+{
+	ip->i_default_acl = XFS_ACL_NOT_CACHED;
+	ip->i_acl = XFS_ACL_NOT_CACHED;
+}
+
+/*
+ * Drop the references held by both ACL cache slots of an inode.
+ * The inode is no longer live and is about to be freed, so no locking
+ * is required.  Slots still marked XFS_ACL_NOT_CACHED are skipped.
+ */
+void
+xfs_inode_clear_acls(struct xfs_inode *ip)
+{
+	struct posix_acl *access = ip->i_acl;
+	struct posix_acl *dflt = ip->i_default_acl;
+
+	if (access != XFS_ACL_NOT_CACHED)
+		posix_acl_release(access);
+	if (dflt != XFS_ACL_NOT_CACHED)
+		posix_acl_release(dflt);
+}
+
+
+/*
+ * System xattr handlers.
+ *
+ * Currently Posix ACLs are the only system namespace extended attribute
+ * handlers supported by XFS, so we just implement the handlers here.
+ * If we ever support other system extended attributes this will need
+ * some refactoring.
+ */
+
+/*
+ * Map a system xattr name (with the "system." prefix already removed) to
+ * an ACL type.  Returns ACL_TYPE_ACCESS, ACL_TYPE_DEFAULT, or -EINVAL for
+ * any other name.
+ */
+static int
+xfs_decode_acl(const char *name)
+{
+	if (!strcmp(name, "posix_acl_access"))
+		return ACL_TYPE_ACCESS;
+	if (!strcmp(name, "posix_acl_default"))
+		return ACL_TYPE_DEFAULT;
+	return -EINVAL;
+}
+
+/*
+ * ->get handler for the system xattr namespace.
+ *
+ * Looks up the ACL named by @name and serialises it into @value via
+ * posix_acl_to_xattr().  Returns that function's result, -ENODATA when
+ * the inode has no such ACL, or a negative errno for an unknown name or
+ * a failed lookup.
+ */
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+		void *value, size_t size)
+{
+	struct posix_acl *acl;
+	int type = xfs_decode_acl(name);
+	int ret;
+
+	if (type < 0)
+		return type;
+
+	acl = xfs_get_acl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -ENODATA;
+
+	ret = posix_acl_to_xattr(acl, value, size);
+	posix_acl_release(acl);
+	return ret;
+}
+
+/*
+ * ->set handler for the system xattr namespace.
+ *
+ * Decodes @name into an ACL type, parses and validates the xattr value,
+ * and stores the resulting ACL.  A NULL @value removes the ACL.  For an
+ * access ACL the inode mode is updated first, and an ACL that is fully
+ * representable by the mode bits is dropped rather than stored.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL for an unknown name,
+ * XATTR_CREATE, or more than XFS_ACL_MAX_ENTRIES entries; -EACCES for a
+ * default ACL on a non-directory; -EPERM when the caller is neither the
+ * owner nor CAP_FOWNER-capable; otherwise whatever the helpers report.
+ */
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct posix_acl *acl = NULL;
+ int error = 0, type;
+
+ type = xfs_decode_acl(name);
+ if (type < 0)
+ return type;
+ if (flags & XATTR_CREATE)
+ return -EINVAL;
+ /* Removing a default ACL from a non-directory trivially succeeds. */
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return value ? -EACCES : 0;
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+ return -EPERM;
+
+ /* No value: fall through to xfs_set_acl() with acl == NULL (removal). */
+ if (!value)
+ goto set_acl;
+
+ acl = posix_acl_from_xattr(value, size);
+ if (!acl) {
+ /*
+ * acl_set_file(3) may request that we set default ACLs with
+ * zero length -- defend (gracefully) against that here.
+ */
+ goto out;
+ }
+ if (IS_ERR(acl)) {
+ error = PTR_ERR(acl);
+ goto out;
+ }
+
+ error = posix_acl_valid(acl);
+ if (error)
+ goto out_release;
+
+ /* The on-disk format cannot hold more than XFS_ACL_MAX_ENTRIES. */
+ error = -EINVAL;
+ if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+ goto out_release;
+
+ if (type == ACL_TYPE_ACCESS) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+
+ /*
+ * error <= 0: the ACL is dropped here.  On a negative return we
+ * bail out; on zero acl stays NULL, so the xfs_set_acl() call
+ * below removes any stored ACL after the mode has been updated.
+ */
+ if (error <= 0) {
+ posix_acl_release(acl);
+ acl = NULL;
+
+ if (error < 0)
+ return error;
+ }
+
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ goto out_release;
+ }
+
+ set_acl:
+ error = xfs_set_acl(inode, type, acl);
+ out_release:
+ /* acl may be NULL on this path. */
+ posix_acl_release(acl);
+ out:
+ return error;
+}
+
+/*
+ * xattr handler for names starting with XATTR_SYSTEM_PREFIX; get and set
+ * are routed to the POSIX ACL handlers above.
+ */
+struct xattr_handler xfs_xattr_system_handler = {
+ .prefix = XATTR_SYSTEM_PREFIX,
+ .get = xfs_xattr_system_get,
+ .set = xfs_xattr_system_set,
+};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 34eaab608e6..5bb523d7f37 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,7 +41,6 @@
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
@@ -899,7 +898,8 @@ xfs_ioctl_setattr(
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
unsigned int lock_flags = 0;
- struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
struct xfs_dquot *olddquot = NULL;
int code;
@@ -919,7 +919,7 @@ xfs_ioctl_setattr(
* because the i_*dquot fields will get updated anyway.
*/
if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
- code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+ code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
ip->i_d.di_gid, fa->fsx_projid,
XFS_QMOPT_PQUOTA, &udqp, &gdqp);
if (code)
@@ -954,10 +954,11 @@ xfs_ioctl_setattr(
* Do a quota reservation only if projid is actually going to change.
*/
if (mask & FSX_PROJID) {
- if (XFS_IS_PQUOTA_ON(mp) &&
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ XFS_IS_PQUOTA_ON(mp) &&
ip->i_d.di_projid != fa->fsx_projid) {
ASSERT(tp);
- code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -1059,8 +1060,8 @@ xfs_ioctl_setattr(
* in the transaction.
*/
if (ip->i_d.di_projid != fa->fsx_projid) {
- if (XFS_IS_PQUOTA_ON(mp)) {
- olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+ olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_projid = fa->fsx_projid;
@@ -1106,9 +1107,9 @@ xfs_ioctl_setattr(
/*
* Release any dquot(s) the inode had kept before chown.
*/
- XFS_QM_DQRELE(mp, olddquot);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(olddquot);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (code)
return code;
@@ -1122,8 +1123,8 @@ xfs_ioctl_setattr(
return 0;
error_return:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
xfs_trans_cancel(tp, 0);
if (lock_flags)
xfs_iunlock(ip, lock_flags);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6075382336d..58973bb4603 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -17,6 +17,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -51,6 +52,7 @@
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/namei.h>
+#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/falloc.h>
#include <linux/fiemap.h>
@@ -202,9 +204,8 @@ xfs_vn_mknod(
{
struct inode *inode;
struct xfs_inode *ip = NULL;
- xfs_acl_t *default_acl = NULL;
+ struct posix_acl *default_acl = NULL;
struct xfs_name name;
- int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
int error;
/*
@@ -219,18 +220,14 @@ xfs_vn_mknod(
rdev = 0;
}
- if (test_default_acl && test_default_acl(dir)) {
- if (!_ACL_ALLOC(default_acl)) {
- return -ENOMEM;
- }
- if (!_ACL_GET_DEFAULT(dir, default_acl)) {
- _ACL_FREE(default_acl);
- default_acl = NULL;
- }
- }
+ if (IS_POSIXACL(dir)) {
+ default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(default_acl))
+ return -PTR_ERR(default_acl);
- if (IS_POSIXACL(dir) && !default_acl)
- mode &= ~current_umask();
+ if (!default_acl)
+ mode &= ~current_umask();
+ }
xfs_dentry_to_name(&name, dentry);
error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
@@ -244,10 +241,10 @@ xfs_vn_mknod(
goto out_cleanup_inode;
if (default_acl) {
- error = _ACL_INHERIT(inode, mode, default_acl);
+ error = -xfs_inherit_acl(inode, default_acl);
if (unlikely(error))
goto out_cleanup_inode;
- _ACL_FREE(default_acl);
+ posix_acl_release(default_acl);
}
@@ -257,8 +254,7 @@ xfs_vn_mknod(
out_cleanup_inode:
xfs_cleanup_inode(dir, inode, dentry);
out_free_acl:
- if (default_acl)
- _ACL_FREE(default_acl);
+ posix_acl_release(default_acl);
return -error;
}
@@ -488,26 +484,6 @@ xfs_vn_put_link(
kfree(s);
}
-#ifdef CONFIG_XFS_POSIX_ACL
-STATIC int
-xfs_check_acl(
- struct inode *inode,
- int mask)
-{
- struct xfs_inode *ip = XFS_I(inode);
- int error;
-
- xfs_itrace_entry(ip);
-
- if (XFS_IFORK_Q(ip)) {
- error = xfs_acl_iaccess(ip, mask, NULL);
- if (error != -1)
- return -error;
- }
-
- return -EAGAIN;
-}
-
STATIC int
xfs_vn_permission(
struct inode *inode,
@@ -515,9 +491,6 @@ xfs_vn_permission(
{
return generic_permission(inode, mask, xfs_check_acl);
}
-#else
-#define xfs_vn_permission NULL
-#endif
STATIC int
xfs_vn_getattr(
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 9142192ccbe..7078974a6ee 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 94d9a633d3d..cb6e2cca214 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -50,9 +50,11 @@ xfs_fs_quota_sync(
{
struct xfs_mount *mp = XFS_M(sb);
+ if (sb->s_flags & MS_RDONLY)
+ return -EROFS;
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_sync_inodes(mp, SYNC_DELWRI);
+ return -xfs_sync_data(mp, 0);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 08d6bd9a394..2e09efbca8d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,7 +43,6 @@
#include "xfs_itable.h"
#include "xfs_fsops.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -405,6 +404,14 @@ xfs_parseargs(
return EINVAL;
}
+#ifndef CONFIG_XFS_QUOTA
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
+ cmn_err(CE_WARN,
+ "XFS: quota support not available in this kernel.");
+ return EINVAL;
+ }
+#endif
+
if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
(mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
cmn_err(CE_WARN,
@@ -1063,7 +1070,18 @@ xfs_fs_put_super(
int unmount_event_flags = 0;
xfs_syncd_stop(mp);
- xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
+
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /*
+ * XXX(hch): this should be SYNC_WAIT.
+ *
+ * Or more likely not needed at all because the VFS is already
+ * calling ->sync_fs after shutting down all filesystem
+ * operations and just before calling ->put_super.
+ */
+ xfs_sync_data(mp, 0);
+ xfs_sync_attr(mp, 0);
+ }
#ifdef HAVE_DMAPI
if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1098,7 +1116,6 @@ xfs_fs_put_super(
xfs_freesb(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- xfs_qmops_put(mp);
xfs_dmops_put(mp);
xfs_free_fsname(mp);
kfree(mp);
@@ -1158,6 +1175,7 @@ xfs_fs_statfs(
{
struct xfs_mount *mp = XFS_M(dentry->d_sb);
xfs_sb_t *sbp = &mp->m_sb;
+ struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
xfs_extlen_t lsize;
@@ -1186,7 +1204,10 @@ xfs_fs_statfs(
statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
spin_unlock(&mp->m_sb_lock);
- XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+ if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+ xfs_qm_statvfs(ip, statp);
return 0;
}
@@ -1394,16 +1415,13 @@ xfs_fs_fill_super(
error = xfs_dmops_get(mp);
if (error)
goto out_free_fsname;
- error = xfs_qmops_get(mp);
- if (error)
- goto out_put_dmops;
if (silent)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_put_qmops;
+ goto out_put_dmops;
if (xfs_icsb_init_counters(mp))
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1471,8 +1489,6 @@ xfs_fs_fill_super(
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- out_put_qmops:
- xfs_qmops_put(mp);
out_put_dmops:
xfs_dmops_put(mp);
out_free_fsname:
@@ -1706,18 +1722,8 @@ xfs_init_zones(void)
if (!xfs_ili_zone)
goto out_destroy_inode_zone;
-#ifdef CONFIG_XFS_POSIX_ACL
- xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
- if (!xfs_acl_zone)
- goto out_destroy_ili_zone;
-#endif
-
return 0;
-#ifdef CONFIG_XFS_POSIX_ACL
- out_destroy_ili_zone:
-#endif
- kmem_zone_destroy(xfs_ili_zone);
out_destroy_inode_zone:
kmem_zone_destroy(xfs_inode_zone);
out_destroy_efi_zone:
@@ -1751,9 +1757,6 @@ xfs_init_zones(void)
STATIC void
xfs_destroy_zones(void)
{
-#ifdef CONFIG_XFS_POSIX_ACL
- kmem_zone_destroy(xfs_acl_zone);
-#endif
kmem_zone_destroy(xfs_ili_zone);
kmem_zone_destroy(xfs_inode_zone);
kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index f7ba76633c2..b619d6b8ca4 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,166 +43,267 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_rw.h"
+#include "xfs_quota.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
-/*
- * Sync all the inodes in the given AG according to the
- * direction given by the flags.
- */
-STATIC int
-xfs_sync_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- int flags)
-{
- xfs_perag_t *pag = &mp->m_perag[ag];
- int nr_found;
- uint32_t first_index = 0;
- int error = 0;
- int last_error = 0;
- do {
- struct inode *inode;
- xfs_inode_t *ip = NULL;
- int lock_flags = XFS_ILOCK_SHARED;
+STATIC xfs_inode_t *
+xfs_inode_ag_lookup(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ uint32_t *first_index,
+ int tag)
+{
+ int nr_found;
+ struct xfs_inode *ip;
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ if (tag == XFS_ICI_NO_TAG) {
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void**)&ip, first_index, 1);
+ (void **)&ip, *first_index, 1);
+ } else {
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void **)&ip, *first_index, 1, tag);
+ }
+ if (!nr_found)
+ goto unlock;
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ goto unlock;
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
+ return ip;
- /* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- read_unlock(&pag->pag_ici_lock);
- return 0;
- }
+unlock:
+ read_unlock(&pag->pag_ici_lock);
+ return NULL;
+}
- /*
- * If we can't get a reference on the inode, it must be
- * in reclaim. Leave it for the reclaim code to flush.
- */
- inode = VFS_I(ip);
- if (!igrab(inode)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- read_unlock(&pag->pag_ici_lock);
+STATIC int
+xfs_inode_ag_walk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t ag,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags),
+ int flags,
+ int tag)
+{
+ struct xfs_perag *pag = &mp->m_perag[ag];
+ uint32_t first_index;
+ int last_error = 0;
+ int skipped;
- /* avoid new or bad inodes */
- if (is_bad_inode(inode) ||
- xfs_iflags_test(ip, XFS_INEW)) {
- IRELE(ip);
- continue;
- }
+restart:
+ skipped = 0;
+ first_index = 0;
+ do {
+ int error = 0;
+ xfs_inode_t *ip;
- /*
- * If we have to flush data or wait for I/O completion
- * we need to hold the iolock.
- */
- if (flags & SYNC_DELWRI) {
- if (VN_DIRTY(inode)) {
- if (flags & SYNC_TRYLOCK) {
- if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
- lock_flags |= XFS_IOLOCK_SHARED;
- } else {
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- lock_flags |= XFS_IOLOCK_SHARED;
- }
- if (lock_flags & XFS_IOLOCK_SHARED) {
- error = xfs_flush_pages(ip, 0, -1,
- (flags & SYNC_WAIT) ? 0
- : XFS_B_ASYNC,
- FI_NONE);
- }
- }
- if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
- xfs_ioend_wait(ip);
- }
- xfs_ilock(ip, XFS_ILOCK_SHARED);
-
- if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
- if (flags & SYNC_WAIT) {
- xfs_iflock(ip);
- if (!xfs_inode_clean(ip))
- error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
- else
- xfs_ifunlock(ip);
- } else if (xfs_iflock_nowait(ip)) {
- if (!xfs_inode_clean(ip))
- error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
- else
- xfs_ifunlock(ip);
- }
- }
- xfs_iput(ip, lock_flags);
+ ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+ if (!ip)
+ break;
+ error = execute(ip, pag, flags);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
if (error)
last_error = error;
/*
* bail out if the filesystem is corrupted.
*/
if (error == EFSCORRUPTED)
- return XFS_ERROR(error);
+ break;
- } while (nr_found);
+ } while (1);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
+ }
+ xfs_put_perag(mp, pag);
return last_error;
}
int
-xfs_sync_inodes(
- xfs_mount_t *mp,
- int flags)
+xfs_inode_ag_iterator(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags),
+ int flags,
+ int tag)
{
- int error;
- int last_error;
- int i;
- int lflags = XFS_LOG_FORCE;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
- error = 0;
- last_error = 0;
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+ if (!mp->m_perag[ag].pag_ici_init)
+ continue;
+ error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
+}
+
+/*
+ * Check whether an inode may be synced and, if so, take a reference on it.
+ *
+ * Must be called with pag_ici_lock read-held; the lock is released on
+ * every return path.
+ *
+ * Returns 0 with a reference obtained through igrab() (the caller is
+ * expected to drop it with IRELE), EFSCORRUPTED if the filesystem has
+ * been forcibly shut down, or ENOENT if no reference could be taken or
+ * the inode is bad or still flagged XFS_INEW. Error values are positive.
+ */
+int
+xfs_sync_inode_valid(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag)
+{
+ struct inode *inode = VFS_I(ip);
+
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ read_unlock(&pag->pag_ici_lock);
+ return EFSCORRUPTED;
+ }
+
+ /*
+ * If we can't get a reference on the inode, it must be in reclaim.
+ * Leave it for the reclaim code to flush. Also avoid inodes that
+ * haven't been fully initialised.
+ */
+ if (!igrab(inode)) {
+ read_unlock(&pag->pag_ici_lock);
+ return ENOENT;
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ return ENOENT;
+ }
+
+ return 0;
+}
+
+STATIC int
+xfs_sync_inode_data(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ struct inode *inode = VFS_I(ip);
+ struct address_space *mapping = inode->i_mapping;
+ int error = 0;
+
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
+
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ goto out_wait;
+
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (flags & SYNC_TRYLOCK)
+ goto out_wait;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
+
+ error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+ 0 : XFS_B_ASYNC, FI_NONE);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ out_wait:
if (flags & SYNC_WAIT)
- lflags |= XFS_LOG_SYNC;
+ xfs_ioend_wait(ip);
+ IRELE(ip);
+ return error;
+}
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- error = xfs_sync_inodes_ag(mp, i, flags);
- if (error)
- last_error = error;
- if (error == EFSCORRUPTED)
- break;
+STATIC int
+xfs_sync_inode_attr(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ int error = 0;
+
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_inode_clean(ip))
+ goto out_unlock;
+ if (!xfs_iflock_nowait(ip)) {
+ if (!(flags & SYNC_WAIT))
+ goto out_unlock;
+ xfs_iflock(ip);
}
- if (flags & SYNC_DELWRI)
- xfs_log_force(mp, 0, lflags);
- return XFS_ERROR(last_error);
+ if (xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
+ goto out_unlock;
+ }
+
+ error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
+ XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+
+ out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ IRELE(ip);
+ return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ *
+ * Runs xfs_sync_inode_data over every inode (XFS_ICI_NO_TAG) through
+ * xfs_inode_ag_iterator. Only SYNC_TRYLOCK and SYNC_WAIT may be set in
+ * flags. If the walk reports an error it is returned and the log is not
+ * forced; otherwise the log is forced (synchronously when SYNC_WAIT is
+ * set) and 0 is returned.
+ */
+int
+xfs_sync_data(
+ struct xfs_mount *mp,
+ int flags)
+{
+ int error;
+
+ ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
+ XFS_ICI_NO_TAG);
+ if (error)
+ return XFS_ERROR(error);
+
+ xfs_log_force(mp, 0,
+ (flags & SYNC_WAIT) ?
+ XFS_LOG_FORCE | XFS_LOG_SYNC :
+ XFS_LOG_FORCE);
+ return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ *
+ * Runs xfs_sync_inode_attr over every inode (XFS_ICI_NO_TAG) through
+ * xfs_inode_ag_iterator and returns its result. SYNC_WAIT is the only
+ * flag that may be set.
+ */
+int
+xfs_sync_attr(
+ struct xfs_mount *mp,
+ int flags)
+{
+ ASSERT((flags & ~SYNC_WAIT) == 0);
+
+ return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
+ XFS_ICI_NO_TAG);
}
STATIC int
@@ -252,7 +353,7 @@ xfs_sync_fsdata(
* If this is xfssyncd() then only sync the superblock if we can
* lock it without sleeping and it is not pinned.
*/
- if (flags & SYNC_BDFLUSH) {
+ if (flags & SYNC_TRYLOCK) {
ASSERT(!(flags & SYNC_WAIT));
bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
@@ -316,13 +417,13 @@ xfs_quiesce_data(
int error;
/* push non-blocking */
- xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
- XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ xfs_sync_data(mp, 0);
+ xfs_qm_sync(mp, SYNC_TRYLOCK);
xfs_filestream_flush(mp);
/* push and block */
- xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
- XFS_QM_DQSYNC(mp, SYNC_WAIT);
+ xfs_sync_data(mp, SYNC_WAIT);
+ xfs_qm_sync(mp, SYNC_WAIT);
/* write superblock and hoover up shutdown errors */
error = xfs_sync_fsdata(mp, 0);
@@ -341,7 +442,7 @@ xfs_quiesce_fs(
int count = 0, pincount;
xfs_flush_buftarg(mp->m_ddev_targp, 0);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
/*
* This loop must run at least twice. The first instance of the loop
@@ -350,7 +451,7 @@ xfs_quiesce_fs(
* logged before we can write the unmount record.
*/
do {
- xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+ xfs_sync_attr(mp, SYNC_WAIT);
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
if (!pincount) {
delay(50);
@@ -433,8 +534,8 @@ xfs_flush_inodes_work(
void *arg)
{
struct inode *inode = arg;
- xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
- xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
+ xfs_sync_data(mp, SYNC_TRYLOCK);
+ xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
iput(inode);
}
@@ -465,10 +566,10 @@ xfs_sync_worker(
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
/* dgc: errors ignored here */
- error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
- error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+ error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+ error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
if (xfs_log_need_covered(mp))
error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
}
@@ -569,7 +670,7 @@ xfs_reclaim_inode(
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- return 1;
+ return -EAGAIN;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
@@ -654,101 +755,27 @@ xfs_inode_clear_reclaim_tag(
xfs_put_perag(mp, pag);
}
-
-STATIC void
-xfs_reclaim_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- int noblock,
- int mode)
+STATIC int
+xfs_reclaim_inode_now(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
{
- xfs_inode_t *ip = NULL;
- xfs_perag_t *pag = &mp->m_perag[ag];
- int nr_found;
- uint32_t first_index;
- int skipped;
-
-restart:
- first_index = 0;
- skipped = 0;
- do {
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
- nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
- (void**)&ip, first_index, 1,
- XFS_ICI_RECLAIM_TAG);
-
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /* ignore if already under reclaim */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- if (noblock) {
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- if (xfs_ipincount(ip) ||
- !xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- }
+ /* ignore if already under reclaim */
+ if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
read_unlock(&pag->pag_ici_lock);
-
- /*
- * hmmm - this is an inode already in reclaim. Do
- * we even bother catching it here?
- */
- if (xfs_reclaim_inode(ip, noblock, mode))
- skipped++;
- } while (nr_found);
-
- if (skipped) {
- delay(1);
- goto restart;
+ return 0;
}
- return;
+ read_unlock(&pag->pag_ici_lock);
+ return xfs_reclaim_inode(ip, 0, flags);
}
int
xfs_reclaim_inodes(
xfs_mount_t *mp,
- int noblock,
int mode)
{
- int i;
-
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- xfs_reclaim_inodes_ag(mp, i, noblock, mode);
- }
- return 0;
+ return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
+ XFS_ICI_RECLAIM_TAG);
}
-
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 308d5bf6dfb..2a10301c99c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,17 +29,14 @@ typedef struct xfs_sync_work {
struct completion *w_completion;
} xfs_sync_work_t;
-#define SYNC_ATTR 0x0001 /* sync attributes */
-#define SYNC_DELWRI 0x0002 /* look at delayed writes */
-#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
-#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
-#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
-#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
+#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
+#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
-int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_attr(struct xfs_mount *mp, int flags);
+int xfs_sync_data(struct xfs_mount *mp, int flags);
int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
int xfs_quiesce_data(struct xfs_mount *mp);
@@ -48,10 +45,16 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
-int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
+
+int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+ int flags, int tag);
+
#endif
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 964621fde6e..497c7fb75cc 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -29,67 +29,6 @@
#include <linux/xattr.h>
-/*
- * ACL handling. Should eventually be moved into xfs_acl.c
- */
-
-static int
-xfs_decode_acl(const char *name)
-{
- if (strcmp(name, "posix_acl_access") == 0)
- return _ACL_TYPE_ACCESS;
- else if (strcmp(name, "posix_acl_default") == 0)
- return _ACL_TYPE_DEFAULT;
- return -EINVAL;
-}
-
-/*
- * Get system extended attributes which at the moment only
- * includes Posix ACLs.
- */
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- int acl;
-
- acl = xfs_decode_acl(name);
- if (acl < 0)
- return acl;
-
- return xfs_acl_vget(inode, buffer, size, acl);
-}
-
-static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- int acl;
-
- acl = xfs_decode_acl(name);
- if (acl < 0)
- return acl;
- if (flags & XATTR_CREATE)
- return -EINVAL;
-
- if (!value)
- return xfs_acl_vremove(inode, acl);
-
- return xfs_acl_vset(inode, (void *)value, size, acl);
-}
-
-static struct xattr_handler xfs_xattr_system_handler = {
- .prefix = XATTR_SYSTEM_PREFIX,
- .get = xfs_xattr_system_get,
- .set = xfs_xattr_system_set,
-};
-
-
-/*
- * Real xattr handling. The only difference between the namespaces is
- * a flag passed to the low-level attr code.
- */
-
static int
__xfs_xattr_get(struct inode *inode, const char *name,
void *value, size_t size, int xflags)
@@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = {
&xfs_xattr_user_handler,
&xfs_xattr_trusted_handler,
&xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
&xfs_xattr_system_handler,
+#endif
NULL
};
@@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
/*
* Then add the two synthetic ACL attributes.
*/
- if (xfs_acl_vhasacl_access(inode)) {
+ if (posix_acl_access_exists(inode)) {
error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
strlen(POSIX_ACL_XATTR_ACCESS) + 1,
data, size, &context.count);
@@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
return error;
}
- if (xfs_acl_vhasacl_default(inode)) {
+ if (posix_acl_default_exists(inode)) {
error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
data, size, &context.count);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e4babcc6342..2f3f2229eaa 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -1194,7 +1193,9 @@ void
xfs_qm_dqrele(
xfs_dquot_t *dqp)
{
- ASSERT(dqp);
+ if (!dqp)
+ return;
+
xfs_dqtrace_entry(dqp, "DQRELE");
xfs_dqlock(dqp);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index de0f402ddb4..6533ead9b88 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -181,7 +181,6 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
xfs_dqid_t, uint, uint, xfs_dquot_t **);
extern void xfs_qm_dqput(xfs_dquot_t *);
-extern void xfs_qm_dqrele(xfs_dquot_t *);
extern void xfs_dqlock(xfs_dquot_t *);
extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
extern void xfs_dqunlock(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1728f6a7c4f..d0d4a9a0bbd 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5b6695049e0..45b1bfef738 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -287,11 +286,13 @@ xfs_qm_rele_quotafs_ref(
* Just destroy the quotainfo structure.
*/
void
-xfs_qm_unmount_quotadestroy(
- xfs_mount_t *mp)
+xfs_qm_unmount(
+ struct xfs_mount *mp)
{
- if (mp->m_quotainfo)
+ if (mp->m_quotainfo) {
+ xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
xfs_qm_destroy_quotainfo(mp);
+ }
}
@@ -385,8 +386,13 @@ xfs_qm_mount_quotas(
if (error) {
xfs_fs_cmn_err(CE_WARN, mp,
"Failed to initialize disk quotas.");
+ return;
}
- return;
+
+#ifdef QUOTADEBUG
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_qm_internalqcheck(mp);
+#endif
}
/*
@@ -774,12 +780,11 @@ xfs_qm_dqattach_grouphint(
* Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
* into account.
* If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
* Inode may get unlocked and relocked in here, and the caller must deal with
* the consequences.
*/
int
-xfs_qm_dqattach(
+xfs_qm_dqattach_locked(
xfs_inode_t *ip,
uint flags)
{
@@ -787,17 +792,14 @@ xfs_qm_dqattach(
uint nquotas = 0;
int error = 0;
- if ((! XFS_IS_QUOTA_ON(mp)) ||
- (! XFS_NOT_DQATTACHED(mp, ip)) ||
- (ip->i_ino == mp->m_sb.sb_uquotino) ||
- (ip->i_ino == mp->m_sb.sb_gquotino))
+ if (!XFS_IS_QUOTA_RUNNING(mp) ||
+ !XFS_IS_QUOTA_ON(mp) ||
+ !XFS_NOT_DQATTACHED(mp, ip) ||
+ ip->i_ino == mp->m_sb.sb_uquotino ||
+ ip->i_ino == mp->m_sb.sb_gquotino)
return 0;
- ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
- xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (XFS_IS_UQUOTA_ON(mp)) {
error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
@@ -849,8 +851,7 @@ xfs_qm_dqattach(
xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
}
- done:
-
+ done:
#ifdef QUOTADEBUG
if (! error) {
if (XFS_IS_UQUOTA_ON(mp))
@@ -858,15 +859,22 @@ xfs_qm_dqattach(
if (XFS_IS_OQUOTA_ON(mp))
ASSERT(ip->i_gdquot);
}
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
#endif
+ return error;
+}
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+int
+xfs_qm_dqattach(
+ struct xfs_inode *ip,
+ uint flags)
+{
+ int error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_qm_dqattach_locked(ip, flags);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
-#ifdef QUOTADEBUG
- else
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
return error;
}
@@ -896,11 +904,6 @@ xfs_qm_dqdetach(
}
}
-/*
- * This is called to sync quotas. We can be told to use non-blocking
- * semantics by either the SYNC_BDFLUSH flag or the absence of the
- * SYNC_WAIT flag.
- */
int
xfs_qm_sync(
xfs_mount_t *mp,
@@ -909,17 +912,13 @@ xfs_qm_sync(
int recl, restarts;
xfs_dquot_t *dqp;
uint flush_flags;
- boolean_t nowait;
int error;
- if (! XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
+ flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
restarts = 0;
- /*
- * We won't block unless we are asked to.
- */
- nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
again:
xfs_qm_mplist_lock(mp);
@@ -939,18 +938,10 @@ xfs_qm_sync(
* don't 'seem' to be dirty. ie. don't acquire dqlock.
* This is very similar to what xfs_sync does with inodes.
*/
- if (flags & SYNC_BDFLUSH) {
- if (! XFS_DQ_IS_DIRTY(dqp))
+ if (flags & SYNC_TRYLOCK) {
+ if (!XFS_DQ_IS_DIRTY(dqp))
continue;
- }
-
- if (nowait) {
- /*
- * Try to acquire the dquot lock. We are NOT out of
- * lock order, but we just don't want to wait for this
- * lock, unless somebody wanted us to.
- */
- if (! xfs_qm_dqlock_nowait(dqp))
+ if (!xfs_qm_dqlock_nowait(dqp))
continue;
} else {
xfs_dqlock(dqp);
@@ -967,7 +958,7 @@ xfs_qm_sync(
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
if (!xfs_dqflock_nowait(dqp)) {
- if (nowait) {
+ if (flags & SYNC_TRYLOCK) {
xfs_dqunlock(dqp);
continue;
}
@@ -985,7 +976,6 @@ xfs_qm_sync(
* Let go of the mplist lock. We don't want to hold it
* across a disk write
*/
- flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
xfs_qm_mplist_unlock(mp);
xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
error = xfs_qm_dqflush(dqp, flush_flags);
@@ -2319,20 +2309,20 @@ xfs_qm_write_sb_changes(
*/
int
xfs_qm_vop_dqalloc(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- uid_t uid,
- gid_t gid,
- prid_t prid,
- uint flags,
- xfs_dquot_t **O_udqpp,
- xfs_dquot_t **O_gdqpp)
+ struct xfs_inode *ip,
+ uid_t uid,
+ gid_t gid,
+ prid_t prid,
+ uint flags,
+ struct xfs_dquot **O_udqpp,
+ struct xfs_dquot **O_gdqpp)
{
- int error;
- xfs_dquot_t *uq, *gq;
- uint lockflags;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dquot *uq, *gq;
+ int error;
+ uint lockflags;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
lockflags = XFS_ILOCK_EXCL;
@@ -2346,8 +2336,8 @@ xfs_qm_vop_dqalloc(
* if necessary. The dquot(s) will not be locked.
*/
if (XFS_NOT_DQATTACHED(mp, ip)) {
- if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
- XFS_QMOPT_ILOCKED))) {
+ error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+ if (error) {
xfs_iunlock(ip, lockflags);
return error;
}
@@ -2469,6 +2459,7 @@ xfs_qm_vop_chown(
uint bfield = XFS_IS_REALTIME_INODE(ip) ?
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
@@ -2508,13 +2499,13 @@ xfs_qm_vop_chown_reserve(
xfs_dquot_t *gdqp,
uint flags)
{
- int error;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = ip->i_mount;
uint delblks, blkflags, prjflags = 0;
xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+ int error;
+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- mp = ip->i_mount;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
delblks = ip->i_delayed_blks;
@@ -2582,28 +2573,23 @@ xfs_qm_vop_chown_reserve(
int
xfs_qm_vop_rename_dqattach(
- xfs_inode_t **i_tab)
+ struct xfs_inode **i_tab)
{
- xfs_inode_t *ip;
- int i;
- int error;
+ struct xfs_mount *mp = i_tab[0]->i_mount;
+ int i;
- ip = i_tab[0];
-
- if (! XFS_IS_QUOTA_ON(ip->i_mount))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
- }
- for (i = 1; (i < 4 && i_tab[i]); i++) {
+ for (i = 0; (i < 4 && i_tab[i]); i++) {
+ struct xfs_inode *ip = i_tab[i];
+ int error;
+
/*
* Watch out for duplicate entries in the table.
*/
- if ((ip = i_tab[i]) != i_tab[i-1]) {
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+ if (i == 0 || ip != i_tab[i-1]) {
+ if (XFS_NOT_DQATTACHED(mp, ip)) {
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
@@ -2614,17 +2600,19 @@ xfs_qm_vop_rename_dqattach(
}
void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp)
+xfs_qm_vop_create_dqattach(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp)
{
- if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if (udqp) {
xfs_dqlock(udqp);
@@ -2632,7 +2620,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(udqp);
ASSERT(ip->i_udquot == NULL);
ip->i_udquot = udqp;
- ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
+ ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
@@ -2642,8 +2630,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(gdqp);
ASSERT(ip->i_gdquot == NULL);
ip->i_gdquot = gdqp;
- ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
- ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+ ASSERT(XFS_IS_OQUOTA_ON(mp));
+ ASSERT((XFS_IS_GQUOTA_ON(mp) ?
ip->i_d.di_gid : ip->i_d.di_projid) ==
be32_to_cpu(gdqp->q_core.d_id));
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index a371954cae1..495564b8af3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -127,8 +127,6 @@ typedef struct xfs_quotainfo {
} xfs_quotainfo_t;
-extern xfs_dqtrxops_t xfs_trans_dquot_ops;
-
extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct {
#define XFS_QM_RTBWARNLIMIT 5
extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void xfs_qm_mount_quotas(xfs_mount_t *);
extern int xfs_qm_quotacheck(xfs_mount_t *);
-extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern void xfs_qm_unmount_quotas(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-extern int xfs_qm_sync(xfs_mount_t *, int);
/* dquot stuff */
extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int xfs_qm_dqattach(xfs_inode_t *, uint);
-extern void xfs_qm_dqdetach(xfs_inode_t *);
extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
@@ -183,19 +175,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-/* vop stuff */
-extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
- uid_t, gid_t, prid_t, uint,
- xfs_dquot_t **, xfs_dquot_t **);
-extern void xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *);
-extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **);
-extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t **, xfs_dquot_t *);
-extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *, uint);
-
/* list stuff */
extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 63037c689a4..a5346630dfa 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
@@ -84,7 +83,7 @@ xfs_fill_statvfs_from_dquot(
* return a statvfs of the project, not the entire filesystem.
* This makes such trees appear as if they are filesystems in themselves.
*/
-STATIC void
+void
xfs_qm_statvfs(
xfs_inode_t *ip,
struct kstatfs *statp)
@@ -92,20 +91,13 @@ xfs_qm_statvfs(
xfs_mount_t *mp = ip->i_mount;
xfs_dquot_t *dqp;
- if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
- !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
- return;
-
if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
- xfs_disk_dquot_t *dp = &dqp->q_core;
-
- xfs_fill_statvfs_from_dquot(statp, dp);
+ xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
xfs_qm_dqput(dqp);
}
}
-STATIC int
+int
xfs_qm_newmount(
xfs_mount_t *mp,
uint *needquotamount,
@@ -114,9 +106,6 @@ xfs_qm_newmount(
uint quotaondisk;
uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
@@ -179,66 +168,6 @@ xfs_qm_newmount(
return 0;
}
-STATIC int
-xfs_qm_endmount(
- xfs_mount_t *mp,
- uint needquotamount,
- uint quotaflags)
-{
- if (needquotamount) {
- ASSERT(mp->m_qflags == 0);
- mp->m_qflags = quotaflags;
- xfs_qm_mount_quotas(mp);
- }
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
- if (! (XFS_IS_QUOTA_ON(mp)))
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
- else
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
- if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
- cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
-
- return 0;
-}
-
-STATIC void
-xfs_qm_dqrele_null(
- xfs_dquot_t *dq)
-{
- /*
- * Called from XFS, where we always check first for a NULL dquot.
- */
- if (!dq)
- return;
- xfs_qm_dqrele(dq);
-}
-
-
-struct xfs_qmops xfs_qmcore_xfs = {
- .xfs_qminit = xfs_qm_newmount,
- .xfs_qmdone = xfs_qm_unmount_quotadestroy,
- .xfs_qmmount = xfs_qm_endmount,
- .xfs_qmunmount = xfs_qm_unmount_quotas,
- .xfs_dqrele = xfs_qm_dqrele_null,
- .xfs_dqattach = xfs_qm_dqattach,
- .xfs_dqdetach = xfs_qm_dqdetach,
- .xfs_dqpurgeall = xfs_qm_dqpurge_all,
- .xfs_dqvopalloc = xfs_qm_vop_dqalloc,
- .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode,
- .xfs_dqvoprename = xfs_qm_vop_rename_dqattach,
- .xfs_dqvopchown = xfs_qm_vop_chown,
- .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
- .xfs_dqstatvfs = xfs_qm_statvfs,
- .xfs_dqsync = xfs_qm_sync,
- .xfs_dqtrxops = &xfs_trans_dquot_ops,
-};
-EXPORT_SYMBOL(xfs_qmcore_xfs);
-
void __init
xfs_qm_init(void)
{
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 709f5f545cf..21b08c0396a 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c7b66f6506c..4e4276b956e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -45,7 +45,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -847,105 +846,55 @@ xfs_qm_export_flags(
}
-/*
- * Release all the dquots on the inodes in an AG.
- */
-STATIC void
-xfs_qm_dqrele_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- uint flags)
+STATIC int
+xfs_dqrele_inode(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
{
- xfs_inode_t *ip = NULL;
- xfs_perag_t *pag = &mp->m_perag[ag];
- int first_index = 0;
- int nr_found;
-
- do {
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void**)&ip, first_index, 1);
-
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /* skip quota inodes */
- if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
+ int error;
- /*
- * If we can't get a reference on the inode, it must be
- * in reclaim. Leave it for the reclaim code to flush.
- */
- if (!igrab(VFS_I(ip))) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
+ /* skip quota inodes */
+ if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
+ ASSERT(ip->i_udquot == NULL);
+ ASSERT(ip->i_gdquot == NULL);
read_unlock(&pag->pag_ici_lock);
+ return 0;
+ }
- /* avoid new inodes though we shouldn't find any here */
- if (xfs_iflags_test(ip, XFS_INEW)) {
- IRELE(ip);
- continue;
- }
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
- ip->i_gdquot) {
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
- xfs_iput(ip, XFS_ILOCK_EXCL);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+ xfs_qm_dqrele(ip->i_udquot);
+ ip->i_udquot = NULL;
+ }
+ if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ xfs_qm_dqrele(ip->i_gdquot);
+ ip->i_gdquot = NULL;
+ }
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
- } while (nr_found);
+ return 0;
}
+
/*
* Go thru all the inodes in the file system, releasing their dquots.
+ *
* Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * AFTER this, in the case of quotaoff.
*/
void
xfs_qm_dqrele_all_inodes(
struct xfs_mount *mp,
uint flags)
{
- int i;
-
ASSERT(mp->m_quotainfo);
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- xfs_qm_dqrele_inodes_ag(mp, i, flags);
- }
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 447173bcf96..97ac9640be9 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -111,7 +110,7 @@ xfs_trans_log_dquot(
* Carry forward whatever is left of the quota blk reservation to
* the spanky new transaction
*/
-STATIC void
+void
xfs_trans_dup_dqinfo(
xfs_trans_t *otp,
xfs_trans_t *ntp)
@@ -167,19 +166,17 @@ xfs_trans_dup_dqinfo(
/*
* Wrap around mod_dquot to account for both user and group quotas.
*/
-STATIC void
+void
xfs_trans_mod_dquot_byino(
xfs_trans_t *tp,
xfs_inode_t *ip,
uint field,
long delta)
{
- xfs_mount_t *mp;
-
- ASSERT(tp);
- mp = tp->t_mountp;
+ xfs_mount_t *mp = tp->t_mountp;
- if (!XFS_IS_QUOTA_ON(mp) ||
+ if (!XFS_IS_QUOTA_RUNNING(mp) ||
+ !XFS_IS_QUOTA_ON(mp) ||
ip->i_ino == mp->m_sb.sb_uquotino ||
ip->i_ino == mp->m_sb.sb_gquotino)
return;
@@ -229,6 +226,7 @@ xfs_trans_mod_dquot(
xfs_dqtrx_t *qtrx;
ASSERT(tp);
+ ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
qtrx = NULL;
if (tp->t_dqinfo == NULL)
@@ -346,7 +344,7 @@ xfs_trans_dqlockedjoin(
* Unreserve just the reservations done by this transaction.
* dquot is still left locked at exit.
*/
-STATIC void
+void
xfs_trans_apply_dquot_deltas(
xfs_trans_t *tp)
{
@@ -357,7 +355,7 @@ xfs_trans_apply_dquot_deltas(
long totalbdelta;
long totalrtbdelta;
- if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+ if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
return;
ASSERT(tp->t_dqinfo);
@@ -531,7 +529,7 @@ xfs_trans_apply_dquot_deltas(
* we simply throw those away, since that's the expected behavior
* when a transaction is curtailed without a commit.
*/
-STATIC void
+void
xfs_trans_unreserve_and_mod_dquots(
xfs_trans_t *tp)
{
@@ -768,7 +766,7 @@ xfs_trans_reserve_quota_bydquots(
{
int resvd = 0, error;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
if (tp && tp->t_dqinfo == NULL)
@@ -811,18 +809,17 @@ xfs_trans_reserve_quota_bydquots(
* This doesn't change the actual usage, just the reservation.
* The inode sent in is locked.
*/
-STATIC int
+int
xfs_trans_reserve_quota_nblks(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- long nblks,
- long ninos,
- uint flags)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ long nblks,
+ long ninos,
+ uint flags)
{
- int error;
+ struct xfs_mount *mp = ip->i_mount;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
if (XFS_IS_PQUOTA_ON(mp))
flags |= XFS_QMOPT_ENOSPC;
@@ -831,7 +828,6 @@ xfs_trans_reserve_quota_nblks(
ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
XFS_TRANS_DQ_RES_RTBLKS ||
(flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
@@ -840,11 +836,9 @@ xfs_trans_reserve_quota_nblks(
/*
* Reserve nblks against these dquots, with trans as the mediator.
*/
- error = xfs_trans_reserve_quota_bydquots(tp, mp,
- ip->i_udquot, ip->i_gdquot,
- nblks, ninos,
- flags);
- return error;
+ return xfs_trans_reserve_quota_bydquots(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ nblks, ninos, flags);
}
/*
@@ -895,25 +889,15 @@ STATIC void
xfs_trans_alloc_dqinfo(
xfs_trans_t *tp)
{
- (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+ tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
}
-STATIC void
+void
xfs_trans_free_dqinfo(
xfs_trans_t *tp)
{
if (!tp->t_dqinfo)
return;
- kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
- (tp)->t_dqinfo = NULL;
+ kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+ tp->t_dqinfo = NULL;
}
-
-xfs_dqtrxops_t xfs_trans_dquot_ops = {
- .qo_dup_dqinfo = xfs_trans_dup_dqinfo,
- .qo_free_dqinfo = xfs_trans_free_dqinfo,
- .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino,
- .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas,
- .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks,
- .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots,
- .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots,
-};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
deleted file mode 100644
index a8cdd73999a..00000000000
--- a/fs/xfs/xfs_acl.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/capability.h>
-#include <linux/posix_acl_xattr.h>
-
-STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
-STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_endian(xfs_acl_t *);
-STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
-STATIC int xfs_acl_invalid(xfs_acl_t *);
-STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
-STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
-STATIC int xfs_acl_allow_set(struct inode *, int);
-
-kmem_zone_t *xfs_acl_zone;
-
-
-/*
- * Test for existence of access ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_access(
- struct inode *vp)
-{
- int error;
-
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Test for existence of default ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_default(
- struct inode *vp)
-{
- int error;
-
- if (!S_ISDIR(vp->i_mode))
- return 0;
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Convert from extended attribute representation to in-memory for XFS.
- */
-STATIC int
-posix_acl_xattr_to_xfs(
- posix_acl_xattr_header *src,
- size_t size,
- xfs_acl_t *dest)
-{
- posix_acl_xattr_entry *src_entry;
- xfs_acl_entry_t *dest_entry;
- int n;
-
- if (!src || !dest)
- return EINVAL;
-
- if (size < sizeof(posix_acl_xattr_header))
- return EINVAL;
-
- if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return EOPNOTSUPP;
-
- memset(dest, 0, sizeof(xfs_acl_t));
- dest->acl_cnt = posix_acl_xattr_count(size);
- if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
- return EINVAL;
-
- /*
- * acl_set_file(3) may request that we set default ACLs with
- * zero length -- defend (gracefully) against that here.
- */
- if (!dest->acl_cnt)
- return 0;
-
- src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
- dest_entry = &dest->acl_entry[0];
-
- for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
- dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
- if (_ACL_PERM_INVALID(dest_entry->ae_perm))
- return EINVAL;
- dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag);
- switch(dest_entry->ae_tag) {
- case ACL_USER:
- case ACL_GROUP:
- dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->ae_id = ACL_UNDEFINED_ID;
- break;
- default:
- return EINVAL;
- }
- }
- if (xfs_acl_invalid(dest))
- return EINVAL;
-
- return 0;
-}
-
-/*
- * Comparison function called from xfs_sort().
- * Primary key is ae_tag, secondary key is ae_id.
- */
-STATIC int
-xfs_acl_entry_compare(
- const void *va,
- const void *vb)
-{
- xfs_acl_entry_t *a = (xfs_acl_entry_t *)va,
- *b = (xfs_acl_entry_t *)vb;
-
- if (a->ae_tag == b->ae_tag)
- return (a->ae_id - b->ae_id);
- return (a->ae_tag - b->ae_tag);
-}
-
-/*
- * Convert from in-memory XFS to extended attribute representation.
- */
-STATIC int
-posix_acl_xfs_to_xattr(
- xfs_acl_t *src,
- posix_acl_xattr_header *dest,
- size_t size)
-{
- int n;
- size_t new_size = posix_acl_xattr_size(src->acl_cnt);
- posix_acl_xattr_entry *dest_entry;
- xfs_acl_entry_t *src_entry;
-
- if (size < new_size)
- return -ERANGE;
-
- /* Need to sort src XFS ACL by <ae_tag,ae_id> */
- xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
- xfs_acl_entry_compare);
-
- dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
- dest_entry = &dest->a_entries[0];
- src_entry = &src->acl_entry[0];
- for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
- dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
- if (_ACL_PERM_INVALID(src_entry->ae_perm))
- return -EINVAL;
- dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag);
- switch (src_entry->ae_tag) {
- case ACL_USER:
- case ACL_GROUP:
- dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
- break;
- default:
- return -EINVAL;
- }
- }
- return new_size;
-}
-
-int
-xfs_acl_vget(
- struct inode *vp,
- void *acl,
- size_t size,
- int kind)
-{
- int error;
- xfs_acl_t *xfs_acl = NULL;
- posix_acl_xattr_header *ext_acl = acl;
- int flags = 0;
-
- if(size) {
- if (!(_ACL_ALLOC(xfs_acl))) {
- error = ENOMEM;
- goto out;
- }
- memset(xfs_acl, 0, sizeof(xfs_acl_t));
- } else
- flags = ATTR_KERNOVAL;
-
- xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
- if (error)
- goto out;
-
- if (!size) {
- error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
- } else {
- if (xfs_acl_invalid(xfs_acl)) {
- error = EINVAL;
- goto out;
- }
- if (kind == _ACL_TYPE_ACCESS)
- xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
- error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
- }
-out:
- if(xfs_acl)
- _ACL_FREE(xfs_acl);
- return -error;
-}
-
-int
-xfs_acl_vremove(
- struct inode *vp,
- int kind)
-{
- int error;
-
- error = xfs_acl_allow_set(vp, kind);
- if (!error) {
- error = xfs_attr_remove(XFS_I(vp),
- kind == _ACL_TYPE_DEFAULT?
- SGI_ACL_DEFAULT: SGI_ACL_FILE,
- ATTR_ROOT);
- if (error == ENOATTR)
- error = 0; /* 'scool */
- }
- return -error;
-}
-
-int
-xfs_acl_vset(
- struct inode *vp,
- void *acl,
- size_t size,
- int kind)
-{
- posix_acl_xattr_header *ext_acl = acl;
- xfs_acl_t *xfs_acl;
- int error;
- int basicperms = 0; /* more than std unix perms? */
-
- if (!acl)
- return -EINVAL;
-
- if (!(_ACL_ALLOC(xfs_acl)))
- return -ENOMEM;
-
- error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
- if (error) {
- _ACL_FREE(xfs_acl);
- return -error;
- }
- if (!xfs_acl->acl_cnt) {
- _ACL_FREE(xfs_acl);
- return 0;
- }
-
- error = xfs_acl_allow_set(vp, kind);
-
- /* Incoming ACL exists, set file mode based on its value */
- if (!error && kind == _ACL_TYPE_ACCESS)
- error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
-
- if (error)
- goto out;
-
- /*
- * If we have more than std unix permissions, set up the actual attr.
- * Otherwise, delete any existing attr. This prevents us from
- * having actual attrs for permissions that can be stored in the
- * standard permission bits.
- */
- if (!basicperms) {
- xfs_acl_set_attr(vp, xfs_acl, kind, &error);
- } else {
- error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
- }
-
-out:
- _ACL_FREE(xfs_acl);
- return -error;
-}
-
-int
-xfs_acl_iaccess(
- xfs_inode_t *ip,
- mode_t mode,
- cred_t *cr)
-{
- xfs_acl_t *acl;
- int rval;
- struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
-
- if (!(_ACL_ALLOC(acl)))
- return -1;
-
- /* If the file has no ACL return -1. */
- rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
- _ACL_FREE(acl);
- return -1;
- }
- xfs_acl_get_endian(acl);
-
- /* If the file has an empty ACL return -1. */
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
- _ACL_FREE(acl);
- return -1;
- }
-
- /* Synchronize ACL with mode bits */
- xfs_acl_sync_mode(ip->i_d.di_mode, acl);
-
- rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
- _ACL_FREE(acl);
- return rval;
-}
-
-STATIC int
-xfs_acl_allow_set(
- struct inode *vp,
- int kind)
-{
- if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
- return EPERM;
- if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
- return ENOTDIR;
- if (vp->i_sb->s_flags & MS_RDONLY)
- return EROFS;
- if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
- return EPERM;
- return 0;
-}
-
-/*
- * Note: cr is only used here for the capability check if the ACL test fails.
- * It is not used to find out the credentials uid or groups etc, as was
- * done in IRIX. It is assumed that the uid and groups for the current
- * thread are taken from "current" instead of the cr parameter.
- */
-STATIC int
-xfs_acl_access(
- uid_t fuid,
- gid_t fgid,
- xfs_acl_t *fap,
- mode_t md,
- cred_t *cr)
-{
- xfs_acl_entry_t matched;
- int i, allows;
- int maskallows = -1; /* true, but not 1, either */
- int seen_userobj = 0;
-
- matched.ae_tag = 0; /* Invalid type */
- matched.ae_perm = 0;
-
- for (i = 0; i < fap->acl_cnt; i++) {
- /*
- * Break out if we've got a user_obj entry or
- * a user entry and the mask (and have processed USER_OBJ)
- */
- if (matched.ae_tag == ACL_USER_OBJ)
- break;
- if (matched.ae_tag == ACL_USER) {
- if (maskallows != -1 && seen_userobj)
- break;
- if (fap->acl_entry[i].ae_tag != ACL_MASK &&
- fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
- continue;
- }
- /* True if this entry allows the requested access */
- allows = ((fap->acl_entry[i].ae_perm & md) == md);
-
- switch (fap->acl_entry[i].ae_tag) {
- case ACL_USER_OBJ:
- seen_userobj = 1;
- if (fuid != current_fsuid())
- continue;
- matched.ae_tag = ACL_USER_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_USER:
- if (fap->acl_entry[i].ae_id != current_fsuid())
- continue;
- matched.ae_tag = ACL_USER;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP_OBJ:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fgid))
- continue;
- matched.ae_tag = ACL_GROUP_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fap->acl_entry[i].ae_id))
- continue;
- matched.ae_tag = ACL_GROUP;
- matched.ae_perm = allows;
- break;
- case ACL_MASK:
- maskallows = allows;
- break;
- case ACL_OTHER:
- if (matched.ae_tag != 0)
- continue;
- matched.ae_tag = ACL_OTHER;
- matched.ae_perm = allows;
- break;
- }
- }
- /*
- * First possibility is that no matched entry allows access.
- * The capability to override DAC may exist, so check for it.
- */
- switch (matched.ae_tag) {
- case ACL_OTHER:
- case ACL_USER_OBJ:
- if (matched.ae_perm)
- return 0;
- break;
- case ACL_USER:
- case ACL_GROUP_OBJ:
- case ACL_GROUP:
- if (maskallows && matched.ae_perm)
- return 0;
- break;
- case 0:
- break;
- }
-
- /* EACCES tells generic_permission to check for capability overrides */
- return EACCES;
-}
-
-/*
- * ACL validity checker.
- * This acl validation routine checks each ACL entry read in makes sense.
- */
-STATIC int
-xfs_acl_invalid(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *entry, *e;
- int user = 0, group = 0, other = 0, mask = 0;
- int mask_required = 0;
- int i, j;
-
- if (!aclp)
- goto acl_invalid;
-
- if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
- goto acl_invalid;
-
- for (i = 0; i < aclp->acl_cnt; i++) {
- entry = &aclp->acl_entry[i];
- switch (entry->ae_tag) {
- case ACL_USER_OBJ:
- if (user++)
- goto acl_invalid;
- break;
- case ACL_GROUP_OBJ:
- if (group++)
- goto acl_invalid;
- break;
- case ACL_OTHER:
- if (other++)
- goto acl_invalid;
- break;
- case ACL_USER:
- case ACL_GROUP:
- for (j = i + 1; j < aclp->acl_cnt; j++) {
- e = &aclp->acl_entry[j];
- if (e->ae_id == entry->ae_id &&
- e->ae_tag == entry->ae_tag)
- goto acl_invalid;
- }
- mask_required++;
- break;
- case ACL_MASK:
- if (mask++)
- goto acl_invalid;
- break;
- default:
- goto acl_invalid;
- }
- }
- if (!user || !group || !other || (mask_required && !mask))
- goto acl_invalid;
- else
- return 0;
-acl_invalid:
- return EINVAL;
-}
-
-/*
- * Do ACL endian conversion.
- */
-STATIC void
-xfs_acl_get_endian(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *ace, *end;
-
- INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
- INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
-}
-
-/*
- * Get the ACL from the EA and do endian conversion.
- */
-STATIC void
-xfs_acl_get_attr(
- struct inode *vp,
- xfs_acl_t *aclp,
- int kind,
- int flags,
- int *error)
-{
- int len = sizeof(xfs_acl_t);
-
- ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
- flags |= ATTR_ROOT;
- *error = xfs_attr_get(XFS_I(vp),
- kind == _ACL_TYPE_ACCESS ?
- SGI_ACL_FILE : SGI_ACL_DEFAULT,
- (char *)aclp, &len, flags);
- if (*error || (flags & ATTR_KERNOVAL))
- return;
- xfs_acl_get_endian(aclp);
-}
-
-/*
- * Set the EA with the ACL and do endian conversion.
- */
-STATIC void
-xfs_acl_set_attr(
- struct inode *vp,
- xfs_acl_t *aclp,
- int kind,
- int *error)
-{
- xfs_acl_entry_t *ace, *newace, *end;
- xfs_acl_t *newacl;
- int len;
-
- if (!(_ACL_ALLOC(newacl))) {
- *error = ENOMEM;
- return;
- }
-
- len = sizeof(xfs_acl_t) -
- (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
- ace < end;
- ace++, newace++) {
- INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
- INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- *error = xfs_attr_set(XFS_I(vp),
- kind == _ACL_TYPE_ACCESS ?
- SGI_ACL_FILE: SGI_ACL_DEFAULT,
- (char *)newacl, len, ATTR_ROOT);
- _ACL_FREE(newacl);
-}
-
-int
-xfs_acl_vtoacl(
- struct inode *vp,
- xfs_acl_t *access_acl,
- xfs_acl_t *default_acl)
-{
- int error = 0;
-
- if (access_acl) {
- /*
- * Get the Access ACL and the mode. If either cannot
- * be obtained for some reason, invalidate the access ACL.
- */
- xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
- if (error)
- access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- else /* We have a good ACL and the file mode, synchronize. */
- xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
- }
-
- if (default_acl) {
- xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
- if (error)
- default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- }
- return error;
-}
-
-/*
- * This function retrieves the parent directory's acl, processes it
- * and lets the child inherit the acl(s) that it should.
- */
-int
-xfs_acl_inherit(
- struct inode *vp,
- mode_t mode,
- xfs_acl_t *pdaclp)
-{
- xfs_acl_t *cacl;
- int error = 0;
- int basicperms = 0;
-
- /*
- * If the parent does not have a default ACL, or it's an
- * invalid ACL, we're done.
- */
- if (!vp)
- return 0;
- if (!pdaclp || xfs_acl_invalid(pdaclp))
- return 0;
-
- /*
- * Copy the default ACL of the containing directory to
- * the access ACL of the new file and use the mode that
- * was passed in to set up the correct initial values for
- * the u::,g::[m::], and o:: entries. This is what makes
- * umask() "work" with ACL's.
- */
-
- if (!(_ACL_ALLOC(cacl)))
- return ENOMEM;
-
- memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
- xfs_acl_filter_mode(mode, cacl);
- error = xfs_acl_setmode(vp, cacl, &basicperms);
- if (error)
- goto out_error;
-
- /*
- * Set the Default and Access ACL on the file. The mode is already
- * set on the file, so we don't need to worry about that.
- *
- * If the new file is a directory, its default ACL is a copy of
- * the containing directory's default ACL.
- */
- if (S_ISDIR(vp->i_mode))
- xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
- if (!error && !basicperms)
- xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
-out_error:
- _ACL_FREE(cacl);
- return error;
-}
-
-/*
- * Set up the correct mode on the file based on the supplied ACL. This
- * makes sure that the mode on the file reflects the state of the
- * u::,g::[m::], and o:: entries in the ACL. Since the mode is where
- * the ACL is going to get the permissions for these entries, we must
- * synchronize the mode whenever we set the ACL on a file.
- */
-STATIC int
-xfs_acl_setmode(
- struct inode *vp,
- xfs_acl_t *acl,
- int *basicperms)
-{
- struct iattr iattr;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
- int i, nomask = 1;
-
- *basicperms = 1;
-
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
- return 0;
-
- /*
- * Copy the u::, g::, o::, and m:: bits from the ACL into the
- * mode. The m:: bits take precedence over the g:: bits.
- */
- iattr.ia_valid = ATTR_MODE;
- iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
- iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
- ap = acl->acl_entry;
- for (i = 0; i < acl->acl_cnt; ++i) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- iattr.ia_mode |= ap->ae_perm << 6;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK: /* more than just standard modes */
- nomask = 0;
- iattr.ia_mode |= ap->ae_perm << 3;
- *basicperms = 0;
- break;
- case ACL_OTHER:
- iattr.ia_mode |= ap->ae_perm;
- break;
- default: /* more than just standard modes */
- *basicperms = 0;
- break;
- }
- ap++;
- }
-
- /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- iattr.ia_mode |= gap->ae_perm << 3;
-
- return xfs_setattr(XFS_I(vp), &iattr, 0);
-}
-
-/*
- * The permissions for the special ACL entries (u::, g::[m::], o::) are
- * actually stored in the file mode (if there is both a group and a mask,
- * the group is stored in the ACL entry and the mask is stored on the file).
- * This allows the mode to remain automatically in sync with the ACL without
- * the need for a call-back to the ACL system at every point where the mode
- * could change. This function takes the permissions from the specified mode
- * and places it in the supplied ACL.
- *
- * This implementation draws its validity from the fact that, when the ACL
- * was assigned, the mode was copied from the ACL.
- * If the mode did not change, therefore, the mode remains exactly what was
- * taken from the special ACL entries at assignment.
- * If a subsequent chmod() was done, the POSIX spec says that the change in
- * mode must cause an update to the ACL seen at user level and used for
- * access checks. Before and after a mode change, therefore, the file mode
- * most accurately reflects what the special ACL entries should permit/deny.
- *
- * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
- * the existing mode bits will override whatever is in the
- * ACL. Similarly, if there is a pre-existing ACL that was
- * never in sync with its mode (owing to a bug in 6.5 and
- * before), it will now magically (or mystically) be
- * synchronized. This could cause slight astonishment, but
- * it is better than inconsistent permissions.
- *
- * The supplied ACL is a template that may contain any combination
- * of special entries. These are treated as place holders when we fill
- * out the ACL. This routine does not add or remove special entries, it
- * simply unites each special entry with its associated set of permissions.
- */
-STATIC void
-xfs_acl_sync_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be set instead of the GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm = (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm = (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm = mode & 0x7;
- break;
- default:
- break;
- }
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm = (mode >> 3) & 0x7;
-}
-
-/*
- * When inheriting an Access ACL from a directory Default ACL,
- * the ACL bits are set to the intersection of the ACL default
- * permission bits and the file permission bits in mode. If there
- * are no permission bits on the file then we must not give them
- * the ACL. This is what what makes umask() work with ACLs.
- */
-STATIC void
-xfs_acl_filter_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be merged with GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm &= (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm &= (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm &= mode & 0x7;
- break;
- default:
- break;
- }
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm &= (mode >> 3) & 0x7;
-}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 642f1db4def..63dc1f2efad 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,81 +18,48 @@
#ifndef __XFS_ACL_H__
#define __XFS_ACL_H__
-/*
- * Access Control Lists
- */
-typedef __uint16_t xfs_acl_perm_t;
-typedef __int32_t xfs_acl_tag_t;
-typedef __int32_t xfs_acl_id_t;
+struct inode;
+struct posix_acl;
+struct xfs_inode;
#define XFS_ACL_MAX_ENTRIES 25
#define XFS_ACL_NOT_PRESENT (-1)
-typedef struct xfs_acl_entry {
- xfs_acl_tag_t ae_tag;
- xfs_acl_id_t ae_id;
- xfs_acl_perm_t ae_perm;
-} xfs_acl_entry_t;
-
-typedef struct xfs_acl {
- __int32_t acl_cnt;
- xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES];
-} xfs_acl_t;
+/* On-disk XFS access control list structure */
+struct xfs_acl {
+ __be32 acl_cnt;
+ struct xfs_acl_entry {
+ __be32 ae_tag;
+ __be32 ae_id;
+ __be16 ae_perm;
+ } acl_entry[XFS_ACL_MAX_ENTRIES];
+};
/* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE "SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE "SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
-#define _ACL_TYPE_ACCESS 1
-#define _ACL_TYPE_DEFAULT 2
-
#ifdef CONFIG_XFS_POSIX_ACL
+extern int xfs_check_acl(struct inode *inode, int mask);
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
+extern int xfs_acl_chmod(struct inode *inode);
+extern void xfs_inode_init_acls(struct xfs_inode *ip);
+extern void xfs_inode_clear_acls(struct xfs_inode *ip);
+extern int posix_acl_access_exists(struct inode *inode);
+extern int posix_acl_default_exists(struct inode *inode);
-struct vattr;
-struct xfs_inode;
-
-extern struct kmem_zone *xfs_acl_zone;
-#define xfs_acl_zone_init(zone, name) \
- (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
-#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
-
-extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
-extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct inode *);
-extern int xfs_acl_vhasacl_default(struct inode *);
-extern int xfs_acl_vset(struct inode *, void *, size_t, int);
-extern int xfs_acl_vget(struct inode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct inode *, int);
-
-#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
-
-#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
-#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0)
-#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0)
-#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access
-#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default
-
-#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
-#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
-
+extern struct xattr_handler xfs_xattr_system_handler;
#else
-#define xfs_acl_zone_init(zone,name)
-#define xfs_acl_zone_destroy(zone)
-#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vremove(v,t) (-EOPNOTSUPP)
-#define xfs_acl_vhasacl_access(v) (0)
-#define xfs_acl_vhasacl_default(v) (0)
-#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */
-#define _ACL_FREE(a) ((void)0)
-#define _ACL_INHERIT(c,m,d) (0)
-#define _ACL_GET_ACCESS(pv,pa) (0)
-#define _ACL_GET_DEFAULT(pv,pd) (0)
-#define _ACL_ACCESS_EXISTS (NULL)
-#define _ACL_DEFAULT_EXISTS (NULL)
-#endif
-
+# define xfs_check_acl NULL
+# define xfs_get_acl(inode, type) NULL
+# define xfs_inherit_acl(inode, default_acl) 0
+# define xfs_acl_chmod(inode) 0
+# define xfs_inode_init_acls(ip)
+# define xfs_inode_clear_acls(ip)
+# define posix_acl_access_exists(inode) 0
+# define posix_acl_default_exists(inode) 0
+#endif /* CONFIG_XFS_POSIX_ACL */
#endif /* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index c8641f713ca..f24b50b68d0 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -212,6 +212,8 @@ typedef struct xfs_perag
/*
* tags for inode radix tree
*/
+#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
+ in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 53d5e70d136..0902249354a 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
#endif /* __KERNEL__ */
-/* do we need conversion? */
-#define ARCH_NOCONVERT 1
-#ifdef XFS_NATIVE_HOST
-# define ARCH_CONVERT ARCH_NOCONVERT
-#else
-# define ARCH_CONVERT 0
-#endif
-
-/* generic swapping macros */
-
-#ifndef HAVE_SWABMACROS
-#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var))))
-#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var))))
-#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var))))
-#endif
-
-#define INT_SWAP(type, var) \
- ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \
- ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \
- ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \
- (var))))
-
/*
* get and set integers from potentially unaligned locations
*/
@@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
((__u8*)(pointer))[1] = (((value) ) & 0xff); \
}
-/* does not return a value */
-#define INT_SET(reference,arch,valueref) \
- (__builtin_constant_p(valueref) ? \
- (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \
- (void)( \
- ((reference) = (valueref)), \
- ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \
- ) \
- )
-
/*
* In directories inode numbers are stored as unaligned arrays of unsigned
* 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5fde1654b43..db15feb906f 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -45,7 +45,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
-#include "xfs_acl.h"
#include "xfs_rw.h"
#include "xfs_vnodeops.h"
@@ -249,8 +248,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
/*
* Attach the dquots to the inode.
*/
- if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
- return (error);
+ error = xfs_qm_dqattach(dp, 0);
+ if (error)
+ return error;
/*
* If the inode doesn't have an attribute fork, add one.
@@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
+ error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
@@ -501,8 +501,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
/*
* Attach the dquots to the inode.
*/
- if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
- return (error);
+ error = xfs_qm_dqattach(dp, 0);
+ if (error)
+ return error;
/*
* Start our first transaction of the day.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ca7c6005a48..7928b9983c1 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc(
* Adjust the disk quota also. This was reserved
* earlier.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
} else {
@@ -2995,7 +2995,7 @@ xfs_bmap_btalloc(
* Adjust the disk quota also. This was reserved
* earlier.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
XFS_TRANS_DQ_BCOUNT,
(long) args.len);
@@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents(
return error;
xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
ip->i_d.di_nblocks--;
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
if (cur->bc_bufs[0] == cbp)
cur->bc_bufs[0] = NULL;
@@ -3386,7 +3386,7 @@ xfs_bmap_del_extent(
* Adjust quota data.
*/
if (qfield)
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
+ xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
/*
* Account for change in delayed indirect blocks.
@@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree(
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
/*
* Fill in the child block.
@@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents(
XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
XFS_IFORK_NEXT_SET(ip, whichfork, 1);
ip->i_d.di_nblocks = 1;
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
+ xfs_trans_mod_dquot_byino(tp, ip,
XFS_TRANS_DQ_BCOUNT, 1L);
flags |= xfs_ilog_fext(whichfork);
} else {
@@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork(
XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
goto error0;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
+ error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
@@ -4983,10 +4983,11 @@ xfs_bmapi(
* adjusted later. We return if we haven't
* allocated blocks already inside this loop.
*/
- if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS(
- mp, NULL, ip, (long)alen, 0,
+ error = xfs_trans_reserve_quota_nblks(
+ NULL, ip, (long)alen, 0,
rt ? XFS_QMOPT_RES_RTBLKS :
- XFS_QMOPT_RES_REGBLKS))) {
+ XFS_QMOPT_RES_REGBLKS);
+ if (error) {
if (n == 0) {
*nmap = 0;
ASSERT(cur == NULL);
@@ -5035,8 +5036,8 @@ xfs_bmapi(
if (XFS_IS_QUOTA_ON(mp))
/* unreserve the blocks now */
(void)
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
- mp, NULL, ip,
+ xfs_trans_unreserve_quota_nblks(
+ NULL, ip,
(long)alen, 0, rt ?
XFS_QMOPT_RES_RTBLKS :
XFS_QMOPT_RES_REGBLKS);
@@ -5691,14 +5692,14 @@ xfs_bunmapi(
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
(int64_t)rtexts, rsvd);
- (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
- NULL, ip, -((long)del.br_blockcount), 0,
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
(int64_t)del.br_blockcount, rsvd);
- (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
- NULL, ip, -((long)del.br_blockcount), 0,
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
@@ -6085,6 +6086,7 @@ xfs_getbmap(
break;
}
+ kmem_free(out);
return error;
}
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0760d352586..5c1ade06578 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -590,7 +590,7 @@ xfs_bmbt_alloc_block(
cur->bc_private.b.allocated++;
cur->bc_private.b.ip->i_d.di_nblocks++;
xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+ xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
XFS_TRANS_DQ_BCOUNT, 1L);
new->l = cpu_to_be64(args.fsbno);
@@ -618,7 +618,7 @@ xfs_bmbt_free_block(
ip->i_d.di_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, bp);
return 0;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 6c87c8f304e..edf8bdf4141 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -542,10 +542,8 @@ xfs_filestream_associate(
* waiting for the lock because someone else is waiting on the lock we
* hold and we cannot drop that as we are in a transaction here.
*
- * Lucky for us, this inversion is rarely a problem because it's a
- * directory inode that we are trying to lock here and that means the
- * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
- * used. i.e. freeze, remount-ro, quotasync or unmount.
+ * Lucky for us, this inversion is not a problem because it's a
+ * directory inode that we are trying to lock here.
*
* So, if we can't get the iolock without sleeping then just give up
*/
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f7c06fac822..c4ea51b55dc 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks {
* Minimum and maximum sizes need for growth checks
*/
#define XFS_MIN_AG_BLOCKS 64
-#define XFS_MIN_LOG_BLOCKS 512
-#define XFS_MAX_LOG_BLOCKS (64 * 1024)
-#define XFS_MIN_LOG_BYTES (256 * 1024)
-#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024)
+#define XFS_MIN_LOG_BLOCKS 512ULL
+#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL)
+#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL)
+
+/* keep the maximum size under 2^31 by a small amount */
+#define XFS_MAX_LOG_BYTES \
+ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
/*
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 89b81eedce6..76c540f719e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -18,6 +18,7 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
+#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -82,6 +83,7 @@ xfs_inode_alloc(
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
ip->i_size = 0;
ip->i_new_size = 0;
+ xfs_inode_init_acls(ip);
/*
* Initialize inode's trace buffers.
@@ -500,10 +502,7 @@ xfs_ireclaim(
* ilock one but will still hold the iolock.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- /*
- * Release dquots (and their references) if any.
- */
- XFS_QM_DQDETACH(ip->i_mount, ip);
+ xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
switch (ip->i_d.di_mode & S_IFMT) {
@@ -561,6 +560,7 @@ xfs_ireclaim(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
+ xfs_inode_clear_acls(ip);
kmem_zone_free(xfs_inode_zone, ip);
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 123b20c8cbf..1f22d65fed0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,7 +49,6 @@
#include "xfs_utils.h"
#include "xfs_dir2_trace.h"
#include "xfs_quota.h"
-#include "xfs_acl.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f879c1bc4b9..77016702938 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,6 +18,7 @@
#ifndef __XFS_INODE_H__
#define __XFS_INODE_H__
+struct posix_acl;
struct xfs_dinode;
struct xfs_inode;
@@ -272,6 +273,11 @@ typedef struct xfs_inode {
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
+#ifdef CONFIG_XFS_POSIX_ACL
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+#endif
+
/* Trace buffers per inode. */
#ifdef XFS_INODE_TRACE
struct ktrace *i_trace; /* general inode trace */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5aaa2d7ec15..67ae5555a30 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -385,7 +384,7 @@ xfs_iomap_write_direct(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
- error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+ error = xfs_qm_dqattach_locked(ip, 0);
if (error)
return XFS_ERROR(error);
@@ -444,8 +443,7 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
- qblocks, 0, quota_flag);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
goto error1;
@@ -495,7 +493,7 @@ xfs_iomap_write_direct(
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+ xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -582,7 +580,7 @@ xfs_iomap_write_delay(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
- error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+ error = xfs_qm_dqattach_locked(ip, 0);
if (error)
return XFS_ERROR(error);
@@ -684,7 +682,8 @@ xfs_iomap_write_allocate(
/*
* Make sure that the dquots are there.
*/
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return XFS_ERROR(error);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7ba450116d4..47da2fb4537 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer(
error = 0;
if (buf_f->blf_flags &
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+ if (item->ri_buf[i].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ goto next;
+ }
+ if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[i].i_len, __func__);
+ goto next;
+ }
error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
item->ri_buf[i].i_addr,
-1, 0, XFS_QMOPT_DOWARN,
"dquot_buf_recover");
+ if (error)
+ goto next;
}
- if (!error)
- memcpy(xfs_buf_offset(bp,
- (uint)bit << XFS_BLI_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
- nbits<<XFS_BLI_SHIFT); /* length */
+
+ memcpy(xfs_buf_offset(bp,
+ (uint)bit << XFS_BLI_SHIFT), /* dest */
+ item->ri_buf[i].i_addr, /* source */
+ nbits<<XFS_BLI_SHIFT); /* length */
+ next:
i++;
bit += nbits;
}
@@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans(
return (0);
recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
- ASSERT(recddq);
+
+ if (item->ri_buf[1].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ return XFS_ERROR(EIO);
+ }
+ if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[1].i_len, __func__);
+ return XFS_ERROR(EIO);
+ }
+
/*
* This type of quotas was turned off, so ignore this record.
*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 65a99725d0c..5c6f092659c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp)
}
/*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+int
+xfs_mount_reset_sbqflags(
+ struct xfs_mount *mp)
+{
+ int error;
+ struct xfs_trans *tp;
+
+ mp->m_qflags = 0;
+
+ /*
+ * It is OK to look at sb_qflags here in mount path,
+ * without m_sb_lock.
+ */
+ if (mp->m_sb.sb_qflags == 0)
+ return 0;
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags = 0;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * If the fs is readonly, let the incore superblock run
+ * with quotas off but don't flush the update out to disk
+ */
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return 0;
+
+#ifdef QUOTADEBUG
+ xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+ error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "xfs_mount_reset_sbqflags: Superblock update failed!");
+ return error;
+ }
+
+ xfs_mod_sb(tp, XFS_SB_QFLAGS);
+ return xfs_trans_commit(tp, 0);
+}
+
+/*
* This function does the following on an initial mount of a file system:
* - reads the superblock from disk and init the mount struct
* - if we're a 32-bit kernel, do a size check on the superblock
@@ -976,7 +1023,8 @@ xfs_mountfs(
xfs_sb_t *sbp = &(mp->m_sb);
xfs_inode_t *rip;
__uint64_t resblks;
- uint quotamount, quotaflags;
+ uint quotamount = 0;
+ uint quotaflags = 0;
int error = 0;
xfs_mount_common(mp, sbp);
@@ -1210,9 +1258,28 @@ xfs_mountfs(
/*
* Initialise the XFS quota management subsystem for this mount
*/
- error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
- if (error)
- goto out_rtunmount;
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
+ error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
+ if (error)
+ goto out_rtunmount;
+ } else {
+ ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+ /*
+ * If a file system had quotas running earlier, but decided to
+ * mount without -o uquota/pquota/gquota options, revoke the
+ * quotachecked license. Unwind via out_rtunmount on failure.
+ */
+ if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+ cmn_err(CE_NOTE,
+ "XFS: resetting qflags for filesystem %s",
+ mp->m_fsname);
+
+ error = xfs_mount_reset_sbqflags(mp);
+ if (error)
+ goto out_rtunmount;
+ }
+ }
/*
* Finish recovering the file system. This part needed to be
@@ -1228,9 +1295,19 @@ xfs_mountfs(
/*
* Complete the quota initialisation, post-log-replay component.
*/
- error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
- if (error)
- goto out_rtunmount;
+ if (quotamount) {
+ ASSERT(mp->m_qflags == 0);
+ mp->m_qflags = quotaflags;
+
+ xfs_qm_mount_quotas(mp);
+ }
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+ else
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+#endif
/*
* Now we are mounted, reserve a small amount of unused space for
@@ -1279,12 +1356,7 @@ xfs_unmountfs(
__uint64_t resblks;
int error;
- /*
- * Release dquot that rootinode, rbmino and rsumino might be holding,
- * and release the quota inodes.
- */
- XFS_QM_UNMOUNT(mp);
-
+ xfs_qm_unmount_quotas(mp);
xfs_rtunmount_inodes(mp);
IRELE(mp->m_rootip);
@@ -1299,12 +1371,9 @@ xfs_unmountfs(
* need to force the log first.
*/
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
-
- XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
- if (mp->m_quotainfo)
- XFS_QM_DONE(mp);
+ xfs_qm_unmount(mp);
/*
* Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d6a64392f98..a5122382afd 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -64,6 +64,8 @@ struct xfs_swapext;
struct xfs_mru_cache;
struct xfs_nameops;
struct xfs_ail;
+struct xfs_quotainfo;
+
/*
* Prototypes and functions for the Data Migration subsystem.
@@ -107,86 +109,6 @@ typedef struct xfs_dmops {
(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
-/*
- * Prototypes and functions for the Quota Management subsystem.
- */
-
-struct xfs_dquot;
-struct xfs_dqtrxops;
-struct xfs_quotainfo;
-
-typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
-typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
-typedef void (*xfs_qmunmount_t)(struct xfs_mount *);
-typedef void (*xfs_qmdone_t)(struct xfs_mount *);
-typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
-typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint);
-typedef void (*xfs_dqdetach_t)(struct xfs_inode *);
-typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
-typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *,
- struct xfs_inode *, uid_t, gid_t, prid_t, uint,
- struct xfs_dquot **, struct xfs_dquot **);
-typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot *, struct xfs_dquot *);
-typedef int (*xfs_dqvoprename_t)(struct xfs_inode **);
-typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
- struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot **, struct xfs_dquot *);
-typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot *, struct xfs_dquot *, uint);
-typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
-typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
-
-typedef struct xfs_qmops {
- xfs_qminit_t xfs_qminit;
- xfs_qmdone_t xfs_qmdone;
- xfs_qmmount_t xfs_qmmount;
- xfs_qmunmount_t xfs_qmunmount;
- xfs_dqrele_t xfs_dqrele;
- xfs_dqattach_t xfs_dqattach;
- xfs_dqdetach_t xfs_dqdetach;
- xfs_dqpurgeall_t xfs_dqpurgeall;
- xfs_dqvopalloc_t xfs_dqvopalloc;
- xfs_dqvopcreate_t xfs_dqvopcreate;
- xfs_dqvoprename_t xfs_dqvoprename;
- xfs_dqvopchown_t xfs_dqvopchown;
- xfs_dqvopchownresv_t xfs_dqvopchownresv;
- xfs_dqstatvfs_t xfs_dqstatvfs;
- xfs_dqsync_t xfs_dqsync;
- struct xfs_dqtrxops *xfs_dqtrxops;
-} xfs_qmops_t;
-
-#define XFS_QM_INIT(mp, mnt, fl) \
- (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
-#define XFS_QM_MOUNT(mp, mnt, fl) \
- (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
-#define XFS_QM_UNMOUNT(mp) \
- (*(mp)->m_qm_ops->xfs_qmunmount)(mp)
-#define XFS_QM_DONE(mp) \
- (*(mp)->m_qm_ops->xfs_qmdone)(mp)
-#define XFS_QM_DQRELE(mp, dq) \
- (*(mp)->m_qm_ops->xfs_dqrele)(dq)
-#define XFS_QM_DQATTACH(mp, ip, fl) \
- (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl)
-#define XFS_QM_DQDETACH(mp, ip) \
- (*(mp)->m_qm_ops->xfs_dqdetach)(ip)
-#define XFS_QM_DQPURGEALL(mp, fl) \
- (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
- (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
-#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
- (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2)
-#define XFS_QM_DQVOPRENAME(mp, ip) \
- (*(mp)->m_qm_ops->xfs_dqvoprename)(ip)
-#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
- (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq)
-#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
- (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
-#define XFS_QM_DQSTATVFS(ip, statp) \
- (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
-#define XFS_QM_DQSYNC(mp, flags) \
- (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
-
#ifdef HAVE_PERCPU_SB
/*
@@ -510,8 +432,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dmops_get(struct xfs_mount *);
extern void xfs_dmops_put(struct xfs_mount *);
-extern int xfs_qmops_get(struct xfs_mount *);
-extern void xfs_qmops_put(struct xfs_mount *);
extern struct xfs_dmops xfs_dmcore_xfs;
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
deleted file mode 100644
index e101790ea8e..00000000000
--- a/fs/xfs/xfs_qmops.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_error.h"
-
-
-STATIC struct xfs_dquot *
-xfs_dqvopchown_default(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- struct xfs_dquot **dqp,
- struct xfs_dquot *dq)
-{
- return NULL;
-}
-
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-int
-xfs_mount_reset_sbqflags(xfs_mount_t *mp)
-{
- int error;
- xfs_trans_t *tp;
-
- mp->m_qflags = 0;
- /*
- * It is OK to look at sb_qflags here in mount path,
- * without m_sb_lock.
- */
- if (mp->m_sb.sb_qflags == 0)
- return 0;
- spin_lock(&mp->m_sb_lock);
- mp->m_sb.sb_qflags = 0;
- spin_unlock(&mp->m_sb_lock);
-
- /*
- * if the fs is readonly, let the incore superblock run
- * with quotas off but don't flush the update out to disk
- */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
-#ifdef QUOTADEBUG
- xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- xfs_fs_cmn_err(CE_ALERT, mp,
- "xfs_mount_reset_sbqflags: Superblock update failed!");
- return error;
- }
- xfs_mod_sb(tp, XFS_SB_QFLAGS);
- error = xfs_trans_commit(tp, 0);
- return error;
-}
-
-STATIC int
-xfs_noquota_init(
- xfs_mount_t *mp,
- uint *needquotamount,
- uint *quotaflags)
-{
- int error = 0;
-
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
- ASSERT(!XFS_IS_QUOTA_ON(mp));
-
- /*
- * If a file system had quotas running earlier, but decided to
- * mount without -o uquota/pquota/gquota options, revoke the
- * quotachecked license.
- */
- if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
- cmn_err(CE_NOTE,
- "XFS resetting qflags for filesystem %s",
- mp->m_fsname);
-
- error = xfs_mount_reset_sbqflags(mp);
- }
- return error;
-}
-
-static struct xfs_qmops xfs_qmcore_stub = {
- .xfs_qminit = (xfs_qminit_t) xfs_noquota_init,
- .xfs_qmdone = (xfs_qmdone_t) fs_noerr,
- .xfs_qmmount = (xfs_qmmount_t) fs_noerr,
- .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr,
- .xfs_dqrele = (xfs_dqrele_t) fs_noerr,
- .xfs_dqattach = (xfs_dqattach_t) fs_noerr,
- .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr,
- .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr,
- .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr,
- .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr,
- .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr,
- .xfs_dqvopchown = xfs_dqvopchown_default,
- .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr,
- .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval,
- .xfs_dqsync = (xfs_dqsync_t) fs_noerr,
-};
-
-int
-xfs_qmops_get(struct xfs_mount *mp)
-{
- if (XFS_IS_QUOTA_RUNNING(mp)) {
-#ifdef CONFIG_XFS_QUOTA
- mp->m_qm_ops = &xfs_qmcore_xfs;
-#else
- cmn_err(CE_WARN,
- "XFS: qouta support not available in this kernel.");
- return EINVAL;
-#endif
- } else {
- mp->m_qm_ops = &xfs_qmcore_stub;
- }
-
- return 0;
-}
-
-void
-xfs_qmops_put(struct xfs_mount *mp)
-{
-}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index f5d1202dde2..3ec91ac74c2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat {
#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
@@ -302,69 +301,79 @@ typedef struct xfs_dqtrx {
long qt_delrtb_delta; /* delayed RT blk count changes */
} xfs_dqtrx_t;
-/*
- * Dquot transaction functions, used if quota is enabled.
- */
-typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *);
-typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *,
- struct xfs_inode *, uint, long);
-typedef void (*qo_free_dqinfo_t)(struct xfs_trans *);
-typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *);
-typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *);
-typedef int (*qo_reserve_quota_nblks_t)(
- struct xfs_trans *, struct xfs_mount *,
- struct xfs_inode *, long, long, uint);
-typedef int (*qo_reserve_quota_bydquots_t)(
- struct xfs_trans *, struct xfs_mount *,
- struct xfs_dquot *, struct xfs_dquot *,
- long, long, uint);
-typedef struct xfs_dqtrxops {
- qo_dup_dqinfo_t qo_dup_dqinfo;
- qo_free_dqinfo_t qo_free_dqinfo;
- qo_mod_dquot_byino_t qo_mod_dquot_byino;
- qo_apply_dquot_deltas_t qo_apply_dquot_deltas;
- qo_reserve_quota_nblks_t qo_reserve_quota_nblks;
- qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots;
- qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots;
-} xfs_dqtrxops_t;
-
-#define XFS_DQTRXOP(mp, tp, op, args...) \
- ((mp)->m_qm_ops->xfs_dqtrxops ? \
- ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0)
-
-#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \
- ((mp)->m_qm_ops->xfs_dqtrxops ? \
- ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0)
-
-#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \
- XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp)
-#define XFS_TRANS_FREE_DQINFO(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo)
-#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \
- XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta)
-#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas)
-#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \
- XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl)
-#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \
- XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl)
-#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
-
-#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
-#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
- XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
- f | XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
- XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
+extern void xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
+ uint, long);
+extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+ struct xfs_inode *, long, long, uint);
+extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+ struct xfs_mount *, struct xfs_dquot *,
+ struct xfs_dquot *, long, long, uint);
+
+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
+ struct xfs_dquot **, struct xfs_dquot **);
+extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
+ struct xfs_dquot *, struct xfs_dquot *);
+extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
+ struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
+extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
+ struct xfs_dquot *, struct xfs_dquot *, uint);
+extern int xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
+extern void xfs_qm_dqdetach(struct xfs_inode *);
+extern void xfs_qm_dqrele(struct xfs_dquot *);
+extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
+extern int xfs_qm_sync(struct xfs_mount *, int);
+extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
+extern void xfs_qm_mount_quotas(struct xfs_mount *);
+extern void xfs_qm_unmount(struct xfs_mount *);
+extern void xfs_qm_unmount_quotas(struct xfs_mount *);
+
+#else /* !CONFIG_XFS_QUOTA: no-op stubs mirroring the declarations above */
+static inline int
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
+ uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
+{
+ *udqp = NULL; /* hand back no dquots; callers still read these pointers */
+ *gdqp = NULL;
+ return 0;
+}
+#define xfs_trans_dup_dqinfo(tp, tp2)
+#define xfs_trans_free_dqinfo(tp)
+#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
+#define xfs_trans_apply_dquot_deltas(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0)
+#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0)
+#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
+#define xfs_qm_vop_rename_dqattach(it) (0)
+#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
+#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0)
+#define xfs_qm_dqattach(ip, fl) (0)
+#define xfs_qm_dqattach_locked(ip, fl) (0)
+#define xfs_qm_dqdetach(ip)
+#define xfs_qm_dqrele(d)
+#define xfs_qm_statvfs(ip, s)
+#define xfs_qm_sync(mp, fl) (0)
+#define xfs_qm_newmount(mp, a, b) (0)
+#define xfs_qm_mount_quotas(mp)
+#define xfs_qm_unmount(mp)
+#define xfs_qm_unmount_quotas(mp) /* declared void above, so no value here */
+#endif /* CONFIG_XFS_QUOTA */
+
+#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
+ xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
+#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
+ xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
f | XFS_QMOPT_RES_REGBLKS)
extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
-extern struct xfs_qmops xfs_qmcore_xfs;
-
#endif /* __KERNEL__ */
-
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 58f85e9cd11..b81deea0ce1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -166,7 +166,8 @@ xfs_rename(
/*
* Attach the dquots to the inodes
*/
- if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
+ error = xfs_qm_vop_rename_dqattach(inodes);
+ if (error) {
xfs_trans_cancel(tp, cancel_flags);
goto std_return;
}
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 36f3a21c54d..fea68615ed2 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -41,7 +41,6 @@
#include "xfs_ialloc.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
-#include "xfs_acl.h"
#include "xfs_error.h"
#include "xfs_buf_item.h"
#include "xfs_rw.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bcc39d358ad..66b849358e6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
- XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
+ xfs_trans_dup_dqinfo(tp, ntp);
atomic_inc(&tp->t_mountp->m_active_trans);
return ntp;
@@ -829,7 +829,7 @@ shut_us_down:
* means is that we have some (non-persistent) quota
* reservations that need to be unreserved.
*/
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
commit_lsn = xfs_log_done(mp, tp->t_ticket,
NULL, log_flags);
@@ -848,10 +848,9 @@ shut_us_down:
/*
* If we need to update the superblock, then do it now.
*/
- if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+ if (tp->t_flags & XFS_TRANS_SB_DIRTY)
xfs_trans_apply_sb_deltas(tp);
- }
- XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
+ xfs_trans_apply_dquot_deltas(tp);
/*
* Ask each log item how many log_vector entries it will
@@ -1056,7 +1055,7 @@ xfs_trans_uncommit(
}
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
xfs_trans_free_items(tp, flags);
xfs_trans_free_busy(tp);
@@ -1181,7 +1180,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1211,7 +1210,7 @@ xfs_trans_free(
xfs_trans_t *tp)
{
atomic_dec(&tp->t_mountp->m_active_trans);
- XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+ xfs_trans_free_dqinfo(tp);
kmem_zone_free(xfs_trans_zone, tp);
}
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 79b9e5ea535..4d88616bde9 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
xfs_buf_relse(ialloc_context);
if (dqinfo) {
tp->t_dqinfo = dqinfo;
- XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+ xfs_trans_free_dqinfo(tp);
}
*tpp = ntp;
*ipp = NULL;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 19cf90a9c76..c4eca5ed5da 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -42,6 +42,7 @@
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
+#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_rw.h"
#include "xfs_error.h"
@@ -118,7 +119,7 @@ xfs_setattr(
*/
ASSERT(udqp == NULL);
ASSERT(gdqp == NULL);
- code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+ code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
qflags, &udqp, &gdqp);
if (code)
return code;
@@ -180,10 +181,11 @@ xfs_setattr(
* Do a quota reservation only if uid/gid is actually
* going to change.
*/
- if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+ (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
ASSERT(tp);
- code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -217,7 +219,7 @@ xfs_setattr(
/*
* Make sure that the dquots are attached to the inode.
*/
- code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+ code = xfs_qm_dqattach_locked(ip, 0);
if (code)
goto error_return;
@@ -351,21 +353,21 @@ xfs_setattr(
* in the transaction.
*/
if (iuid != uid) {
- if (XFS_IS_UQUOTA_ON(mp)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(mask & ATTR_UID);
ASSERT(udqp);
- olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
&ip->i_udquot, udqp);
}
ip->i_d.di_uid = uid;
inode->i_uid = uid;
}
if (igid != gid) {
- if (XFS_IS_GQUOTA_ON(mp)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
ASSERT(!XFS_IS_PQUOTA_ON(mp));
ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
- olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_gid = gid;
@@ -461,13 +463,25 @@ xfs_setattr(
/*
* Release any dquot(s) the inode had kept before chown.
*/
- XFS_QM_DQRELE(mp, olddquot1);
- XFS_QM_DQRELE(mp, olddquot2);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(olddquot1);
+ xfs_qm_dqrele(olddquot2);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
- if (code) {
+ if (code)
return code;
+
+ /*
+ * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+ * update. We could avoid this with linked transactions
+ * and passing down the transaction pointer all the way
+ * to attr_set. No previous user of the generic
+ * Posix ACL code seems to care about this issue either.
+ */
+ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+ code = -xfs_acl_chmod(inode);
+ if (code)
+ return XFS_ERROR(code);
}
if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
@@ -482,8 +496,8 @@ xfs_setattr(
commit_flags |= XFS_TRANS_ABORT;
/* FALLTHROUGH */
error_return:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (tp) {
xfs_trans_cancel(tp, commit_flags);
}
@@ -739,7 +753,8 @@ xfs_free_eofblocks(
/*
* Attach the dquots to the inode up front.
*/
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
/*
@@ -1181,7 +1196,8 @@ xfs_inactive(
ASSERT(ip->i_d.di_nlink == 0);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return VN_INACTIVE_CACHE;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1307,7 +1323,7 @@ xfs_inactive(
/*
* Credit the quota account(s). The inode is gone.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
/*
* Just ignore errors at this point. There is nothing we can
@@ -1323,11 +1339,11 @@ xfs_inactive(
xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
"xfs_trans_commit() returned error %d", error);
}
+
/*
* Release the dquots held by inode, if any.
*/
- XFS_QM_DQDETACH(mp, ip);
-
+ xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
out:
@@ -1427,8 +1443,7 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -1489,7 +1504,7 @@ xfs_create(
/*
* Reserve disk quota and the inode.
*/
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
if (error)
goto out_trans_cancel;
@@ -1561,7 +1576,7 @@ xfs_create(
* These ids of the inode couldn't have changed since the new
* inode has been locked ever since it was created.
*/
- XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
/*
* xfs_trans_commit normally decrements the vnode ref count
@@ -1580,8 +1595,8 @@ xfs_create(
goto out_dqrele;
}
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
*ipp = ip;
@@ -1602,8 +1617,8 @@ xfs_create(
out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
out_dqrele:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1837,11 +1852,11 @@ xfs_remove(
return error;
}
- error = XFS_QM_DQATTACH(mp, dp, 0);
+ error = xfs_qm_dqattach(dp, 0);
if (error)
goto std_return;
- error = XFS_QM_DQATTACH(mp, ip, 0);
+ error = xfs_qm_dqattach(ip, 0);
if (error)
goto std_return;
@@ -2028,11 +2043,11 @@ xfs_link(
/* Return through std_return after this point. */
- error = XFS_QM_DQATTACH(mp, sip, 0);
+ error = xfs_qm_dqattach(sip, 0);
if (error)
goto std_return;
- error = XFS_QM_DQATTACH(mp, tdp, 0);
+ error = xfs_qm_dqattach(tdp, 0);
if (error)
goto std_return;
@@ -2205,8 +2220,7 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -2248,7 +2262,7 @@ xfs_symlink(
/*
* Reserve disk quota : blocks and inode.
*/
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
if (error)
goto error_return;
@@ -2288,7 +2302,7 @@ xfs_symlink(
/*
* Also attach the dquot(s) to it, if applicable.
*/
- XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
if (resblks)
resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2376,8 +2390,8 @@ xfs_symlink(
goto error2;
}
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
/* Fall through to std_return with error = 0 or errno from
* xfs_trans_commit */
@@ -2401,8 +2415,8 @@ std_return:
cancel_flags |= XFS_TRANS_ABORT;
error_return:
xfs_trans_cancel(tp, cancel_flags);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2541,7 +2555,8 @@ xfs_alloc_file_space(
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
if (len <= 0)
@@ -2628,8 +2643,8 @@ retry:
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
- qblocks, 0, quota_flag);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
+ 0, quota_flag);
if (error)
goto error1;
@@ -2688,7 +2703,7 @@ dmapi_enospc_check:
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+ xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2827,7 +2842,8 @@ xfs_free_file_space(
xfs_itrace_entry(ip);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
error = 0;
@@ -2953,9 +2969,9 @@ xfs_free_file_space(
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
- ip->i_udquot, ip->i_gdquot, resblks, 0,
- XFS_QMOPT_RES_REGBLKS);
+ error = xfs_trans_reserve_quota(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ resblks, 0, XFS_QMOPT_RES_REGBLKS);
if (error)
goto error1;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 04373c6c61f..a9e102de71a 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);