aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/nfs-rdma.txt14
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/lockd/svcsubs.c69
-rw-r--r--fs/locks.c32
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/nfs4xdr.c14
-rw-r--r--fs/nfsd/nfsctl.c65
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/lockd/lockd.h8
-rw-r--r--include/linux/nfsd/nfsd.h2
10 files changed, 162 insertions, 48 deletions
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
index 1ae34879574..d0ec45ae4e7 100644
--- a/Documentation/filesystems/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -5,7 +5,7 @@
################################################################################
Author: NetApp and Open Grid Computing
- Date: February 25, 2008
+ Date: April 15, 2008
Table of Contents
~~~~~~~~~~~~~~~~~
@@ -197,12 +197,16 @@ NFS/RDMA Setup
- On the server system, configure the /etc/exports file and
start the NFS/RDMA server.
- Exports entries with the following format have been tested:
+ Exports entries with the following formats have been tested:
- /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash)
+ /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
+ /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
- Here the first IP address is the client's Ethernet address and the second
- IP address is the clients IPoIB address.
+ The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the
+ cleint's iWARP address(es) for an RNIC.
+
+ NOTE: The "insecure" option must be used because the NFS/RDMA client does not
+ use a reserved port.
Each time a machine boots:
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 1f122c1940a..4d81553d294 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -632,7 +632,7 @@ nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
block->b_flags |= B_TIMED_OUT;
if (conf) {
if (block->b_fl)
- locks_copy_lock(block->b_fl, conf);
+ __locks_copy_lock(block->b_fl, conf);
}
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dbbefbcd671..d1c48b539df 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -18,6 +18,8 @@
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
#include <linux/lockd/sm_inter.h>
+#include <linux/module.h>
+#include <linux/mount.h>
#define NLMDBG_FACILITY NLMDBG_SVCSUBS
@@ -194,6 +196,12 @@ again:
return 0;
}
+static int
+nlmsvc_always_match(void *dummy1, struct nlm_host *dummy2)
+{
+ return 1;
+}
+
/*
* Inspect a single file
*/
@@ -230,7 +238,8 @@ nlm_file_inuse(struct nlm_file *file)
* Loop over all files in the file table.
*/
static int
-nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
+nlm_traverse_files(void *data, nlm_host_match_fn_t match,
+ int (*is_failover_file)(void *data, struct nlm_file *file))
{
struct hlist_node *pos, *next;
struct nlm_file *file;
@@ -239,12 +248,14 @@ nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
mutex_lock(&nlm_file_mutex);
for (i = 0; i < FILE_NRHASH; i++) {
hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
+ if (is_failover_file && !is_failover_file(data, file))
+ continue;
file->f_count++;
mutex_unlock(&nlm_file_mutex);
/* Traverse locks, blocks and shares of this file
* and update file->f_locks count */
- if (nlm_inspect_file(host, file, match))
+ if (nlm_inspect_file(data, file, match))
ret = 1;
mutex_lock(&nlm_file_mutex);
@@ -303,21 +314,27 @@ nlm_release_file(struct nlm_file *file)
* Used by nlmsvc_invalidate_all
*/
static int
-nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy)
+nlmsvc_mark_host(void *data, struct nlm_host *dummy)
{
+ struct nlm_host *host = data;
+
host->h_inuse = 1;
return 0;
}
static int
-nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other)
+nlmsvc_same_host(void *data, struct nlm_host *other)
{
+ struct nlm_host *host = data;
+
return host == other;
}
static int
-nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy)
+nlmsvc_is_client(void *data, struct nlm_host *dummy)
{
+ struct nlm_host *host = data;
+
if (host->h_server) {
/* we are destroying locks even though the client
* hasn't asked us too, so don't unmonitor the
@@ -337,7 +354,7 @@ void
nlmsvc_mark_resources(void)
{
dprintk("lockd: nlmsvc_mark_resources\n");
- nlm_traverse_files(NULL, nlmsvc_mark_host);
+ nlm_traverse_files(NULL, nlmsvc_mark_host, NULL);
}
/*
@@ -348,7 +365,7 @@ nlmsvc_free_host_resources(struct nlm_host *host)
{
dprintk("lockd: nlmsvc_free_host_resources\n");
- if (nlm_traverse_files(host, nlmsvc_same_host)) {
+ if (nlm_traverse_files(host, nlmsvc_same_host, NULL)) {
printk(KERN_WARNING
"lockd: couldn't remove all locks held by %s\n",
host->h_name);
@@ -368,5 +385,41 @@ nlmsvc_invalidate_all(void)
* turn, which is about as inefficient as it gets.
* Now we just do it once in nlm_traverse_files.
*/
- nlm_traverse_files(NULL, nlmsvc_is_client);
+ nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
+}
+
+static int
+nlmsvc_match_sb(void *datap, struct nlm_file *file)
+{
+ struct super_block *sb = datap;
+
+ return sb == file->f_file->f_path.mnt->mnt_sb;
+}
+
+int
+nlmsvc_unlock_all_by_sb(struct super_block *sb)
+{
+ int ret;
+
+ ret = nlm_traverse_files(sb, nlmsvc_always_match, nlmsvc_match_sb);
+ return ret ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
+
+static int
+nlmsvc_match_ip(void *datap, struct nlm_host *host)
+{
+ __be32 *server_addr = datap;
+
+ return host->h_saddr.sin_addr.s_addr == *server_addr;
+}
+
+int
+nlmsvc_unlock_all_by_ip(__be32 server_addr)
+{
+ int ret;
+ ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
+ return ret ? -EIO : 0;
+
}
+EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
diff --git a/fs/locks.c b/fs/locks.c
index 592faadbcec..e1ea2fe0368 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -224,7 +224,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
/*
* Initialize a new lock from an existing file_lock structure.
*/
-static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
+void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
{
new->fl_owner = fl->fl_owner;
new->fl_pid = fl->fl_pid;
@@ -833,7 +833,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (!posix_locks_conflict(request, fl))
continue;
if (conflock)
- locks_copy_lock(conflock, fl);
+ __locks_copy_lock(conflock, fl);
error = -EAGAIN;
if (!(request->fl_flags & FL_SLEEP))
goto out;
@@ -1367,18 +1367,20 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
lease = *flp;
- error = -EAGAIN;
- if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
- goto out;
- if ((arg == F_WRLCK)
- && ((atomic_read(&dentry->d_count) > 1)
- || (atomic_read(&inode->i_count) > 1)))
- goto out;
+ if (arg != F_UNLCK) {
+ error = -ENOMEM;
+ new_fl = locks_alloc_lock();
+ if (new_fl == NULL)
+ goto out;
- error = -ENOMEM;
- new_fl = locks_alloc_lock();
- if (new_fl == NULL)
- goto out;
+ error = -EAGAIN;
+ if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
+ goto out;
+ if ((arg == F_WRLCK)
+ && ((atomic_read(&dentry->d_count) > 1)
+ || (atomic_read(&inode->i_count) > 1)))
+ goto out;
+ }
/*
* At this point, we know that if there is an exclusive
@@ -1404,6 +1406,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
rdlease_count++;
}
+ error = -EAGAIN;
if ((arg == F_RDLCK && (wrlease_count > 0)) ||
(arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0)))
goto out;
@@ -1490,8 +1493,7 @@ EXPORT_SYMBOL_GPL(vfs_setlease);
int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
struct file_lock fl, *flp = &fl;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_path.dentry->d_inode;
int error;
locks_init_lock(&fl);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 55dfdd71f1b..8799b870818 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2712,9 +2712,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* Note: locks.c uses the BKL to protect the inode's lock list.
*/
- /* XXX?: Just to divert the locks_release_private at the start of
- * locks_copy_lock: */
- locks_init_lock(&conflock);
err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
switch (-err) {
case 0: /* success! */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1ba7ad98193..c513bbdf2d3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -376,20 +376,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
goto xdr_error;
}
}
- if (bmval[1] & FATTR4_WORD1_TIME_METADATA) {
- /* We require the high 32 bits of 'seconds' to be 0, and we ignore
- all 32 bits of 'nseconds'. */
- READ_BUF(12);
- len += 12;
- READ32(dummy32);
- if (dummy32)
- return nfserr_inval;
- READ32(iattr->ia_ctime.tv_sec);
- READ32(iattr->ia_ctime.tv_nsec);
- if (iattr->ia_ctime.tv_nsec >= (u32)1000000000)
- return nfserr_inval;
- iattr->ia_valid |= ATTR_CTIME;
- }
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
READ_BUF(4);
len += 4;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 613bcb8171a..42f3820ee8f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/init.h>
+#include <linux/inet.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/ctype.h>
@@ -35,6 +36,7 @@
#include <linux/nfsd/cache.h>
#include <linux/nfsd/xdr.h>
#include <linux/nfsd/syscall.h>
+#include <linux/lockd/lockd.h>
#include <asm/uaccess.h>
#include <net/ipv6.h>
@@ -53,6 +55,8 @@ enum {
NFSD_Getfs,
NFSD_List,
NFSD_Fh,
+ NFSD_FO_UnlockIP,
+ NFSD_FO_UnlockFS,
NFSD_Threads,
NFSD_Pool_Threads,
NFSD_Versions,
@@ -89,6 +93,9 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
#endif
+static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size);
+static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size);
+
static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Svc] = write_svc,
[NFSD_Add] = write_add,
@@ -98,6 +105,8 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Getfd] = write_getfd,
[NFSD_Getfs] = write_getfs,
[NFSD_Fh] = write_filehandle,
+ [NFSD_FO_UnlockIP] = failover_unlock_ip,
+ [NFSD_FO_UnlockFS] = failover_unlock_fs,
[NFSD_Threads] = write_threads,
[NFSD_Pool_Threads] = write_pool_threads,
[NFSD_Versions] = write_versions,
@@ -298,6 +307,58 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
return err;
}
+static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
+{
+ __be32 server_ip;
+ char *fo_path, c;
+ int b1, b2, b3, b4;
+
+ /* sanity check */
+ if (size == 0)
+ return -EINVAL;
+
+ if (buf[size-1] != '\n')
+ return -EINVAL;
+
+ fo_path = buf;
+ if (qword_get(&buf, fo_path, size) < 0)
+ return -EINVAL;
+
+ /* get ipv4 address */
+ if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
+ return -EINVAL;
+ server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
+
+ return nlmsvc_unlock_all_by_ip(server_ip);
+}
+
+static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
+{
+ struct nameidata nd;
+ char *fo_path;
+ int error;
+
+ /* sanity check */
+ if (size == 0)
+ return -EINVAL;
+
+ if (buf[size-1] != '\n')
+ return -EINVAL;
+
+ fo_path = buf;
+ if (qword_get(&buf, fo_path, size) < 0)
+ return -EINVAL;
+
+ error = path_lookup(fo_path, 0, &nd);
+ if (error)
+ return error;
+
+ error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb);
+
+ path_put(&nd.path);
+ return error;
+}
+
static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
{
/* request is:
@@ -700,6 +761,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+ [NFSD_FO_UnlockIP] = {"unlock_ip",
+ &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_FO_UnlockFS] = {"unlock_filesystem",
+ &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc2be2cf7d4..6556f2f967e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -973,6 +973,7 @@ extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
/* fs/locks.c */
extern void locks_init_lock(struct file_lock *);
extern void locks_copy_lock(struct file_lock *, struct file_lock *);
+extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
extern void locks_remove_posix(struct file *, fl_owner_t);
extern void locks_remove_flock(struct file *);
extern void posix_test_lock(struct file *, struct file_lock *);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 94649a8da01..102d928f720 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -194,7 +194,7 @@ void nsm_release(struct nsm_handle *);
* This is used in garbage collection and resource reclaim
* A return value != 0 means destroy the lock/block/share
*/
-typedef int (*nlm_host_match_fn_t)(struct nlm_host *cur, struct nlm_host *ref);
+typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
/*
* Server-side lock handling
@@ -220,6 +220,12 @@ void nlmsvc_mark_resources(void);
void nlmsvc_free_host_resources(struct nlm_host *);
void nlmsvc_invalidate_all(void);
+/*
+ * Cluster failover support
+ */
+int nlmsvc_unlock_all_by_sb(struct super_block *sb);
+int nlmsvc_unlock_all_by_ip(__be32 server_addr);
+
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
return file->f_file->f_path.dentry->d_inode;
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 21ee440dd3e..41d30c9c9de 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -329,7 +329,7 @@ extern struct timeval nfssvc_boot;
(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
#define NFSD_WRITEABLE_ATTRS_WORD1 \
(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY_SET)
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
#endif /* CONFIG_NFSD_V4 */