diff options
Diffstat (limited to 'fs')
101 files changed, 1460 insertions, 1035 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 488521ed9e9..a722b5a3f75 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1864,20 +1864,14 @@ config CIFS file servers such as Windows 2000 (including Windows 2003, NT 4 and Windows XP) as well by Samba (which provides excellent CIFS server support for Linux and many other operating systems). Limited - support for Windows ME and similar servers is provided as well. - You must use the smbfs client filesystem to access older SMB servers - such as OS/2 and DOS. + support for OS/2 and Windows ME and similar servers is provided as well. The intent of the cifs module is to provide an advanced - network file system client for mounting to CIFS compliant servers, + network file system client for mounting to CIFS compliant servers, including support for dfs (hierarchical name space), secure per-user session establishment, safe distributed caching (oplock), optional - packet signing, Unicode and other internationalization improvements, - and optional Winbind (nsswitch) integration. You do not need to enable - cifs if running only a (Samba) server. It is possible to enable both - smbfs and cifs (e.g. if you are using CIFS for accessing Windows 2003 - and Samba 3 servers, and smbfs for accessing old servers). If you need - to mount to Samba or Windows from this machine, say Y. + packet signing, Unicode and other internationalization improvements. + If you need to mount to Samba or Windows from this machine, say Y. config CIFS_STATS bool "CIFS statistics" @@ -1970,14 +1964,13 @@ config CIFS_EXPERIMENTAL depends on CIFS && EXPERIMENTAL help Enables cifs features under testing. These features are - experimental and currently include support for writepages - (multipage writebehind performance improvements) and directory - change notification ie fcntl(F_DNOTIFY) as well as some security - improvements. Some also depend on setting at runtime the - pseudo-file /proc/fs/cifs/Experimental (which is disabled by - default). 
See the file fs/cifs/README for more details. - - If unsure, say N. + experimental and currently include DFS support and directory + change notification ie fcntl(F_DNOTIFY), as well as the upcall + mechanism which will be used for Kerberos session negotiation + and uid remapping. Some of these features also may depend on + setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental + (which is disabled by default). See the file fs/cifs/README + for more details. If unsure, say N. config CIFS_UPCALL bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" diff --git a/fs/afs/cell.c b/fs/afs/cell.c index bfc1fd22d5b..1fc57837275 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -10,7 +10,6 @@ */ #include <linux/module.h> -#include <linux/sched.h> #include <linux/slab.h> #include <rxrpc/peer.h> #include <rxrpc/connection.h> diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9908462bcad..b6dc2ebe47a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> diff --git a/fs/afs/file.c b/fs/afs/file.c index eeff14c3f74..b17634541f6 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 6f37754906c..9d9bca6c28b 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -16,7 +16,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> diff --git a/fs/afs/main.c b/fs/afs/main.c index 913c689bdb3..f2704ba5385 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -12,7 +12,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include 
<linux/init.h> -#include <linux/sched.h> #include <linux/completion.h> #include <rxrpc/rxrpc.h> #include <rxrpc/transport.h> diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index fdf23b2a211..68495f0de7b 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 86463ec9ccb..ae6b85b1e48 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/proc_fs.h> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 669dbe5b031..51db1182b27 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -76,7 +76,8 @@ static struct linux_binfmt elf_format = { .load_binary = load_elf_binary, .load_shlib = load_elf_library, .core_dump = elf_core_dump, - .min_coredump = ELF_EXEC_PAGESIZE + .min_coredump = ELF_EXEC_PAGESIZE, + .hasvdso = 1 }; #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 85e3850bf2c..5fe13593b57 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,8 +1,15 @@ Version 1.47 ------------ Fix oops in list_del during mount caused by unaligned string. +Fix file corruption which could occur on some large file +copies caused by writepages page i/o completion bug. Seek to SEEK_END forces check for update of file size for non-cached -files. +files. Allow file size to be updated on remote extend of locally open, +non-cached file. Fix reconnect to newer Samba servers (or other servers +which support the CIFS Unix/POSIX extensions) so that we again tell the +server the Unix/POSIX cifs capabilities which we support (SetFSInfo). 
+Add experimental support for new POSIX Open/Mkdir (which returns +stat information on the open, and allows setting the mode). Version 1.46 ------------ diff --git a/fs/cifs/TODO b/fs/cifs/TODO index fc34c74ec4b..68372946dc9 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO @@ -128,3 +128,11 @@ negotiated size) and send larger write sizes to modern servers. 4) More exhaustively test against less common servers. More testing against Windows 9x, Windows ME servers. + +DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too) + +mount check for unmatched uids - and uid override + +Add mount option for Linux extension disable per mount, and partial disable per mount (uid off, symlink/fifo/mknod on but what about posix acls?) + +Free threads at umount --force that are stuck on the sesSem diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 01ae24af9cf..c97c08eb481 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.47" +#define CIFS_VERSION "1.48" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 068ef51edbf..7d9505491b1 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1,7 +1,7 @@ /* * fs/cifs/cifspdu.h * - * Copyright (c) International Business Machines Corp., 2002,2005 + * Copyright (c) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -544,7 +544,8 @@ typedef union smb_com_session_setup_andx { /* unsigned char * NativeOS; */ /* unsigned char * NativeLanMan; */ /* unsigned char * PrimaryDomain; */ - } __attribute__((packed)) resp; /* NTLM response with or without extended sec*/ + } 
__attribute__((packed)) resp; /* NTLM response + (with or without extended sec) */ struct { /* request format */ struct smb_hdr hdr; /* wct = 10 */ @@ -795,6 +796,8 @@ typedef struct smb_com_openx_rsp { __u16 ByteCount; } __attribute__((packed)) OPENX_RSP; +/* For encoding of POSIX Open Request - see trans2 function 0x209 data struct */ + /* Legacy write request for older servers */ typedef struct smb_com_writex_req { struct smb_hdr hdr; /* wct = 12 */ @@ -1352,11 +1355,13 @@ struct smb_t2_rsp { #define SMB_QUERY_FILE_UNIX_BASIC 0x200 #define SMB_QUERY_FILE_UNIX_LINK 0x201 #define SMB_QUERY_POSIX_ACL 0x204 -#define SMB_QUERY_XATTR 0x205 +#define SMB_QUERY_XATTR 0x205 /* e.g. system EA name space */ #define SMB_QUERY_ATTR_FLAGS 0x206 /* append,immutable etc. */ #define SMB_QUERY_POSIX_PERMISSION 0x207 #define SMB_QUERY_POSIX_LOCK 0x208 -/* #define SMB_POSIX_OPEN 0x209 */ +/* #define SMB_POSIX_OPEN 0x209 */ +/* #define SMB_POSIX_UNLINK 0x20a */ +#define SMB_QUERY_FILE__UNIX_INFO2 0x20b #define SMB_QUERY_FILE_INTERNAL_INFO 0x3ee #define SMB_QUERY_FILE_ACCESS_INFO 0x3f0 #define SMB_QUERY_FILE_NAME_INFO2 0x3f1 /* 0x30 bytes */ @@ -1377,8 +1382,10 @@ struct smb_t2_rsp { #define SMB_SET_ATTR_FLAGS 0x206 /* append, immutable etc. 
*/ #define SMB_SET_POSIX_LOCK 0x208 #define SMB_POSIX_OPEN 0x209 +#define SMB_POSIX_UNLINK 0x20a +#define SMB_SET_FILE_UNIX_INFO2 #define SMB_SET_FILE_BASIC_INFO2 0x3ec -#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */ +#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo too */ #define SMB_FILE_ALL_INFO2 0x3fa #define SMB_SET_FILE_ALLOCATION_INFO2 0x3fb #define SMB_SET_FILE_END_OF_FILE_INFO2 0x3fc @@ -1428,7 +1435,7 @@ typedef struct smb_com_transaction2_qpi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ + __u16 Reserved2; /* parameter word is present for infolevels > 100 */ } __attribute__((packed)) TRANSACTION2_QPI_RSP; typedef struct smb_com_transaction2_spi_req { @@ -1461,7 +1468,7 @@ typedef struct smb_com_transaction2_spi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ + __u16 Reserved2; /* parameter word is present for infolevels > 100 */ } __attribute__((packed)) TRANSACTION2_SPI_RSP; struct set_file_rename { @@ -1627,6 +1634,7 @@ typedef struct smb_com_transaction2_fnext_rsp_parms { #define SMB_QUERY_FS_ATTRIBUTE_INFO 0x105 #define SMB_QUERY_CIFS_UNIX_INFO 0x200 #define SMB_QUERY_POSIX_FS_INFO 0x201 +#define SMB_QUERY_POSIX_WHO_AM_I 0x202 #define SMB_QUERY_LABEL_INFO 0x3ea #define SMB_QUERY_FS_QUOTA_INFO 0x3ee #define SMB_QUERY_FS_FULL_SIZE_INFO 0x3ef @@ -1659,9 +1667,21 @@ typedef struct smb_com_transaction_qfsi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u8 Pad; /* may be three bytes *//* followed by data area */ + __u8 Pad; /* may be three bytes? 
*//* followed by data area */ } __attribute__((packed)) TRANSACTION2_QFSI_RSP; +typedef struct whoami_rsp_data { /* Query level 0x202 */ + __u32 flags; /* 0 = Authenticated user 1 = GUEST */ + __u32 mask; /* which flags bits server understands ie 0x0001 */ + __u64 unix_user_id; + __u64 unix_user_gid; + __u32 number_of_supplementary_gids; /* may be zero */ + __u32 number_of_sids; /* may be zero */ + __u32 length_of_sid_array; /* in bytes - may be zero */ + __u32 pad; /* reserved - MBZ */ + /* __u64 gid_array[0]; */ /* may be empty */ + /* __u8 * psid_list */ /* may be empty */ +} __attribute__((packed)) WHOAMI_RSP_DATA; /* SETFSInfo Levels */ #define SMB_SET_CIFS_UNIX_INFO 0x200 @@ -1858,8 +1878,11 @@ typedef struct { #define CIFS_UNIX_XATTR_CAP 0x00000004 /* support new namespace */ #define CIFS_UNIX_EXTATTR_CAP 0x00000008 /* support chattr/chflag */ #define CIFS_UNIX_POSIX_PATHNAMES_CAP 0x00000010 /* Allow POSIX path chars */ +#define CIFS_UNIX_POSIX_PATH_OPS_CAP 0x00000020 /* Allow new POSIX path based + calls including posix open + and posix unlink */ #ifdef CONFIG_CIFS_POSIX -#define CIFS_UNIX_CAP_MASK 0x0000001b +#define CIFS_UNIX_CAP_MASK 0x0000003b #else #define CIFS_UNIX_CAP_MASK 0x00000013 #endif /* CONFIG_CIFS_POSIX */ @@ -1946,7 +1969,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ __le32 AlignmentRequirement; __le32 FileNameLength; char FileName[1]; -} __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */ +} __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */ /* defines for enumerating possible values of the Unix type field below */ #define UNIX_FILE 0 @@ -1970,11 +1993,11 @@ typedef struct { __u64 UniqueId; __le64 Permissions; __le64 Nlinks; -} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ +} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ typedef struct { char LinkDest[1]; -} __attribute__((packed)) FILE_UNIX_LINK_INFO; /* level 0x201 
QPathInfo */ +} __attribute__((packed)) FILE_UNIX_LINK_INFO; /* level 0x201 QPathInfo */ /* The following three structures are needed only for setting time to NT4 and some older servers via @@ -2011,7 +2034,7 @@ typedef struct { __le64 ChangeTime; __le32 Attributes; __u32 Pad; -} __attribute__((packed)) FILE_BASIC_INFO; /* size info, level 0x101 */ +} __attribute__((packed)) FILE_BASIC_INFO; /* size info, level 0x101 */ struct file_allocation_info { __le64 AllocationSize; /* Note old Samba srvr rounds this up too much */ @@ -2020,7 +2043,7 @@ struct file_allocation_info { struct file_end_of_file_info { __le64 FileSize; /* offset to end of file */ -} __attribute__((packed)); /* size info, level 0x104 for set, 0x106 for query */ +} __attribute__((packed)); /* size info, level 0x104 for set, 0x106 for query */ struct file_alt_name_info { __u8 alt_name[1]; @@ -2075,6 +2098,19 @@ struct cifs_posix_acl { /* access conrol list (ACL) */ /* end of POSIX ACL definitions */ +typedef struct { + __u32 OpenFlags; /* same as NT CreateX */ + __u32 PosixOpenFlags; + __u32 Mode; + __u16 Level; /* reply level requested (see QPathInfo levels) */ + __u16 Pad; /* reserved - MBZ */ +} __attribute__((packed)) OPEN_PSX_REQ; /* level 0x209 SetPathInfo data */ + +typedef struct { + /* reply varies based on requested level */ +} __attribute__((packed)) OPEN_PSX_RSP; /* level 0x209 SetPathInfo data */ + + struct file_internal_info { __u64 UniqueId; /* inode number */ } __attribute__((packed)); /* level 0x3ee */ @@ -2238,7 +2274,8 @@ struct data_blob { 1) PosixCreateX - to set and return the mode, inode#, device info and perhaps add a CreateDevice - to create Pipes and other special .inodes Also note POSIX open flags - 2) Close - to return the last write time to do cache across close more safely + 2) Close - to return the last write time to do cache across close + more safely 3) FindFirst return unique inode number - what about resume key, two forms short (matches readdir) and full (enough info 
to cache inodes) 4) Mkdir - set mode @@ -2273,7 +2310,8 @@ struct data_blob { TRANSACTION2 (18 cases) SMB_SET_FILE_END_OF_FILE_INFO2 SMB_SET_PATH_END_OF_FILE_INFO2 (BB verify that never need to set allocation size) - SMB_SET_FILE_BASIC_INFO2 (setting times - BB can it be done via Unix ext?) + SMB_SET_FILE_BASIC_INFO2 (setting times - BB can it be done via + Unix ext?) COPY (note support for copy across directories) - FUTURE, OPTIONAL setting/getting OS/2 EAs - FUTURE (BB can this handle @@ -2293,13 +2331,13 @@ struct data_blob { T2 QUERY_PATH_INFO (SMB_QUERY_FILE_UNIX_BASIC) - BB check for missing inode fields Actually need QUERY_FILE_UNIX_INFO since has inode num BB what about a) blksize/blkbits/blocks - b) i_version - c) i_rdev - d) notify mask? - e) generation - f) size_seqcount + b) i_version + c) i_rdev + d) notify mask? + e) generation + f) size_seqcount T2 FIND_FIRST/FIND_NEXT FIND_FILE_UNIX - TRANS2_GET_DFS_REFERRAL - OPTIONAL but recommended + TRANS2_GET_DFS_REFERRAL - OPTIONAL but recommended T2_QFS_INFO QueryDevice/AttributeInfo - OPTIONAL @@ -2338,7 +2376,7 @@ typedef struct file_xattr_info { __u32 xattr_value_len; char xattr_name[0]; /* followed by xattr_value[xattr_value_len], no pad */ -} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute, info level 0x205 */ +} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute, info level 0x205 */ /* flags for chattr command */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f1f8225102f..6148b82170c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -23,6 +23,7 @@ #include <linux/nls.h> struct statfs; +struct smb_vol; /* ***************************************************************** @@ -57,7 +58,7 @@ extern int SendReceiveBlockingLock(const unsigned int /* xid */ , int * /* bytes returned */); extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); -extern int 
is_size_safe_to_change(struct cifsInodeInfo *); +extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); @@ -147,6 +148,8 @@ extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, unsigned int *pnum_referrals, unsigned char ** preferrals, int remap); +extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, + struct super_block * sb, struct smb_vol * vol); extern int CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData); extern int SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 472e33e0f3c..b8e91470c27 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -158,9 +158,15 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, nls_codepage); if(!rc && (tcon->tidStatus == CifsNeedReconnect)) { mark_open_files_invalid(tcon); - rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon - , nls_codepage); + rc = CIFSTCon(0, tcon->ses, tcon->treeName, + tcon, nls_codepage); up(&tcon->ses->sesSem); + /* tell server which Unix caps we support */ + if (tcon->ses->capabilities & CAP_UNIX) + reset_cifs_unix_caps(0 /* no xid */, + tcon, + NULL /* we do not know sb */, + NULL /* no vol info */); /* BB FIXME add code to check if wsize needs update due to negotiated smb buffer size shrinking */ @@ -298,6 +304,12 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); up(&tcon->ses->sesSem); + /* tell server which Unix caps we support */ + if (tcon->ses->capabilities & CAP_UNIX) + reset_cifs_unix_caps(0 /* no xid */, + tcon, + NULL /* do not know sb */, + NULL /* no vol info */); /* BB FIXME add code to check if wsize needs update due to negotiated smb buffer size shrinking */ diff --git 
a/fs/cifs/connect.c b/fs/cifs/connect.c index 2caca06b4ba..20ba7dcc995 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1613,6 +1613,76 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) return rc; } +void reset_cifs_unix_caps(int xid, struct cifsTconInfo * tcon, + struct super_block * sb, struct smb_vol * vol_info) +{ + /* if we are reconnecting then should we check to see if + * any requested capabilities changed locally e.g. via + * remount but we can not do much about it here + * if they have (even if we could detect it by the following) + * Perhaps we could add a backpointer to array of sb from tcon + * or if we change to make all sb to same share the same + * sb as NFS - then we only have one backpointer to sb. + * What if we wanted to mount the server share twice once with + * and once without posixacls or posix paths? */ + __u64 saved_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + + + if(!CIFSSMBQFSUnixInfo(xid, tcon)) { + __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + + /* check for reconnect case in which we do not + want to change the mount behavior if we can avoid it */ + if(vol_info == NULL) { + /* turn off POSIX ACL and PATHNAMES if not set + originally at mount time */ + if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0) + cap &= ~CIFS_UNIX_POSIX_ACL_CAP; + if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) + cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; + + + + + } + + cap &= CIFS_UNIX_CAP_MASK; + if(vol_info && vol_info->no_psx_acl) + cap &= ~CIFS_UNIX_POSIX_ACL_CAP; + else if(CIFS_UNIX_POSIX_ACL_CAP & cap) { + cFYI(1,("negotiated posix acl support")); + if(sb) + sb->s_flags |= MS_POSIXACL; + } + + if(vol_info && vol_info->posix_paths == 0) + cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; + else if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { + cFYI(1,("negotiate posix pathnames")); + if(sb) + CIFS_SB(sb)->mnt_cifs_flags |= + CIFS_MOUNT_POSIX_PATHS; + } + + cFYI(1,("Negotiate caps 0x%x",(int)cap)); +#ifdef 
CONFIG_CIFS_DEBUG2 + if(cap & CIFS_UNIX_FCNTL_CAP) + cFYI(1,("FCNTL cap")); + if(cap & CIFS_UNIX_EXTATTR_CAP) + cFYI(1,("EXTATTR cap")); + if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) + cFYI(1,("POSIX path cap")); + if(cap & CIFS_UNIX_XATTR_CAP) + cFYI(1,("XATTR cap")); + if(cap & CIFS_UNIX_POSIX_ACL_CAP) + cFYI(1,("POSIX ACL cap")); +#endif /* CIFS_DEBUG2 */ + if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { + cFYI(1,("setting capabilities failed")); + } + } +} + int cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, char *mount_data, const char *devname) @@ -1928,20 +1998,25 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if (tcon == NULL) rc = -ENOMEM; else { - /* check for null share name ie connect to dfs root */ + /* check for null share name ie connecting to + * dfs root */ - /* BB check if this works for exactly length three strings */ + /* BB check if this works for exactly length + * three strings */ if ((strchr(volume_info.UNC + 3, '\\') == NULL) && (strchr(volume_info.UNC + 3, '/') == NULL)) { rc = connect_to_dfs_path(xid, pSesInfo, - "", cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + "", cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); kfree(volume_info.UNC); FreeXid(xid); return -ENODEV; } else { + /* BB Do we need to wrap sesSem around + * this TCon call and Unix SetFS as + * we do on SessSetup and reconnect? 
*/ rc = CIFSTCon(xid, pSesInfo, volume_info.UNC, tcon, cifs_sb->local_nls); @@ -1962,6 +2037,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sb->s_maxbytes = (u64) 1 << 31; /* 2 GB */ } + /* BB FIXME fix time_gran to be larger for LANMAN sessions */ sb->s_time_gran = 100; /* on error free sesinfo and tcon struct if needed */ @@ -2006,45 +2082,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* do not care if following two calls succeed - informational */ CIFSSMBQFSDeviceInfo(xid, tcon); CIFSSMBQFSAttributeInfo(xid, tcon); - - if (tcon->ses->capabilities & CAP_UNIX) { - if(!CIFSSMBQFSUnixInfo(xid, tcon)) { - __u64 cap = - le64_to_cpu(tcon->fsUnixInfo.Capability); - cap &= CIFS_UNIX_CAP_MASK; - if(volume_info.no_psx_acl) - cap &= ~CIFS_UNIX_POSIX_ACL_CAP; - else if(CIFS_UNIX_POSIX_ACL_CAP & cap) { - cFYI(1,("negotiated posix acl support")); - sb->s_flags |= MS_POSIXACL; - } - - if(volume_info.posix_paths == 0) - cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; - else if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { - cFYI(1,("negotiate posix pathnames")); - cifs_sb->mnt_cifs_flags |= - CIFS_MOUNT_POSIX_PATHS; - } - - cFYI(1,("Negotiate caps 0x%x",(int)cap)); -#ifdef CONFIG_CIFS_DEBUG2 - if(cap & CIFS_UNIX_FCNTL_CAP) - cFYI(1,("FCNTL cap")); - if(cap & CIFS_UNIX_EXTATTR_CAP) - cFYI(1,("EXTATTR cap")); - if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) - cFYI(1,("POSIX path cap")); - if(cap & CIFS_UNIX_XATTR_CAP) - cFYI(1,("XATTR cap")); - if(cap & CIFS_UNIX_POSIX_ACL_CAP) - cFYI(1,("POSIX ACL cap")); -#endif /* CIFS_DEBUG2 */ - if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { - cFYI(1,("setting capabilities failed")); - } - } - } + + /* tell server which Unix caps we support */ + if (tcon->ses->capabilities & CAP_UNIX) + reset_cifs_unix_caps(xid, tcon, sb, &volume_info); + if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) cifs_sb->wsize = min(cifs_sb->wsize, (tcon->ses->server->maxBuf - diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 
e9dcf5ee29a..07ff9351e9e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1954,7 +1954,7 @@ static int cifs_readpage(struct file *file, struct page *page) refreshing the inode only on increases in the file size but this is tricky to do without racing with writebehind page caching in the current Linux kernel design */ -int is_size_safe_to_change(struct cifsInodeInfo *cifsInode) +int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) { struct cifsFileInfo *open_file = NULL; @@ -1976,6 +1976,9 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode) return 1; } + if(i_size_read(&cifsInode->vfs_inode) < end_of_file) + return 1; + return 0; } else return 1; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index c4fa91b8b62..3f5bc83dc3d 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -140,7 +140,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, inode->i_gid = le64_to_cpu(findData.Gid); inode->i_nlink = le64_to_cpu(findData.Nlinks); - if (is_size_safe_to_change(cifsInfo)) { + if (is_size_safe_to_change(cifsInfo, end_of_file)) { /* can not safely change the file size here if the client is writing to it due to potential races */ @@ -491,8 +491,8 @@ int cifs_get_inode_info(struct inode **pinode, /* BB add code here - validate if device or weird share or device type? 
*/ } - if (is_size_safe_to_change(cifsInfo)) { - /* can not safely change the file size here if the + if (is_size_safe_to_change(cifsInfo, le64_to_cpu(pfindData->EndOfFile))) { + /* can not safely shrink the file size here if the client is writing to it due to potential races */ i_size_write(inode,le64_to_cpu(pfindData->EndOfFile)); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 782940be550..c6220bd2716 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -222,7 +222,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, atomic_set(&cifsInfo->inUse, 1); } - if (is_size_safe_to_change(cifsInfo)) { + if (is_size_safe_to_change(cifsInfo, end_of_file)) { /* can not safely change the file size here if the client is writing to it due to potential races */ i_size_write(tmp_inode, end_of_file); @@ -351,10 +351,10 @@ static void unix_fill_in_inode(struct inode *tmp_inode, tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); - if (is_size_safe_to_change(cifsInfo)) { + if (is_size_safe_to_change(cifsInfo, end_of_file)) { /* can not safely change the file size here if the client is writing to it due to potential races */ - i_size_write(tmp_inode,end_of_file); + i_size_write(tmp_inode, end_of_file); /* 512 bytes (2**9) is the fake blocksize that must be used */ /* for this calculation, not the real blocksize */ diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index db3b1a9c9a5..c57a1fa7cf2 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -33,8 +33,6 @@ static struct ctl_table_header *fs_table_header; -#define FS_CODA 1 /* Coda file system */ - #define CODA_TIMEOUT 3 /* timeout on upcalls to become intrble */ #define CODA_HARD 5 /* mount type "hard" or "soft" */ #define CODA_VFS 6 /* vfs statistics */ @@ -183,17 +181,57 @@ static const struct file_operations proc_cache_inv_stats_fops = { }; static ctl_table coda_table[] = { - {CODA_TIMEOUT, "timeout", &coda_timeout, sizeof(int), 0644, NULL, 
&proc_dointvec}, - {CODA_HARD, "hard", &coda_hard, sizeof(int), 0644, NULL, &proc_dointvec}, - {CODA_VFS, "vfs_stats", NULL, 0, 0644, NULL, &do_reset_coda_vfs_stats}, - {CODA_CACHE_INV, "cache_inv_stats", NULL, 0, 0644, NULL, &do_reset_coda_cache_inv_stats}, - {CODA_FAKE_STATFS, "fake_statfs", &coda_fake_statfs, sizeof(int), 0600, NULL, &proc_dointvec}, - { 0 } + { + .ctl_name = CTL_UNNUMBERED, + .procname = "timeout", + .data = &coda_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hard", + .data = &coda_hard, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "vfs_stats", + .data = NULL, + .maxlen = 0, + .mode = 0644, + .proc_handler = &do_reset_coda_vfs_stats + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "cache_inv_stats", + .data = NULL, + .maxlen = 0, + .mode = 0644, + .proc_handler = &do_reset_coda_cache_inv_stats + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fake_statfs", + .data = &coda_fake_statfs, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = &proc_dointvec + }, + {} }; static ctl_table fs_table[] = { - {FS_CODA, "coda", NULL, 0, 0555, coda_table}, - {0} + { + .ctl_name = CTL_UNNUMBERED, + .procname = "coda", + .mode = 0555, + .child = coda_table + }, + {} }; @@ -233,7 +271,7 @@ void coda_sysctl_init(void) #ifdef CONFIG_SYSCTL if ( !fs_table_header ) - fs_table_header = register_sysctl_table(fs_table, 0); + fs_table_header = register_sysctl_table(fs_table); #endif } diff --git a/fs/dcache.c b/fs/dcache.c index b5f61393291..d68631f18df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1739,41 +1739,45 @@ shouldnt_be_hashed: * @rootmnt: vfsmnt to which the root dentry belongs * @buffer: buffer to return value in * @buflen: buffer length - * @fail_deleted: what to return for deleted files * - * Convert a dentry into an ASCII path name. 
If the entry has been deleted, - * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise, - * the the string " (deleted)" is appended. Note that this is ambiguous. + * Convert a dentry into an ASCII path name. If the entry has been deleted + * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code. + * Returns the buffer or an error code if the path was too long. + * + * "buflen" should be positive. Caller holds the dcache_lock. */ -static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, - struct dentry *root, struct vfsmount *rootmnt, - char *buffer, int buflen, int fail_deleted) +static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, + char *buffer, int buflen) { - int namelen, is_slash; - - if (buflen < 2) - return ERR_PTR(-ENAMETOOLONG); - buffer += --buflen; - *buffer = '\0'; + char * end = buffer+buflen; + char * retval; + int namelen; - spin_lock(&dcache_lock); + *--end = '\0'; + buflen--; if (!IS_ROOT(dentry) && d_unhashed(dentry)) { - if (fail_deleted) { - buffer = ERR_PTR(-ENOENT); - goto out; - } - if (buflen < 10) - goto Elong; buflen -= 10; - buffer -= 10; - memcpy(buffer, " (deleted)", 10); + end -= 10; + if (buflen < 0) + goto Elong; + memcpy(end, " (deleted)", 10); } - while (dentry != root || vfsmnt != rootmnt) { + + if (buflen < 1) + goto Elong; + /* Get '/' right */ + retval = end-1; + *retval = '/'; + + for (;;) { struct dentry * parent; + if (dentry == root && vfsmnt == rootmnt) + break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + /* Global root? 
*/ spin_lock(&vfsmount_lock); if (vfsmnt->mnt_parent == vfsmnt) { spin_unlock(&vfsmount_lock); @@ -1787,60 +1791,33 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, parent = dentry->d_parent; prefetch(parent); namelen = dentry->d_name.len; - if (buflen <= namelen) - goto Elong; buflen -= namelen + 1; - buffer -= namelen; - memcpy(buffer, dentry->d_name.name, namelen); - *--buffer = '/'; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + retval = end; dentry = parent; } - /* Get '/' right */ - if (*buffer != '/') - *--buffer = '/'; -out: - spin_unlock(&dcache_lock); - return buffer; + return retval; global_root: - /* - * We went past the (vfsmount, dentry) we were looking for and have - * either hit a root dentry, a lazily unmounted dentry, an - * unconnected dentry, or the file is on a pseudo filesystem. - */ namelen = dentry->d_name.len; - is_slash = (namelen == 1 && *dentry->d_name.name == '/'); - if (is_slash || (dentry->d_sb->s_flags & MS_NOUSER)) { - /* - * Make sure we won't return a pathname starting with '/'. - * - * Historically, we also glue together the root dentry and - * remaining name for pseudo filesystems like pipefs, which - * have the MS_NOUSER flag set. This results in pathnames - * like "pipe:[439336]". 
- */ - if (*buffer == '/') { - buffer++; - buflen++; - } - if (is_slash) - goto out; - } - if (buflen < namelen) + buflen -= namelen; + if (buflen < 0) goto Elong; - buffer -= namelen; - memcpy(buffer, dentry->d_name.name, namelen); - goto out; - + retval -= namelen-1; /* hit the slash */ + memcpy(retval, dentry->d_name.name, namelen); + return retval; Elong: - buffer = ERR_PTR(-ENAMETOOLONG); - goto out; + return ERR_PTR(-ENAMETOOLONG); } /* write full pathname into buffer and return start of pathname */ -char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf, - int buflen) +char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + char *buf, int buflen) { char *res; struct vfsmount *rootmnt; @@ -1850,7 +1827,9 @@ char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf, rootmnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); read_unlock(&current->fs->lock); - res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0); + spin_lock(&dcache_lock); + res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); + spin_unlock(&dcache_lock); dput(root); mntput(rootmnt); return res; @@ -1876,10 +1855,10 @@ char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf, */ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) { - int error, len; + int error; struct vfsmount *pwdmnt, *rootmnt; struct dentry *pwd, *root; - char *page = (char *) __get_free_page(GFP_USER), *cwd; + char *page = (char *) __get_free_page(GFP_USER); if (!page) return -ENOMEM; @@ -1891,18 +1870,29 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) root = dget(current->fs->root); read_unlock(&current->fs->lock); - cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1); - error = PTR_ERR(cwd); - if (IS_ERR(cwd)) - goto out; + error = -ENOENT; + /* Has the current directory has been unlinked? 
*/ + spin_lock(&dcache_lock); + if (pwd->d_parent == pwd || !d_unhashed(pwd)) { + unsigned long len; + char * cwd; - error = -ERANGE; - len = PAGE_SIZE + page - cwd; - if (len <= size) { - error = len; - if (copy_to_user(buf, cwd, len)) - error = -EFAULT; - } + cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE); + spin_unlock(&dcache_lock); + + error = PTR_ERR(cwd); + if (IS_ERR(cwd)) + goto out; + + error = -ERANGE; + len = PAGE_SIZE + page - cwd; + if (len <= size) { + error = len; + if (copy_to_user(buf, cwd, len)) + error = -EFAULT; + } + } else + spin_unlock(&dcache_lock); out: dput(pwd); diff --git a/fs/dquot.c b/fs/dquot.c index 9eb166f9148..b16f991662c 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1841,7 +1841,7 @@ static int __init dquot_init(void) printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); - register_sysctl_table(sys_table, 0); + register_sysctl_table(sys_table); dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b3609b7cdf1..403e3bad145 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -467,6 +467,7 @@ extern struct kmem_cache *ecryptfs_header_cache_1; extern struct kmem_cache *ecryptfs_header_cache_2; extern struct kmem_cache *ecryptfs_xattr_cache; extern struct kmem_cache *ecryptfs_lower_page_cache; +extern struct kmem_cache *ecryptfs_key_record_cache; int ecryptfs_interpose(struct dentry *hidden_dentry, struct dentry *this_dentry, struct super_block *sb, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index c209f67e7a2..b550dea8eee 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -26,7 +26,6 @@ */ #include <linux/string.h> -#include <linux/sched.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/key.h> @@ -1639,6 +1638,8 @@ out: return rc; } +struct kmem_cache *ecryptfs_key_record_cache; + /** * 
ecryptfs_generate_key_packet_set * @dest: Virtual address from which to write the key record set @@ -1665,50 +1666,55 @@ ecryptfs_generate_key_packet_set(char *dest_base, &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; size_t written; - struct ecryptfs_key_record key_rec; + struct ecryptfs_key_record *key_rec; int rc = 0; (*len) = 0; + key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL); + if (!key_rec) { + rc = -ENOMEM; + goto out; + } if (mount_crypt_stat->global_auth_tok) { auth_tok = mount_crypt_stat->global_auth_tok; if (auth_tok->token_type == ECRYPTFS_PASSWORD) { rc = write_tag_3_packet((dest_base + (*len)), max, auth_tok, - crypt_stat, &key_rec, + crypt_stat, key_rec, &written); if (rc) { ecryptfs_printk(KERN_WARNING, "Error " "writing tag 3 packet\n"); - goto out; + goto out_free; } (*len) += written; /* Write auth tok signature packet */ rc = write_tag_11_packet( (dest_base + (*len)), (max - (*len)), - key_rec.sig, ECRYPTFS_SIG_SIZE, &written); + key_rec->sig, ECRYPTFS_SIG_SIZE, &written); if (rc) { ecryptfs_printk(KERN_ERR, "Error writing " "auth tok signature packet\n"); - goto out; + goto out_free; } (*len) += written; } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { rc = write_tag_1_packet(dest_base + (*len), max, auth_tok, crypt_stat,mount_crypt_stat, - &key_rec, &written); + key_rec, &written); if (rc) { ecryptfs_printk(KERN_WARNING, "Error " "writing tag 1 packet\n"); - goto out; + goto out_free; } (*len) += written; } else { ecryptfs_printk(KERN_WARNING, "Unsupported " "authentication token type\n"); rc = -EINVAL; - goto out; + goto out_free; } } else BUG(); @@ -1718,6 +1724,9 @@ ecryptfs_generate_key_packet_set(char *dest_base, ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); rc = -EIO; } + +out_free: + kmem_cache_free(ecryptfs_key_record_cache, key_rec); out: if (rc) (*len) = 0; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 26fe405a576..80044d196fe 100644 --- 
a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -651,6 +651,11 @@ static struct ecryptfs_cache_info { .name = "ecryptfs_lower_page_cache", .size = PAGE_CACHE_SIZE, }, + { + .cache = &ecryptfs_key_record_cache, + .name = "ecryptfs_key_record_cache", + .size = sizeof(struct ecryptfs_key_record), + }, }; static void ecryptfs_free_kmem_caches(void) diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 47d7e7b611f..3baf253be95 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -169,7 +169,8 @@ int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) if (!new_id) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable " - "to register daemon [%d] for user\n", pid, uid); + "to register daemon [%d] for user [%d]\n", + pid, uid); goto unlock; } if (!ecryptfs_find_daemon_id(uid, &old_id)) { diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index deeb27b5ba8..c30e149fbd2 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c @@ -11,7 +11,6 @@ #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/sched.h> #include <linux/ext3_fs.h> #include <linux/cryptohash.h> diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index b73cba12f79..ecf89904c11 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -11,7 +11,6 @@ #define EXT3FS_DEBUG -#include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/ext3_jbd.h> diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index a67966385e0..1555024e3b3 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -11,7 +11,6 @@ #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/sched.h> #include <linux/ext4_fs.h> #include <linux/cryptohash.h> diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4fe49c3661b..ea99f6c97f5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -11,7 +11,6 @@ #define EXT4FS_DEBUG -#include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/ext4_jbd2.h> diff --git a/fs/filesystems.c b/fs/filesystems.c index e3fa77c6ed5..7a4f61aa05f 
100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -12,7 +12,6 @@ #include <linux/kmod.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/sched.h> /* for 'current' */ #include <asm/uaccess.h> /* diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 113f6c9110c..c53a5d2d059 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c93ca8f361b..82a1ac7895a 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -53,7 +53,6 @@ * but never before the maximum hash table size has been reached. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/buffer_head.h> diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index cd747c00f67..c1f44009853 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 0c83c7f4dda..5b83ca6acab 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index c4b0391b7aa..46af5535551 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c index e30673dd37e..cfcc39b86a5 100644 --- a/fs/gfs2/lm.c +++ b/fs/gfs2/lm.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. 
*/ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7c1a9e22a52..6e8a59809ab 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index ef3092e2960..32caecd2030 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index f2495f1e21a..d9ecfd23a49 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index 9187eb174b4..c6bac6b6942 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 4855e8cca62..1de05b63d43 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index c996aa739a0..b50180e2277 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. 
*/ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 60f47bf2e8e..d85f6e05cb9 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 14b380fb060..aa0dbd2aac1 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index d0c806b85c8..8bc182c7e2e 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ff0846528d5..8d9c08b5c4b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index e5707a9f78c..601eaa1b9ed 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. 
*/ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index f2d7c49ce75..ba117c445e7 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -8,7 +8,6 @@ * Handling of catalog records */ -#include <linux/sched.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 78137007ccc..80b5682a227 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -10,7 +10,6 @@ #include <linux/errno.h> #include <linux/fs.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/random.h> diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 5a282f64c63..1a97f929344 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/fs.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/vfs.h> #include <linux/nls.h> diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index 3681d0728ac..0c1fc6e20b4 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -16,7 +16,6 @@ #endif #include <linux/kernel.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/zlib.h> #include <linux/zutil.h> diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index cdbe2fe14e2..9fa2e27f064 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -13,7 +13,6 @@ #include <linux/kernel.h> #include <linux/slab.h> -#include <linux/sched.h> #include <linux/fs.h> #include <linux/crc32.h> #include <linux/jffs2.h> diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index 25265965bdc..30f888414ce 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -14,7 +14,6 @@ */ #include <linux/kernel.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/mtd/mtd.h> #include <linux/pagemap.h> diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 0b4acc1c5e7..a5c019e1a44 100644 --- a/fs/lockd/clntproc.c +++ 
b/fs/lockd/clntproc.c @@ -361,7 +361,6 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message * { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; - int status = -ENOLCK; dprintk("lockd: call procedure %d on %s (async)\n", (int)proc, host->h_name); @@ -373,12 +372,10 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message * msg->rpc_proc = &clnt->cl_procinfo[proc]; /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req); - if (status == 0) - return 0; + return rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req); out_err: - nlm_release_call(req); - return status; + tk_ops->rpc_release(req); + return -ENOLCK; } int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 22d40320897..ad21c0713ef 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -9,7 +9,6 @@ */ #include <linux/types.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 80fcacc1acf..50cb8daba4e 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -512,7 +512,7 @@ module_param(nsm_use_hostnames, bool, 0644); static int __init init_nlm(void) { - nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root, 0); + nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); return nlm_sysctl_table ? 
0 : -ENOMEM; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index c7db0a5bccd..cf51f849e76 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -593,9 +593,7 @@ callback: /* Call the client */ kref_get(&block->b_count); - if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, - &nlmsvc_grant_ops) < 0) - nlmsvc_release_block(block); + nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops); } /* diff --git a/fs/namei.c b/fs/namei.c index 161e2225c75..ee60cc4d345 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2688,10 +2688,11 @@ int __page_symlink(struct inode *inode, const char *symname, int len, { struct address_space *mapping = inode->i_mapping; struct page *page; - int err = -ENOMEM; + int err; char *kaddr; retry: + err = -ENOMEM; page = find_or_create_page(mapping, 0, gfp_mask); if (!page) goto fail; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ae9f36e393c..2190e6c2792 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -394,7 +394,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, static int nfs_create_rpc_client(struct nfs_client *clp, int proto, unsigned int timeo, unsigned int retrans, - rpc_authflavor_t flavor) + rpc_authflavor_t flavor, + int flags) { struct rpc_timeout timeparms; struct rpc_clnt *clnt = NULL; @@ -407,6 +408,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, .program = &nfs_program, .version = clp->rpc_ops->version, .authflavor = flavor, + .flags = flags, }; if (!IS_ERR(clp->cl_rpcclient)) @@ -548,7 +550,7 @@ static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data * * - RFC 2623, sec 2.3.2 */ error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, - RPC_AUTH_UNIX); + RPC_AUTH_UNIX, 0); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -868,7 +870,8 @@ static int nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; - 
error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour); + error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour, + RPC_CLNT_CREATE_DISCRTRY); if (error < 0) goto error; memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); @@ -1030,7 +1033,7 @@ error: * Create an NFS4 referral server record */ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *fh) + struct nfs_fh *mntfh) { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; @@ -1069,8 +1072,13 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, BUG_ON(!server->nfs_client->rpc_ops); BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_path_walk(server, mntfh, data->mnt_path); + if (error < 0) + goto error; + /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, fh, &fattr); + error = nfs_probe_fsinfo(server, mntfh, &fattr); if (error < 0) goto error; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f03a770bacb..92d8ec859e2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -637,7 +637,7 @@ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. */ -static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; @@ -652,6 +652,12 @@ static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) dentry->d_fsdata = (void *)verf; } +static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf) +{ + if (time_after(verf, (unsigned long)dentry->d_fsdata)) + nfs_set_verifier(dentry, verf); +} + /* * Whenever an NFS operation succeeds, we know that the dentry * is valid, so we update the revalidation timestamp. 
@@ -785,7 +791,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) goto out_bad; nfs_renew_times(dentry); - nfs_set_verifier(dentry, verifier); + nfs_refresh_verifier(dentry, verifier); out_valid: unlock_kernel(); dput(parent); @@ -1085,7 +1091,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) verifier = nfs_save_change_attribute(dir); ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) - nfs_set_verifier(dentry, verifier); + nfs_refresh_verifier(dentry, verifier); unlock_kernel(); out: dput(parent); @@ -1123,8 +1129,21 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) } name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry != NULL) - return dentry; + if (dentry != NULL) { + /* Is this a positive dentry that matches the readdir info? */ + if (dentry->d_inode != NULL && + (NFS_FILEID(dentry->d_inode) == entry->ino || + d_mountpoint(dentry))) { + if (!desc->plus || entry->fh->size == 0) + return dentry; + if (nfs_compare_fh(NFS_FH(dentry->d_inode), + entry->fh) == 0) + goto out_renew; + } + /* No, so d_drop to allow one to be created */ + d_drop(dentry); + dput(dentry); + } if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) return NULL; /* Note: caller is already holding the dir->i_mutex! 
*/ @@ -1149,6 +1168,10 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return dentry; +out_renew: + nfs_renew_times(dentry); + nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir)); + return dentry; } /* @@ -1443,6 +1466,8 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) if (atomic_read(&dentry->d_count) > 1) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + /* Start asynchronous writeout of the inode */ + write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); unlock_kernel(); return error; @@ -1684,7 +1709,7 @@ out: if (!error) { d_move(old_dentry, new_dentry); nfs_renew_times(new_dentry); - nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir)); } /* new dentry created? */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bd21d7fde65..b1c98ea39b7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -309,7 +309,8 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo rpc_execute(&data->task); - dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n", + dprintk("NFS: %5u initiated direct read call " + "(req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -639,7 +640,8 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l rpc_execute(&data->task); - dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n", + dprintk("NFS: %5u initiated direct write call " + "(req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -797,7 +799,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, const char __user *buf = iov[0].iov_base; size_t count = iov[0].iov_len; - 
dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", + dprintk("nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, (unsigned long) count, (long long) pos); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 8391bd7a83c..6ef268f7c30 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -135,17 +135,15 @@ int nfs4_path_walk(struct nfs_server *server, struct nfs_fh lastfh; struct qstr name; int ret; - //int referral_count = 0; dprintk("--> nfs4_path_walk(,,%s)\n", path); fsinfo.fattr = &fattr; nfs_fattr_init(&fattr); - if (*path++ != '/') { - dprintk("nfs4_get_root: Path does not begin with a slash\n"); - return -EINVAL; - } + /* Eat leading slashes */ + while (*path == '/') + path++; /* Start by getting the root filehandle from the server */ ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); @@ -160,6 +158,7 @@ int nfs4_path_walk(struct nfs_server *server, return -ENOTDIR; } + /* FIXME: It is quite valid for the server to return a referral here */ if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { printk(KERN_ERR "nfs4_get_root:" " getroot obtained referral\n"); @@ -187,6 +186,7 @@ eat_dot_dir: goto eat_dot_dir; } + /* FIXME: Why shouldn't the user be able to use ".." in the path? */ if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2]) ) { printk(KERN_ERR "nfs4_get_root:" @@ -212,6 +212,7 @@ eat_dot_dir: return -ENOTDIR; } + /* FIXME: Referrals are quite valid here too */ if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { printk(KERN_ERR "nfs4_get_root:" " lookupfh obtained referral\n"); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d8349828283..af53c02f473 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -65,13 +65,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) int nfs_write_inode(struct inode *inode, int sync) { - int flags = sync ? 
FLUSH_SYNC : 0; int ret; - ret = nfs_commit_inode(inode, flags); - if (ret < 0) - return ret; - return 0; + if (sync) { + ret = filemap_fdatawait(inode->i_mapping); + if (ret == 0) + ret = nfs_commit_inode(inode, FLUSH_SYNC); + } else + ret = nfs_commit_inode(inode, 0); + if (ret >= 0) + return 0; + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; } void nfs_clear_inode(struct inode *inode) @@ -235,6 +240,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; /* We set i_ino for the few things that still rely on it, * such as stat(2) */ @@ -271,7 +277,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) init_special_inode(inode, inode->i_mode, fattr->rdev); nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = jiffies; + nfsi->last_updated = now; + nfsi->cache_change_attribute = now; inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; @@ -290,7 +297,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_blocks = fattr->du.nfs2.blocks; } nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; + nfsi->attrtimeo_timestamp = now; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->access_cache = RB_ROOT; @@ -783,20 +790,21 @@ void nfs_end_data_update(struct inode *inode) static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; } if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) 
{ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; } if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { inode->i_size = fattr->size; - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; } } } @@ -934,6 +942,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; unsigned int invalid = 0; + unsigned long now = jiffies; int data_stable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -959,7 +968,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = jiffies; + nfsi->last_updated = now; + + /* Fix a wraparound issue with nfsi->cache_change_attribute */ + if (time_before(now, nfsi->cache_change_attribute)) + nfsi->cache_change_attribute = now - 600*HZ; /* Are we racing with known updates of the metadata on the server? 
*/ data_stable = nfs_verify_change_attribute(inode, fattr->time_start); @@ -985,7 +998,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } @@ -996,14 +1009,14 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: mtime change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; } /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); @@ -1032,18 +1045,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_sb->s_id, inode->i_ino); nfsi->change_attr = fattr->change_attr; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; + nfsi->cache_change_attribute = now; } /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; - } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + nfsi->attrtimeo_timestamp = now; + } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = 
NFS_MAXATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; + nfsi->attrtimeo_timestamp = now; } /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) @@ -1122,7 +1135,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; - nfsi->cache_change_attribute = jiffies; #ifdef CONFIG_NFS_V3_ACL nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a28f6ce2e13..6610f2b0207 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -107,10 +107,6 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); /* nfs4proc.c */ #ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; - -extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, - struct nfs4_fs_locations *fs_locations, - struct page *page); #endif /* dir.c */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index acd8fe9762d..7d0371e2bad 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -253,29 +253,6 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, return status; } -static int nfs3_proc_read(struct nfs_read_data *rdata) -{ - int flags = rdata->flags; - struct inode * inode = rdata->inode; - struct nfs_fattr * fattr = rdata->res.fattr; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_READ], - .rpc_argp = &rdata->args, - .rpc_resp = &rdata->res, - .rpc_cred = rdata->cred, - }; - int status; - - dprintk("NFS call read %d @ %Ld\n", rdata->args.count, - (long long) rdata->args.offset); - nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); - if (status >= 0) - nfs_refresh_inode(inode, fattr); - dprintk("NFS reply read: %d\n", status); - return status; -} - /* * Create a regular file. * For now, we don't implement O_EXCL. 
@@ -855,7 +832,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .lookup = nfs3_proc_lookup, .access = nfs3_proc_access, .readlink = nfs3_proc_readlink, - .read = nfs3_proc_read, .create = nfs3_proc_create, .remove = nfs3_proc_remove, .unlink_setup = nfs3_proc_unlink_setup, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e2341766c4f..cf3a17eb5c0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -169,7 +169,7 @@ extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); -extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, +extern int nfs4_proc_fs_locations(struct inode *dir, struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index b872779d7cd..dd5fef20c70 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -16,6 +16,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include "internal.h" +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -130,7 +131,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; char *page = NULL, *page2 = NULL; - char *devname; int loc, s, error; if (locations == NULL || locations->nlocations <= 0) @@ -154,12 +154,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, goto out; } - devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); - if (IS_ERR(devname)) { - mnt = (struct vfsmount *)devname; - goto out; - } - loc = 0; while (loc < locations->nlocations && IS_ERR(mnt)) { const struct nfs4_fs_location *location 
= &locations->locations[loc]; @@ -194,7 +188,11 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, addr.sin_port = htons(NFS_PORT); mountdata.addr = &addr; - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata); + snprintf(page, PAGE_SIZE, "%s:%s", + mountdata.hostname, + mountdata.mnt_path); + + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata); if (!IS_ERR(mnt)) { break; } @@ -242,7 +240,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); - err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); + err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); if (err != 0 || fs_locations->nlocations <= 0 || diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1daee65b517..f52cf5c33c6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1140,7 +1140,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs_client); break; default: if (nfs4_async_handle_error(task, server) == -EAGAIN) { @@ -1424,7 +1423,6 @@ static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fa int status = -ENOMEM; struct page *page = NULL; struct nfs4_fs_locations *locations = NULL; - struct dentry dentry = {}; page = alloc_page(GFP_KERNEL); if (page == NULL) @@ -1433,9 +1431,7 @@ static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fa if (locations == NULL) goto out; - dentry.d_name.name = name->name; - dentry.d_name.len = name->len; - status = nfs4_proc_fs_locations(dir, &dentry, locations, page); + status = nfs4_proc_fs_locations(dir, name, locations, page); if (status != 0) goto out; /* Make sure server returned a different fsid for the referral */ @@ -1737,44 +1733,6 @@ 
static int nfs4_proc_readlink(struct inode *inode, struct page *page, return err; } -static int _nfs4_proc_read(struct nfs_read_data *rdata) -{ - int flags = rdata->flags; - struct inode *inode = rdata->inode; - struct nfs_fattr *fattr = rdata->res.fattr; - struct nfs_server *server = NFS_SERVER(inode); - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], - .rpc_argp = &rdata->args, - .rpc_resp = &rdata->res, - .rpc_cred = rdata->cred, - }; - unsigned long timestamp = jiffies; - int status; - - dprintk("NFS call read %d @ %Ld\n", rdata->args.count, - (long long) rdata->args.offset); - - nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, flags); - if (!status) - renew_lease(server, timestamp); - dprintk("NFS reply read: %d\n", status); - return status; -} - -static int nfs4_proc_read(struct nfs_read_data *rdata) -{ - struct nfs4_exception exception = { }; - int err; - do { - err = nfs4_handle_exception(NFS_SERVER(rdata->inode), - _nfs4_proc_read(rdata), - &exception); - } while (exception.retry); - return err; -} - /* * Got race? * We will need to arrange for the VFS layer to provide an atomic open. 
@@ -2753,11 +2711,15 @@ static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp) might_sleep(); + rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_); + rpc_clnt_sigmask(clnt, &oldset); res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER, nfs4_wait_bit_interruptible, TASK_INTERRUPTIBLE); rpc_clnt_sigunmask(clnt, &oldset); + + rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_); return res; } @@ -2996,7 +2958,6 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs_client); case 0: return 0; } @@ -3150,12 +3111,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->server->nfs_client); break; default: - if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { + if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) rpc_restart_call(task); - } } } @@ -3585,7 +3544,7 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) return len; } -int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, +int nfs4_proc_fs_locations(struct inode *dir, struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); @@ -3595,7 +3554,7 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, }; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), - .name = &dentry->d_name, + .name = name, .page = page, .bitmask = bitmask, }; @@ -3607,7 +3566,7 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, int status; dprintk("%s: start\n", __FUNCTION__); - fs_locations->fattr.valid = 0; + nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; status = rpc_call_sync(server->client, &msg, 0); @@ -3646,7 +3605,6 @@ const struct 
nfs_rpc_ops nfs_v4_clientops = { .lookup = nfs4_proc_lookup, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, - .read = nfs4_proc_read, .create = nfs4_proc_create, .remove = nfs4_proc_remove, .unlink_setup = nfs4_proc_unlink_setup, diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 823298561c0..f5f4430fb2a 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -43,7 +43,6 @@ * child task framework of the RPC layer? */ -#include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/mm.h> #include <linux/pagemap.h> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0cf3fa312a3..f02d522fd78 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -387,8 +387,10 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ op_decode_hdr_maxsz + 12) #define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 560536ad74a..1dcf56de948 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -186,35 +186,6 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page, return status; } -static int nfs_proc_read(struct nfs_read_data *rdata) -{ - int flags = rdata->flags; - struct inode * inode = rdata->inode; - struct nfs_fattr * fattr = rdata->res.fattr; - struct rpc_message msg = { - .rpc_proc = &nfs_procedures[NFSPROC_READ], - .rpc_argp = &rdata->args, - .rpc_resp = &rdata->res, - .rpc_cred = rdata->cred, - }; - int status; - - dprintk("NFS call read %d @ %Ld\n", rdata->args.count, - (long long) rdata->args.offset); - nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); - if (status >= 0) { - nfs_refresh_inode(inode, fattr); - /* Emulate the eof flag, which isn't normally needed in NFSv2 - * as it is guaranteed to 
always return the file attributes - */ - if (rdata->args.offset + rdata->args.count >= fattr->size) - rdata->res.eof = 1; - } - dprintk("NFS reply read: %d\n", status); - return status; -} - static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) @@ -666,7 +637,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lookup = nfs_proc_lookup, .access = NULL, /* access */ .readlink = nfs_proc_readlink, - .read = nfs_proc_read, .create = nfs_proc_create, .remove = nfs_proc_remove, .unlink_setup = nfs_proc_unlink_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a9c26521a9e..6ab4d5a9edf 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -5,14 +5,6 @@ * * Partial copy of Linus' read cache modifications to fs/nfs/file.c * modified for async RPC by okir@monad.swb.de - * - * We do an ugly hack here in order to return proper error codes to the - * user program when a read request failed: since generic_file_read - * only checks the return value of inode->i_op->readpage() which is always 0 - * for async RPC, we set the error bit of the page to 1 when an error occurs, - * and make nfs_readpage transmit requests synchronously when encountering this. - * This is only a small problem, though, since we now retry all operations - * within the RPC code when root squashing is suspected. */ #include <linux/time.h> @@ -122,93 +114,6 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } -/* - * Read a page synchronously. - */ -static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, - struct page *page) -{ - unsigned int rsize = NFS_SERVER(inode)->rsize; - unsigned int count = PAGE_CACHE_SIZE; - int result = -ENOMEM; - struct nfs_read_data *rdata; - - rdata = nfs_readdata_alloc(count); - if (!rdata) - goto out_unlock; - - memset(rdata, 0, sizeof(*rdata)); - rdata->flags = (IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0); - rdata->cred = ctx->cred; - rdata->inode = inode; - INIT_LIST_HEAD(&rdata->pages); - rdata->args.fh = NFS_FH(inode); - rdata->args.context = ctx; - rdata->args.pages = &page; - rdata->args.pgbase = 0UL; - rdata->args.count = rsize; - rdata->res.fattr = &rdata->fattr; - - dprintk("NFS: nfs_readpage_sync(%p)\n", page); - - /* - * This works now because the socket layer never tries to DMA - * into this buffer directly. - */ - do { - if (count < rsize) - rdata->args.count = count; - rdata->res.count = rdata->args.count; - rdata->args.offset = page_offset(page) + rdata->args.pgbase; - - dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", - NFS_SERVER(inode)->nfs_client->cl_hostname, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - (unsigned long long)rdata->args.pgbase, - rdata->args.count); - - lock_kernel(); - result = NFS_PROTO(inode)->read(rdata); - unlock_kernel(); - - /* - * Even if we had a partial success we can't mark the page - * cache valid. - */ - if (result < 0) { - if (result == -EISDIR) - result = -EINVAL; - goto io_error; - } - count -= result; - rdata->args.pgbase += result; - nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); - - /* Note: result == 0 should only happen if we're caching - * a write that extends the file and punches a hole. 
- */ - if (rdata->res.eof != 0 || result == 0) - break; - } while (count); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&inode->i_lock); - - if (rdata->res.eof || rdata->res.count == rdata->args.count) { - SetPageUptodate(page); - if (rdata->res.eof && count != 0) - memclear_highpage_flush(page, rdata->args.pgbase, count); - } - result = 0; - -io_error: - nfs_readdata_free(rdata); -out_unlock: - unlock_page(page); - return result; -} - static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -278,7 +183,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->task.tk_cookie = (unsigned long)inode; - dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -452,7 +357,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { int status; - dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid, + dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid, task->tk_status); status = NFS_PROTO(data->inode)->read_done(task, data); @@ -621,15 +526,9 @@ int nfs_readpage(struct file *file, struct page *page) } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); - if (!IS_SYNC(inode)) { - error = nfs_readpage_async(ctx, inode, page); - goto out; - } - error = nfs_readpage_sync(ctx, inode, page); - if (error < 0 && IS_SWAPFILE(inode)) - printk("Aiee.. 
nfs swap-in of page failed!\n"); -out: + error = nfs_readpage_async(ctx, inode, page); + put_nfs_open_context(ctx); return error; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index baa28860ad2..bb516a2cfba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1045,7 +1045,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, nfs4_fill_super(s); } - mntroot = nfs4_get_root(s, data->fh); + mntroot = nfs4_get_root(s, &mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 3ea50ac6482..fcdcafbb329 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -75,7 +75,7 @@ static ctl_table nfs_cb_sysctl_root[] = { int nfs_register_sysctl(void) { - nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root, 0); + nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root); if (nfs_callback_sysctl_table == NULL) return -ENOMEM; return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 345492e7864..febdade9167 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1,47 +1,7 @@ /* * linux/fs/nfs/write.c * - * Writing file data over NFS. - * - * We do it like this: When a (user) process wishes to write data to an - * NFS file, a write request is allocated that contains the RPC task data - * plus some info on the page to be written, and added to the inode's - * write chain. If the process writes past the end of the page, an async - * RPC call to write the page is scheduled immediately; otherwise, the call - * is delayed for a few seconds. - * - * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE. - * - * Write requests are kept on the inode's writeback list. Each entry in - * that list references the page (portion) to be written. When the - * cache timeout has expired, the RPC task is woken up, and tries to - * lock the page. As soon as it manages to do so, the request is moved - * from the writeback list to the writelock list. 
- * - * Note: we must make sure never to confuse the inode passed in the - * write_page request with the one in page->inode. As far as I understand - * it, these are different when doing a swap-out. - * - * To understand everything that goes on here and in the NFS read code, - * one should be aware that a page is locked in exactly one of the following - * cases: - * - * - A write request is in progress. - * - A user process is in generic_file_write/nfs_update_page - * - A user process is in generic_file_read - * - * Also note that because of the way pages are invalidated in - * nfs_revalidate_inode, the following assertions hold: - * - * - If a page is dirty, there will be no read requests (a page will - * not be re-read unless invalidated by nfs_revalidate_inode). - * - If the page is not uptodate, there will be no pending write - * requests, and no process will be in nfs_update_page. - * - * FIXME: Interaction with the vmscan routines is not optimal yet. - * Either vmscan must be made nfs-savvy, or we need a different page - * reclaim concept that supports something like FS-independent - * buffer_heads with a b_ops-> field. + * Write file data over NFS. 
* * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> */ @@ -79,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, unsigned int, unsigned int); static void nfs_mark_request_dirty(struct nfs_page *req); static int nfs_wait_on_write_congestion(struct address_space *, int); -static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; @@ -194,6 +153,13 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c i_size_write(inode, end); } +/* A writeback failed: mark the page as bad, and invalidate the page cache */ +static void nfs_set_pageerror(struct page *page) +{ + SetPageError(page); + nfs_zap_mapping(page->mapping->host, page->mapping); +} + /* We can set the PG_uptodate flag if we see that a write request * covers the full page. 
*/ @@ -323,7 +289,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc err = 0; out: if (!wbc->for_writepages) - nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc)); + nfs_flush_mapping(page->mapping, wbc, FLUSH_STABLE|wb_priority(wbc)); return err; } @@ -360,14 +326,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out; nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); - if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { - err = nfs_wait_on_requests(inode, 0, 0); - if (err < 0) - goto out; - } - err = nfs_commit_inode(inode, wb_priority(wbc)); - if (err > 0) - err = 0; + err = 0; out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); @@ -516,17 +475,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_st return res; } -static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret; - - spin_lock(&nfsi->req_lock); - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - spin_unlock(&nfsi->req_lock); - return ret; -} - static void nfs_cancel_dirty_list(struct list_head *head) { struct nfs_page *req; @@ -773,7 +721,7 @@ int nfs_updatepage(struct file *file, struct page *page, dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); if (status < 0) - ClearPageUptodate(page); + nfs_set_pageerror(page); return status; } @@ -852,7 +800,8 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->task.tk_priority = flush_task_priority(how); data->task.tk_cookie = (unsigned long)inode; - dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dprintk("NFS: %5u initiated write call " + "(req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -1034,8 +983,7 @@ static void nfs_writeback_done_partial(struct 
rpc_task *task, void *calldata) return; if (task->tk_status < 0) { - ClearPageUptodate(page); - SetPageError(page); + nfs_set_pageerror(page); req->wb_context->error = task->tk_status; dprintk(", error = %d\n", task->tk_status); } else { @@ -1092,8 +1040,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) (long long)req_offset(req)); if (task->tk_status < 0) { - ClearPageUptodate(page); - SetPageError(page); + nfs_set_pageerror(page); req->wb_context->error = task->tk_status; end_page_writeback(page); nfs_inode_remove_request(req); @@ -1134,7 +1081,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) struct nfs_writeres *resp = &data->res; int status; - dprintk("NFS: %4d nfs_writeback_done (status %d)\n", + dprintk("NFS: %5u nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); /* @@ -1250,7 +1197,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->task.tk_priority = flush_task_priority(how); data->task.tk_cookie = (unsigned long)inode; - dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid); + dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); } /* @@ -1291,7 +1238,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) struct nfs_write_data *data = calldata; struct nfs_page *req; - dprintk("NFS: %4d nfs_commit_done (status %d)\n", + dprintk("NFS: %5u nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); /* Call the NFS version-specific code */ @@ -1516,6 +1463,8 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) if (ret < 0) goto out; } + if (!PagePrivate(page)) + return 0; ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); if (ret >= 0) return 0; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 49c310b8492..6f24768272a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -16,7 +16,6 @@ #include <linux/unistd.h> #include <linux/slab.h> -#include <linux/sched.h> #include <linux/stat.h> #include 
<linux/in.h> #include <linux/seq_file.h> @@ -190,18 +189,17 @@ static int expkey_show(struct seq_file *m, struct cache_head *h) { struct svc_expkey *ek ; + int i; if (h ==NULL) { seq_puts(m, "#domain fsidtype fsid [path]\n"); return 0; } ek = container_of(h, struct svc_expkey, h); - seq_printf(m, "%s %d 0x%08x", ek->ek_client->name, - ek->ek_fsidtype, ek->ek_fsid[0]); - if (ek->ek_fsidtype != 1) - seq_printf(m, "%08x", ek->ek_fsid[1]); - if (ek->ek_fsidtype == 2) - seq_printf(m, "%08x", ek->ek_fsid[2]); + seq_printf(m, "%s %d 0x", ek->ek_client->name, + ek->ek_fsidtype); + for (i=0; i < key_len(ek->ek_fsidtype)/4; i++) + seq_printf(m, "%08x", ek->ek_fsid[i]); if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { seq_printf(m, " "); @@ -232,9 +230,8 @@ static inline void expkey_init(struct cache_head *cnew, kref_get(&item->ek_client->ref); new->ek_client = item->ek_client; new->ek_fsidtype = item->ek_fsidtype; - new->ek_fsid[0] = item->ek_fsid[0]; - new->ek_fsid[1] = item->ek_fsid[1]; - new->ek_fsid[2] = item->ek_fsid[2]; + + memcpy(new->ek_fsid, item->ek_fsid, sizeof(new->ek_fsid)); } static inline void expkey_update(struct cache_head *cnew, @@ -363,7 +360,7 @@ static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); -static int check_export(struct inode *inode, int flags) +static int check_export(struct inode *inode, int flags, unsigned char *uuid) { /* We currently export only dirs and regular files. @@ -376,12 +373,13 @@ static int check_export(struct inode *inode, int flags) /* There are two requirements on a filesystem to be exportable. * 1: We must be able to identify the filesystem from a number. * either a device number (so FS_REQUIRES_DEV needed) - * or an FSID number (so NFSEXP_FSID needed). + * or an FSID number (so NFSEXP_FSID or ->uuid is needed). * 2: We must be able to find an inode from a filehandle. 
* This means that s_export_op must be set. */ if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && - !(flags & NFSEXP_FSID)) { + !(flags & NFSEXP_FSID) && + uuid == NULL) { dprintk("exp_export: export of non-dev fs without fsid\n"); return -EINVAL; } @@ -406,10 +404,6 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int len; int migrated, i, err; - len = qword_get(mesg, buf, PAGE_SIZE); - if (len != 5 || memcmp(buf, "fsloc", 5)) - return 0; - /* listsize */ err = get_int(mesg, &fsloc->locations_count); if (err) @@ -520,6 +514,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_fslocs.locations_count = 0; exp.ex_fslocs.migrated = 0; + exp.ex_uuid = NULL; + /* flags */ err = get_int(&mesg, &an_int); if (err == -ENOENT) @@ -543,12 +539,33 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err) goto out; exp.ex_fsid = an_int; - err = check_export(nd.dentry->d_inode, exp.ex_flags); - if (err) goto out; + while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { + if (strcmp(buf, "fsloc") == 0) + err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); + else if (strcmp(buf, "uuid") == 0) { + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len != 16) + err = -EINVAL; + else { + exp.ex_uuid = + kmemdup(buf, 16, GFP_KERNEL); + if (exp.ex_uuid == NULL) + err = -ENOMEM; + } + } else + /* quietly ignore unknown words and anything + * following. Newer user-space can try to set + * new values, then see what the result was. 
+ */ + break; + if (err) + goto out; + } - err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); - if (err) - goto out; + err = check_export(nd.dentry->d_inode, exp.ex_flags, + exp.ex_uuid); + if (err) goto out; } expp = svc_export_lookup(&exp); @@ -562,6 +579,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) else exp_put(expp); out: + nfsd4_fslocs_free(&exp.ex_fslocs); + kfree(exp.ex_uuid); kfree(exp.ex_path); if (nd.dentry) path_release(&nd); @@ -591,9 +610,19 @@ static int svc_export_show(struct seq_file *m, seq_escape(m, exp->ex_client->name, " \t\n\\"); seq_putc(m, '('); if (test_bit(CACHE_VALID, &h->flags) && - !test_bit(CACHE_NEGATIVE, &h->flags)) + !test_bit(CACHE_NEGATIVE, &h->flags)) { exp_flags(m, exp->ex_flags, exp->ex_fsid, exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs); + if (exp->ex_uuid) { + int i; + seq_puts(m, ",uuid="); + for (i=0; i<16; i++) { + if ((i&3) == 0 && i) + seq_putc(m, ':'); + seq_printf(m, "%02x", exp->ex_uuid[i]); + } + } + } seq_puts(m, ")\n"); return 0; } @@ -630,6 +659,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) new->ex_anon_uid = item->ex_anon_uid; new->ex_anon_gid = item->ex_anon_gid; new->ex_fsid = item->ex_fsid; + new->ex_uuid = item->ex_uuid; + item->ex_uuid = NULL; new->ex_path = item->ex_path; item->ex_path = NULL; new->ex_fslocs.locations = item->ex_fslocs.locations; @@ -752,11 +783,11 @@ exp_get_key(svc_client *clp, dev_t dev, ino_t ino) u32 fsidv[3]; if (old_valid_dev(dev)) { - mk_fsid_v0(fsidv, dev, ino); - return exp_find_key(clp, 0, fsidv, NULL); + mk_fsid(FSID_DEV, fsidv, dev, ino, 0, NULL); + return exp_find_key(clp, FSID_DEV, fsidv, NULL); } - mk_fsid_v3(fsidv, dev, ino); - return exp_find_key(clp, 3, fsidv, NULL); + mk_fsid(FSID_ENCODE_DEV, fsidv, dev, ino, 0, NULL); + return exp_find_key(clp, FSID_ENCODE_DEV, fsidv, NULL); } /* @@ -767,9 +798,9 @@ exp_get_fsid_key(svc_client *clp, int fsid) { u32 fsidv[2]; - mk_fsid_v1(fsidv, fsid); + 
mk_fsid(FSID_NUM, fsidv, 0, 0, fsid, NULL); - return exp_find_key(clp, 1, fsidv, NULL); + return exp_find_key(clp, FSID_NUM, fsidv, NULL); } svc_export * @@ -883,8 +914,8 @@ static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) if ((exp->ex_flags & NFSEXP_FSID) == 0) return 0; - mk_fsid_v1(fsid, exp->ex_fsid); - return exp_set_key(clp, 1, fsid, exp); + mk_fsid(FSID_NUM, fsid, 0, 0, exp->ex_fsid, NULL); + return exp_set_key(clp, FSID_NUM, fsid, exp); } static int exp_hash(struct auth_domain *clp, struct svc_export *exp) @@ -894,11 +925,11 @@ static int exp_hash(struct auth_domain *clp, struct svc_export *exp) dev_t dev = inode->i_sb->s_dev; if (old_valid_dev(dev)) { - mk_fsid_v0(fsid, dev, inode->i_ino); - return exp_set_key(clp, 0, fsid, exp); + mk_fsid(FSID_DEV, fsid, dev, inode->i_ino, 0, NULL); + return exp_set_key(clp, FSID_DEV, fsid, exp); } - mk_fsid_v3(fsid, dev, inode->i_ino); - return exp_set_key(clp, 3, fsid, exp); + mk_fsid(FSID_ENCODE_DEV, fsid, dev, inode->i_ino, 0, NULL); + return exp_set_key(clp, FSID_ENCODE_DEV, fsid, exp); } static void exp_unhash(struct svc_export *exp) @@ -977,7 +1008,7 @@ exp_export(struct nfsctl_export *nxp) goto finish; } - err = check_export(nd.dentry->d_inode, nxp->ex_flags); + err = check_export(nd.dentry->d_inode, nxp->ex_flags, NULL); if (err) goto finish; err = -ENOMEM; @@ -1170,9 +1201,9 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, __be32 rv; u32 fsidv[2]; - mk_fsid_v1(fsidv, 0); + mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); - exp = exp_find(clp, 1, fsidv, creq); + exp = exp_find(clp, FSID_NUM, fsidv, creq); if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); if (exp == NULL) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e695660921e..6f677988c71 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -149,6 +149,27 @@ decode_sattr3(__be32 *p, struct iattr *iap) return p; } +static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) +{ + u64 f; + switch(fsid_source(fhp)) { + default: + 
case FSIDSOURCE_DEV: + p = xdr_encode_hyper(p, (u64)huge_encode_dev + (fhp->fh_dentry->d_inode->i_sb->s_dev)); + break; + case FSIDSOURCE_FSID: + p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); + break; + case FSIDSOURCE_UUID: + f = ((u64*)fhp->fh_export->ex_uuid)[0]; + f ^= ((u64*)fhp->fh_export->ex_uuid)[1]; + p = xdr_encode_hyper(p, f); + break; + } + return p; +} + static __be32 * encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) @@ -169,10 +190,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); *p++ = htonl((u32) MAJOR(stat->rdev)); *p++ = htonl((u32) MINOR(stat->rdev)); - if (is_fsid(fhp, rqstp->rq_reffh)) - p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); - else - p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev)); + p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, (u64) stat->ino); p = encode_time3(p, &stat->atime); lease_get_mtime(dentry->d_inode, &time); @@ -203,10 +221,7 @@ encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) p = xdr_encode_hyper(p, ((u64)fhp->fh_post_blocks) << 9); *p++ = fhp->fh_post_rdev[0]; *p++ = fhp->fh_post_rdev[1]; - if (is_fsid(fhp, rqstp->rq_reffh)) - p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); - else - p = xdr_encode_hyper(p, (u64)huge_encode_dev(inode->i_sb->s_dev)); + p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, (u64) inode->i_ino); p = encode_time3(p, &fhp->fh_post_atime); p = encode_time3(p, &fhp->fh_post_mtime); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 5d94555cdc8..832673b1458 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -61,9 +61,11 @@ /* flags used to simulate posix default ACLs */ #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ - | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) + | NFS4_ACE_DIRECTORY_INHERIT_ACE) -#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | 
NFS4_ACE_IDENTIFIER_GROUP) +#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS \ + | NFS4_ACE_INHERIT_ONLY_ACE \ + | NFS4_ACE_IDENTIFIER_GROUP) #define MASK_EQUAL(mask1, mask2) \ ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) @@ -87,12 +89,19 @@ mask_from_posix(unsigned short perm, unsigned int flags) } static u32 -deny_mask(u32 allow_mask, unsigned int flags) +deny_mask_from_posix(unsigned short perm, u32 flags) { - u32 ret = ~allow_mask & ~NFS4_MASK_UNSUPP; - if (!(flags & NFS4_ACL_DIR)) - ret &= ~NFS4_ACE_DELETE_CHILD; - return ret; + u32 mask = 0; + + if (perm & ACL_READ) + mask |= NFS4_READ_MODE; + if (perm & ACL_WRITE) + mask |= NFS4_WRITE_MODE; + if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR)) + mask |= NFS4_ACE_DELETE_CHILD; + if (perm & ACL_EXECUTE) + mask |= NFS4_EXECUTE_MODE; + return mask; } /* XXX: modify functions to return NFS errors; they're only ever @@ -126,108 +135,151 @@ struct ace_container { }; static short ace2type(struct nfs4_ace *); -static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); -static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); -int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); -static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); +static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, + unsigned int); +void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); struct nfs4_acl * nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, unsigned int flags) { struct nfs4_acl *acl; - int error = -EINVAL; + int size = 0; - if ((pacl != NULL && - (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) || - (dpacl != NULL && - (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))) - goto out_err; - - acl = nfs4_acl_new(); - if (acl == NULL) { - error = -ENOMEM; - goto out_err; + if (pacl) { + if (posix_acl_valid(pacl) < 0) + return ERR_PTR(-EINVAL); + size += 2*pacl->a_count; } - - if (pacl 
!= NULL) { - error = _posix_to_nfsv4_one(pacl, acl, - flags & ~NFS4_ACL_TYPE_DEFAULT); - if (error < 0) - goto out_acl; + if (dpacl) { + if (posix_acl_valid(dpacl) < 0) + return ERR_PTR(-EINVAL); + size += 2*dpacl->a_count; } - if (dpacl != NULL) { - error = _posix_to_nfsv4_one(dpacl, acl, - flags | NFS4_ACL_TYPE_DEFAULT); - if (error < 0) - goto out_acl; - } + /* Allocate for worst case: one (deny, allow) pair each: */ + acl = nfs4_acl_new(size); + if (acl == NULL) + return ERR_PTR(-ENOMEM); - return acl; + if (pacl) + _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); -out_acl: - nfs4_acl_free(acl); -out_err: - acl = ERR_PTR(error); + if (dpacl) + _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); return acl; } -static int -nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype, - uid_t owner, unsigned int flags) +struct posix_acl_summary { + unsigned short owner; + unsigned short users; + unsigned short group; + unsigned short groups; + unsigned short other; + unsigned short mask; +}; + +static void +summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) { - int error; - - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, - eflag, mask, whotype, owner); - if (error < 0) - return error; - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, - eflag, deny_mask(mask, flags), whotype, owner); - return error; + struct posix_acl_entry *pa, *pe; + pas->users = 0; + pas->groups = 0; + pas->mask = 07; + + pe = acl->a_entries + acl->a_count; + + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch (pa->e_tag) { + case ACL_USER_OBJ: + pas->owner = pa->e_perm; + break; + case ACL_GROUP_OBJ: + pas->group = pa->e_perm; + break; + case ACL_USER: + pas->users |= pa->e_perm; + break; + case ACL_GROUP: + pas->groups |= pa->e_perm; + break; + case ACL_OTHER: + pas->other = pa->e_perm; + break; + case ACL_MASK: + pas->mask = pa->e_perm; + break; + } + } + /* We'll only care about effective permissions: */ + 
pas->users &= pas->mask; + pas->group &= pas->mask; + pas->groups &= pas->mask; } /* We assume the acl has been verified with posix_acl_valid. */ -static int +static void _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, unsigned int flags) { - struct posix_acl_entry *pa, *pe, *group_owner_entry; - int error = -EINVAL; - u32 mask, mask_mask; + struct posix_acl_entry *pa, *group_owner_entry; + struct nfs4_ace *ace; + struct posix_acl_summary pas; + unsigned short deny; int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? NFS4_INHERITANCE_FLAGS : 0); BUG_ON(pacl->a_count < 3); - pe = pacl->a_entries + pacl->a_count; - pa = pe - 2; /* if mask entry exists, it's second from the last. */ - if (pa->e_tag == ACL_MASK) - mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags); - else - mask_mask = 0; + summarize_posix_acl(pacl, &pas); pa = pacl->a_entries; - BUG_ON(pa->e_tag != ACL_USER_OBJ); - mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); - error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags); - if (error < 0) - goto out; - pa++; + ace = acl->aces + acl->naces; - while (pa->e_tag == ACL_USER) { - mask = mask_from_posix(pa->e_perm, flags); - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, - eflag, mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id); - if (error < 0) - goto out; + /* We could deny everything not granted by the owner: */ + deny = ~pas.owner; + /* + * but it is equivalent (and simpler) to deny only what is not + * granted by later entries: + */ + deny &= pas.users | pas.group | pas.groups | pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_OWNER; + ace++; + acl->naces++; + } + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); + ace->whotype = NFS4_ACL_WHO_OWNER; + ace++; + acl->naces++; 
+ pa++; - error = nfs4_acl_add_pair(acl, eflag, mask, - NFS4_ACL_WHO_NAMED, pa->e_id, flags); - if (error < 0) - goto out; + while (pa->e_tag == ACL_USER) { + deny = ~(pa->e_perm & pas.mask); + deny &= pas.groups | pas.group | pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; + } + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, + flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; pa++; } @@ -236,67 +288,65 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, /* allow ACEs */ - if (pacl->a_count > 3) { - BUG_ON(pa->e_tag != ACL_GROUP_OBJ); - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, - NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, - NFS4_ACL_WHO_GROUP, 0); - if (error < 0) - goto out; - } group_owner_entry = pa; - mask = mask_from_posix(pa->e_perm, flags); - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, - NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, - NFS4_ACL_WHO_GROUP, 0); - if (error < 0) - goto out; + + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pas.group, flags); + ace->whotype = NFS4_ACL_WHO_GROUP; + ace++; + acl->naces++; pa++; while (pa->e_tag == ACL_GROUP) { - mask = mask_from_posix(pa->e_perm, flags); - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, - NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, - NFS4_ACL_WHO_NAMED, pa->e_id); - if (error < 0) - goto out; - - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, - NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, - NFS4_ACL_WHO_NAMED, pa->e_id); - if (error < 0) - goto out; + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + 
ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, + flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; pa++; } /* deny ACEs */ pa = group_owner_entry; - mask = mask_from_posix(pa->e_perm, flags); - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, - NFS4_ACE_IDENTIFIER_GROUP | eflag, - deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0); - if (error < 0) - goto out; + + deny = ~pas.group & pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + ace->access_mask = deny_mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_GROUP; + ace++; + acl->naces++; + } pa++; + while (pa->e_tag == ACL_GROUP) { - mask = mask_from_posix(pa->e_perm, flags); - error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, - NFS4_ACE_IDENTIFIER_GROUP | eflag, - deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id); - if (error < 0) - goto out; + deny = ~(pa->e_perm & pas.mask); + deny &= pas.other; + if (deny) { + ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; + ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + ace->access_mask = mask_from_posix(deny, flags); + ace->whotype = NFS4_ACL_WHO_NAMED; + ace->who = pa->e_id; + ace++; + acl->naces++; + } pa++; } if (pa->e_tag == ACL_MASK) pa++; - BUG_ON(pa->e_tag != ACL_OTHER); - mask = mask_from_posix(pa->e_perm, flags); - error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags); - -out: - return error; + ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; + ace->flag = eflag; + ace->access_mask = mask_from_posix(pa->e_perm, flags); + ace->whotype = NFS4_ACL_WHO_EVERYONE; + acl->naces++; } static void @@ -342,46 +392,6 @@ sort_pacl(struct posix_acl *pacl) return; } -int -nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, - struct posix_acl **dpacl, unsigned int flags) -{ - struct nfs4_acl *dacl; - int error = -ENOMEM; - - *pacl = NULL; - *dpacl = NULL; - - dacl = nfs4_acl_new(); - 
if (dacl == NULL) - goto out; - - error = nfs4_acl_split(acl, dacl); - if (error) - goto out_acl; - - *pacl = _nfsv4_to_posix_one(acl, flags); - if (IS_ERR(*pacl)) { - error = PTR_ERR(*pacl); - *pacl = NULL; - goto out_acl; - } - - *dpacl = _nfsv4_to_posix_one(dacl, flags); - if (IS_ERR(*dpacl)) { - error = PTR_ERR(*dpacl); - *dpacl = NULL; - } -out_acl: - if (error) { - posix_acl_release(*pacl); - *pacl = NULL; - } - nfs4_acl_free(dacl); -out: - return error; -} - /* * While processing the NFSv4 ACE, this maintains bitmasks representing * which permission bits have been allowed and which denied to a given @@ -406,6 +416,7 @@ struct posix_ace_state_array { * calculated so far: */ struct posix_acl_state { + int empty; struct posix_ace_state owner; struct posix_ace_state group; struct posix_ace_state other; @@ -421,6 +432,7 @@ init_state(struct posix_acl_state *state, int cnt) int alloc; memset(state, 0, sizeof(struct posix_acl_state)); + state->empty = 1; /* * In the worst case, each individual acl could be for a distinct * named user or group, but we don't no which, so we allocate @@ -488,6 +500,20 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) int nace; int i, error = 0; + /* + * ACLs with no ACEs are treated differently in the inheritable + * and effective cases: when there are no inheritable ACEs, we + * set a zero-length default posix acl: + */ + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { + pacl = posix_acl_alloc(0, GFP_KERNEL); + return pacl ? pacl : ERR_PTR(-ENOMEM); + } + /* + * When there are no effective ACEs, the following will end + * up setting a 3-element effective posix ACL with all + * permissions zero. 
+ */ nace = 4 + state->users->n + state->groups->n; pacl = posix_acl_alloc(nace, GFP_KERNEL); if (!pacl) @@ -603,6 +629,8 @@ static void process_one_v4_ace(struct posix_acl_state *state, u32 mask = ace->access_mask; int i; + state->empty = 0; + switch (ace2type(ace)) { case ACL_USER_OBJ: if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { @@ -666,75 +694,62 @@ static void process_one_v4_ace(struct posix_acl_state *state, } } -static struct posix_acl * -_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) +int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, + struct posix_acl **dpacl, unsigned int flags) { - struct posix_acl_state state; - struct posix_acl *pacl; + struct posix_acl_state effective_acl_state, default_acl_state; struct nfs4_ace *ace; int ret; - ret = init_state(&state, n4acl->naces); + ret = init_state(&effective_acl_state, acl->naces); if (ret) - return ERR_PTR(ret); - - list_for_each_entry(ace, &n4acl->ace_head, l_ace) - process_one_v4_ace(&state, ace); - - pacl = posix_state_to_acl(&state, flags); - - free_state(&state); - - if (!IS_ERR(pacl)) - sort_pacl(pacl); - return pacl; -} - -static int -nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) -{ - struct list_head *h, *n; - struct nfs4_ace *ace; - int error = 0; - - list_for_each_safe(h, n, &acl->ace_head) { - ace = list_entry(h, struct nfs4_ace, l_ace); - + return ret; + ret = init_state(&default_acl_state, acl->naces); + if (ret) + goto out_estate; + ret = -EINVAL; + for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) - return -EINVAL; - + goto out_dstate; if (ace->flag & ~NFS4_SUPPORTED_FLAGS) - return -EINVAL; - - switch (ace->flag & NFS4_INHERITANCE_FLAGS) { - case 0: - /* Leave this ace in the effective acl: */ + goto out_dstate; + if ((ace->flag & NFS4_INHERITANCE_FLAGS) == 0) { + process_one_v4_ace(&effective_acl_state, ace); continue; - 
case NFS4_INHERITANCE_FLAGS: - /* Add this ace to the default acl and remove it - * from the effective acl: */ - error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, - ace->access_mask, ace->whotype, ace->who); - if (error) - return error; - list_del(h); - kfree(ace); - acl->naces--; - break; - case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE: - /* Add this ace to the default, but leave it in - * the effective acl as well: */ - error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, - ace->access_mask, ace->whotype, ace->who); - if (error) - return error; - break; - default: - return -EINVAL; } + if (!(flags & NFS4_ACL_DIR)) + goto out_dstate; + /* + * Note that when only one of FILE_INHERIT or DIRECTORY_INHERIT + * is set, we're effectively turning on the other. That's OK, + * according to rfc 3530. + */ + process_one_v4_ace(&default_acl_state, ace); + + if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE)) + process_one_v4_ace(&effective_acl_state, ace); } - return 0; + *pacl = posix_state_to_acl(&effective_acl_state, flags); + if (IS_ERR(*pacl)) { + ret = PTR_ERR(*pacl); + goto out_dstate; + } + *dpacl = posix_state_to_acl(&default_acl_state, + flags | NFS4_ACL_TYPE_DEFAULT); + if (IS_ERR(*dpacl)) { + ret = PTR_ERR(*dpacl); + posix_acl_release(*pacl); + goto out_dstate; + } + sort_pacl(*pacl); + sort_pacl(*dpacl); + ret = 0; +out_dstate: + free_state(&default_acl_state); +out_estate: + free_state(&effective_acl_state); + return ret; } static short @@ -759,48 +774,22 @@ EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); struct nfs4_acl * -nfs4_acl_new(void) +nfs4_acl_new(int n) { struct nfs4_acl *acl; - if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL) + acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL); + if (acl == NULL) return NULL; - acl->naces = 0; - INIT_LIST_HEAD(&acl->ace_head); - return acl; } void -nfs4_acl_free(struct nfs4_acl *acl) -{ - struct list_head *h; - struct nfs4_ace *ace; - - if (!acl) - 
return; - - while (!list_empty(&acl->ace_head)) { - h = acl->ace_head.next; - list_del(h); - ace = list_entry(h, struct nfs4_ace, l_ace); - kfree(ace); - } - - kfree(acl); - - return; -} - -int nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, int whotype, uid_t who) { - struct nfs4_ace *ace; - - if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) - return -ENOMEM; + struct nfs4_ace *ace = acl->aces + acl->naces; ace->type = type; ace->flag = flag; @@ -808,10 +797,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, ace->whotype = whotype; ace->who = who; - list_add_tail(&ace->l_ace, &acl->ace_head); acl->naces++; - - return 0; } static struct { @@ -865,7 +851,6 @@ nfs4_acl_write_who(int who, char *p) } EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_free); EXPORT_SYMBOL(nfs4_acl_add_ace); EXPORT_SYMBOL(nfs4_acl_get_whotype); EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f57655a7a2b..fb14d68eaca 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -387,7 +387,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) .address = (struct sockaddr *)&addr, .addrsize = sizeof(addr), .timeout = &timeparms, - .servername = clp->cl_name.data, .program = program, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... 
*/ @@ -397,6 +396,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; + char clientname[16]; int status; if (atomic_read(&cb->cb_set)) @@ -419,6 +419,11 @@ nfsd4_probe_callback(struct nfs4_client *clp) memset(program->stats, 0, sizeof(cb->cb_stat)); program->stats->program = program; + /* Just here to make some printk's more useful: */ + snprintf(clientname, sizeof(clientname), + "%u.%u.%u.%u", NIPQUAD(addr.sin_addr)); + args.servername = clientname; + /* Create RPC client */ cb->cb_client = rpc_create(&args); if (IS_ERR(cb->cb_client)) { diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index b1902ebaab4..e4a83d727af 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -50,7 +50,6 @@ #include <linux/sunrpc/cache.h> #include <linux/nfsd_idmap.h> #include <linux/list.h> -#include <linux/sched.h> #include <linux/time.h> #include <linux/seq_file.h> #include <linux/sunrpc/svcauth.h> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 18aa9440df1..5d090f11f2b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -199,24 +199,22 @@ defer_free(struct nfsd4_compoundargs *argp, static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { - void *new = NULL; if (p == argp->tmp) { - new = kmalloc(nbytes, GFP_KERNEL); - if (!new) return NULL; - p = new; + p = kmalloc(nbytes, GFP_KERNEL); + if (!p) + return NULL; memcpy(p, argp->tmp, nbytes); } else { BUG_ON(p != argp->tmpp); argp->tmpp = NULL; } if (defer_free(argp, kfree, p)) { - kfree(new); + kfree(p); return NULL; } else return (char *)p; } - static __be32 nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) { @@ -255,7 +253,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia return status; /* - * According to spec, unsupported attributes return ERR_NOTSUPP; + * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; * read-only attributes return 
ERR_INVAL. */ if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) @@ -273,42 +271,42 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace, i; - struct nfs4_ace ace; + int nace; + struct nfs4_ace *ace; READ_BUF(4); len += 4; READ32(nace); - *acl = nfs4_acl_new(); + if (nace > NFS4_ACL_MAX) + return nfserr_resource; + + *acl = nfs4_acl_new(nace); if (*acl == NULL) { host_err = -ENOMEM; goto out_nfserr; } - defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl); + defer_free(argp, kfree, *acl); - for (i = 0; i < nace; i++) { + (*acl)->naces = nace; + for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; - READ32(ace.type); - READ32(ace.flag); - READ32(ace.access_mask); + READ32(ace->type); + READ32(ace->flag); + READ32(ace->access_mask); READ32(dummy32); READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); - ace.whotype = nfs4_acl_get_whotype(buf, dummy32); + ace->whotype = nfs4_acl_get_whotype(buf, dummy32); host_err = 0; - if (ace.whotype != NFS4_ACL_WHO_NAMED) - ace.who = 0; - else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP) + if (ace->whotype != NFS4_ACL_WHO_NAMED) + ace->who = 0; + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) host_err = nfsd_map_name_to_gid(argp->rqstp, - buf, dummy32, &ace.who); + buf, dummy32, &ace->who); else host_err = nfsd_map_name_to_uid(argp->rqstp, - buf, dummy32, &ace.who); - if (host_err) - goto out_nfserr; - host_err = nfs4_acl_add_ace(*acl, ace.type, ace.flag, - ace.access_mask, ace.whotype, ace.who); + buf, dummy32, &ace->who); if (host_err) goto out_nfserr; } @@ -1563,14 +1561,20 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (exp->ex_fslocs.migrated) { WRITE64(NFS4_REFERRAL_FSID_MAJOR); WRITE64(NFS4_REFERRAL_FSID_MINOR); - } else if (is_fsid(fhp, rqstp->rq_reffh)) { + } else switch(fsid_source(fhp)) { + 
case FSIDSOURCE_FSID: WRITE64((u64)exp->ex_fsid); WRITE64((u64)0); - } else { + break; + case FSIDSOURCE_DEV: WRITE32(0); WRITE32(MAJOR(stat.dev)); WRITE32(0); WRITE32(MINOR(stat.dev)); + break; + case FSIDSOURCE_UUID: + WRITEMEM(exp->ex_uuid, 16); + break; } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { @@ -1590,7 +1594,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; - struct list_head *h; if (acl == NULL) { if ((buflen -= 4) < 0) @@ -1603,9 +1606,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_resource; WRITE32(acl->naces); - list_for_each(h, &acl->ace_head) { - ace = list_entry(h, struct nfs4_ace, l_ace); - + for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { if ((buflen -= 4*3) < 0) goto out_resource; WRITE32(ace->type); @@ -1815,7 +1816,7 @@ out_acl: status = nfs_ok; out: - nfs4_acl_free(acl); + kfree(acl); if (fhp == &tempfh) fh_put(&tempfh); return status; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index eedf2e3990a..71c686dc725 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -123,7 +123,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu return PTR_ERR(data); rv = write_op[ino](file, data, size); - if (rv>0) { + if (rv >= 0) { simple_transaction_set(file, rv); rv = size; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index a0b4282cb28..c2660cbfcd9 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -9,7 +9,6 @@ * ... 
and again Southern-Winter 2001 to support export_operations */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/fs.h> @@ -119,9 +118,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - /* keep this filehandle for possible reference when encoding attributes */ - rqstp->rq_reffh = fh; - if (!fhp->fh_dentry) { __u32 *datap=NULL; __u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */ @@ -146,10 +142,10 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) } len = key_len(fh->fh_fsid_type) / 4; if (len == 0) goto out; - if (fh->fh_fsid_type == 2) { + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { /* deprecated, convert to type 3 */ - len = 3; - fh->fh_fsid_type = 3; + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); fh->fh_fsid[1] = fh->fh_fsid[2]; } @@ -164,8 +160,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) /* assume old filehandle format */ xdev = old_decode_dev(fh->ofh_xdev); xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid_v0(tfh, xdev, xino); - exp = exp_find(rqstp->rq_client, 0, tfh, &rqstp->rq_chandle); + mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); + exp = exp_find(rqstp->rq_client, FSID_DEV, tfh, + &rqstp->rq_chandle); } if (IS_ERR(exp) && (PTR_ERR(exp) == -EAGAIN @@ -212,7 +209,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) fileid_type = 2; } else fileid_type = fh->fh_fileid_type; - + if (fileid_type == 0) dentry = dget(exp->ex_dentry); else { @@ -292,7 +289,7 @@ static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, __u32 *datap, int *maxsize) { struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; - + if (dentry == exp->ex_dentry) { *maxsize = 0; return 0; @@ -317,7 +314,8 @@ static inline void 
_fh_update_old(struct dentry *dentry, } __be32 -fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh) +fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, + struct svc_fh *ref_fh) { /* ref_fh is a reference file handle. * if it is non-null and for the same filesystem, then we should compose @@ -327,12 +325,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st * */ - u8 ref_fh_version = 0; - u8 ref_fh_fsid_type = 0; + u8 version = 1; + u8 fsid_type = 0; struct inode * inode = dentry->d_inode; struct dentry *parent = dentry->d_parent; __u32 *datap; dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev; + int root_export = (exp->ex_dentry == exp->ex_dentry->d_sb->s_root); dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), @@ -340,57 +339,64 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st parent->d_name.name, dentry->d_name.name, (inode ? inode->i_ino : 0)); + /* Choose filehandle version and fsid type based on + * the reference filehandle (if it is in the same export) + * or the export options. + */ if (ref_fh && ref_fh->fh_export == exp) { - ref_fh_version = ref_fh->fh_handle.fh_version; - if (ref_fh_version == 0xca) - ref_fh_fsid_type = 0; + version = ref_fh->fh_handle.fh_version; + if (version == 0xca) + fsid_type = FSID_DEV; else - ref_fh_fsid_type = ref_fh->fh_handle.fh_fsid_type; - if (ref_fh_fsid_type > 3) - ref_fh_fsid_type = 0; - - /* make sure ref_fh type works for given export */ - if (ref_fh_fsid_type == 1 && - !(exp->ex_flags & NFSEXP_FSID)) { - /* if we don't have an fsid, we cannot provide one... */ - ref_fh_fsid_type = 0; + fsid_type = ref_fh->fh_handle.fh_fsid_type; + /* We know this version/type works for this export + * so there is no need for further checks. 
+ */ + } else if (exp->ex_uuid) { + if (fhp->fh_maxsize >= 64) { + if (root_export) + fsid_type = FSID_UUID16; + else + fsid_type = FSID_UUID16_INUM; + } else { + if (root_export) + fsid_type = FSID_UUID8; + else + fsid_type = FSID_UUID4_INUM; } } else if (exp->ex_flags & NFSEXP_FSID) - ref_fh_fsid_type = 1; - - if (!old_valid_dev(ex_dev) && ref_fh_fsid_type == 0) { + fsid_type = FSID_NUM; + else if (!old_valid_dev(ex_dev)) /* for newer device numbers, we must use a newer fsid format */ - ref_fh_version = 1; - ref_fh_fsid_type = 3; - } - if (old_valid_dev(ex_dev) && - (ref_fh_fsid_type == 2 || ref_fh_fsid_type == 3)) - /* must use type1 for smaller device numbers */ - ref_fh_fsid_type = 0; + fsid_type = FSID_ENCODE_DEV; + else + fsid_type = FSID_DEV; if (ref_fh == fhp) fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", - parent->d_name.name, dentry->d_name.name); + parent->d_name.name, dentry->d_name.name); } if (fhp->fh_maxsize < NFS_FHSIZE) printk(KERN_ERR "fh_compose: called with maxsize %d! 
%s/%s\n", - fhp->fh_maxsize, parent->d_name.name, dentry->d_name.name); + fhp->fh_maxsize, + parent->d_name.name, dentry->d_name.name); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; cache_get(&exp->h); - if (ref_fh_version == 0xca) { + if (version == 0xca) { /* old style filehandle please */ memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; fhp->fh_handle.ofh_dcookie = 0xfeebbaca; fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; - fhp->fh_handle.ofh_xino = ino_t_to_u32(exp->ex_dentry->d_inode->i_ino); + fhp->fh_handle.ofh_xino = + ino_t_to_u32(exp->ex_dentry->d_inode->i_ino); fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); @@ -399,38 +405,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st fhp->fh_handle.fh_version = 1; fhp->fh_handle.fh_auth_type = 0; datap = fhp->fh_handle.fh_auth+0; - fhp->fh_handle.fh_fsid_type = ref_fh_fsid_type; - switch (ref_fh_fsid_type) { - case 0: - /* - * fsid_type 0: - * 2byte major, 2byte minor, 4byte inode - */ - mk_fsid_v0(datap, ex_dev, - exp->ex_dentry->d_inode->i_ino); - break; - case 1: - /* fsid_type 1 == 4 bytes filesystem id */ - mk_fsid_v1(datap, exp->ex_fsid); - break; - case 2: - /* - * fsid_type 2: - * 4byte major, 4byte minor, 4byte inode - */ - mk_fsid_v2(datap, ex_dev, - exp->ex_dentry->d_inode->i_ino); - break; - case 3: - /* - * fsid_type 3: - * 4byte devicenumber, 4byte inode - */ - mk_fsid_v3(datap, ex_dev, - exp->ex_dentry->d_inode->i_ino); - break; - } - len = key_len(ref_fh_fsid_type); + fhp->fh_handle.fh_fsid_type = fsid_type; + mk_fsid(fsid_type, datap, ex_dev, + exp->ex_dentry->d_inode->i_ino, + exp->ex_fsid, exp->ex_uuid); + + len = key_len(fsid_type); datap += len/4; fhp->fh_handle.fh_size = 4 + len; @@ -457,7 +437,7 @@ fh_update(struct svc_fh *fhp) { struct dentry *dentry; __u32 *datap; - 
+ if (!fhp->fh_dentry) goto out_bad; @@ -534,3 +514,22 @@ char * SVCFH_fmt(struct svc_fh *fhp) fh->fh_base.fh_pad[5]); return buf; } + +enum fsid_source fsid_source(struct svc_fh *fhp) +{ + if (fhp->fh_handle.fh_version != 1) + return FSIDSOURCE_DEV; + switch(fhp->fh_handle.fh_fsid_type) { + case FSID_DEV: + case FSID_ENCODE_DEV: + case FSID_MAJOR_MINOR: + return FSIDSOURCE_DEV; + case FSID_NUM: + return FSIDSOURCE_FSID; + default: + if (fhp->fh_export->ex_flags & NFSEXP_FSID) + return FSIDSOURCE_FSID; + else + return FSIDSOURCE_UUID; + } +} diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6555c50d900..0c24b9e24fe 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -153,6 +153,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct dentry *dentry = fhp->fh_dentry; int type; struct timespec time; + u32 f; type = (stat->mode & S_IFMT); @@ -173,10 +174,22 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, else *p++ = htonl(0xffffffff); *p++ = htonl((u32) stat->blocks); - if (is_fsid(fhp, rqstp->rq_reffh)) - *p++ = htonl((u32) fhp->fh_export->ex_fsid); - else + switch (fsid_source(fhp)) { + default: + case FSIDSOURCE_DEV: *p++ = htonl(new_encode_dev(stat->dev)); + break; + case FSIDSOURCE_FSID: + *p++ = htonl((u32) fhp->fh_export->ex_fsid); + break; + case FSIDSOURCE_UUID: + f = ((u32*)fhp->fh_export->ex_uuid)[0]; + f ^= ((u32*)fhp->fh_export->ex_uuid)[1]; + f ^= ((u32*)fhp->fh_export->ex_uuid)[2]; + f ^= ((u32*)fhp->fh_export->ex_uuid)[3]; + *p++ = htonl(f); + break; + } *p++ = htonl((u32) stat->ino); *p++ = htonl((u32) stat->atime.tv_sec); *p++ = htonl(stat->atime.tv_nsec ? 
stat->atime.tv_nsec / 1000 : 0); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8283236c6a0..7e6aa245b5d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -466,7 +466,10 @@ out: posix_acl_release(dpacl); return (error); out_nfserr: - error = nfserrno(host_error); + if (host_error == -EOPNOTSUPP) + error = nfserr_attrnotsupp; + else + error = nfserrno(host_error); goto out; } diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c index 1c23138d00b..4847fbfb010 100644 --- a/fs/ntfs/sysctl.c +++ b/fs/ntfs/sysctl.c @@ -33,20 +33,28 @@ #include "sysctl.h" #include "debug.h" -#define FS_NTFS 1 - /* Definition of the ntfs sysctl. */ static ctl_table ntfs_sysctls[] = { - { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */ - &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. */ - 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */ - { 0 } + { + .ctl_name = CTL_UNNUMBERED, /* Binary and text IDs. */ + .procname = "ntfs-debug", + .data = &debug_msgs, /* Data pointer and size. */ + .maxlen = sizeof(debug_msgs), + .mode = 0644, /* Mode, proc handler. */ + .proc_handler = &proc_dointvec + }, + {} }; /* Define the parent directory /proc/sys/fs. */ static ctl_table sysctls_root[] = { - { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls }, - { 0 } + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = ntfs_sysctls + }, + {} }; /* Storage for the sysctls header. */ @@ -62,17 +70,9 @@ int ntfs_sysctl(int add) { if (add) { BUG_ON(sysctls_root_table); - sysctls_root_table = register_sysctl_table(sysctls_root, 0); + sysctls_root_table = register_sysctl_table(sysctls_root); if (!sysctls_root_table) return -ENOMEM; -#ifdef CONFIG_PROC_FS - /* - * If the proc filesystem is in use and we are a module, need - * to set the owner of our proc entry to our module. In the - * non-modular case, THIS_MODULE is NULL, so this is ok. 
- */ - ntfs_sysctls[0].de->owner = THIS_MODULE; -#endif } else { BUG_ON(!sysctls_root_table); unregister_sysctl_table(sysctls_root_table); diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index b17333a0606..9f5ad0f01ce 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -55,7 +55,7 @@ static ctl_table ocfs2_nm_table[] = { static ctl_table ocfs2_mod_table[] = { { - .ctl_name = KERN_OCFS2_NM, + .ctl_name = FS_OCFS2_NM, .procname = "nm", .data = NULL, .maxlen = 0, @@ -67,7 +67,7 @@ static ctl_table ocfs2_mod_table[] = { static ctl_table ocfs2_kern_table[] = { { - .ctl_name = KERN_OCFS2, + .ctl_name = FS_OCFS2, .procname = "ocfs2", .data = NULL, .maxlen = 0, @@ -922,7 +922,7 @@ static int __init init_o2nm(void) o2hb_init(); o2net_init(); - ocfs2_table_header = register_sysctl_table(ocfs2_root_table, 0); + ocfs2_table_header = register_sysctl_table(ocfs2_root_table); if (!ocfs2_table_header) { printk(KERN_ERR "nodemanager: unable to register sysctl\n"); ret = -ENOMEM; /* or something. 
*/ diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 8fb23cacc2f..070522138ae 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h @@ -33,8 +33,7 @@ #include <linux/configfs.h> #include <linux/rbtree.h> -#define KERN_OCFS2 988 -#define KERN_OCFS2_NM 1 +#define FS_OCFS2_NM 1 const char *o2nm_get_hb_ctl_path(void); diff --git a/fs/proc/Makefile b/fs/proc/Makefile index f6c77627257..a6b3a8f878f 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o proc_misc.o + proc_tty.o proc_misc.o proc_sysctl.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0cdc00d9d97..775fb21294d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -32,7 +32,7 @@ static loff_t proc_file_lseek(struct file *, loff_t, int); DEFINE_SPINLOCK(proc_subdir_lock); -int proc_match(int len, const char *name, struct proc_dir_entry *de) +static int proc_match(int len, const char *name, struct proc_dir_entry *de) { if (de->namelen != len) return 0; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f6722be37dd..c372eb151a3 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -161,6 +161,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, if (!inode) goto out_ino; + PROC_I(inode)->fd = 0; PROC_I(inode)->pde = de; if (de) { if (de->mode) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 277dcd66ebe..c932aa65e19 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,8 @@ #include <linux/proc_fs.h> +extern int proc_sys_init(void); + struct vmalloc_info { unsigned long used; unsigned long largest_chunk; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c new file mode 100644 index 00000000000..20e8cbb3436 --- /dev/null +++ b/fs/proc/proc_sysctl.c @@ -0,0 
+1,479 @@ +/* + * /proc/sys support + */ + +#include <linux/sysctl.h> +#include <linux/proc_fs.h> +#include <linux/security.h> +#include "internal.h" + +static struct dentry_operations proc_sys_dentry_operations; +static const struct file_operations proc_sys_file_operations; +static struct inode_operations proc_sys_inode_operations; + +static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) +{ + /* Refresh the cached information bits in the inode */ + if (table) { + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_mode = table->mode; + if (table->proc_handler) { + inode->i_mode |= S_IFREG; + inode->i_nlink = 1; + } else { + inode->i_mode |= S_IFDIR; + inode->i_nlink = 0; /* It is too hard to figure out */ + } + } +} + +static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) +{ + struct inode *inode; + struct proc_inode *dir_ei, *ei; + int depth; + + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + + /* A directory is always one deeper than it's parent */ + dir_ei = PROC_I(dir); + depth = dir_ei->fd + 1; + + ei = PROC_I(inode); + ei->fd = depth; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &proc_sys_inode_operations; + inode->i_fop = &proc_sys_file_operations; + inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ + proc_sys_refresh_inode(inode, table); +out: + return inode; +} + +static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) +{ + for (;;) { + struct proc_inode *ei; + + ei = PROC_I(dentry->d_inode); + if (ei->fd == depth) + break; /* found */ + + dentry = dentry->d_parent; + } + return dentry; +} + +static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, + struct qstr *name) +{ + int len; + for ( ; table->ctl_name || table->procname; table++) { + + if (!table->procname) + continue; + + len = strlen(table->procname); + if (len != name->len) + continue; + + if (memcmp(table->procname, name->name, len) != 
0) + continue; + + /* I have a match */ + return table; + } + return NULL; +} + +static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, + struct ctl_table *table) +{ + struct dentry *ancestor; + struct proc_inode *ei; + int depth, i; + + ei = PROC_I(dentry->d_inode); + depth = ei->fd; + + if (depth == 0) + return table; + + for (i = 1; table && (i <= depth); i++) { + ancestor = proc_sys_ancestor(dentry, i); + table = proc_sys_lookup_table_one(table, &ancestor->d_name); + if (table) + table = table->child; + } + return table; + +} +static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, + struct qstr *name, + struct ctl_table *table) +{ + table = proc_sys_lookup_table(dparent, table); + if (table) + table = proc_sys_lookup_table_one(table, name); + return table; +} + +static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, + struct qstr *name, + struct ctl_table_header **ptr) +{ + struct ctl_table_header *head; + struct ctl_table *table = NULL; + + for (head = sysctl_head_next(NULL); head; + head = sysctl_head_next(head)) { + table = proc_sys_lookup_entry(parent, name, head->ctl_table); + if (table) + break; + } + *ptr = head; + return table; +} + +static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct ctl_table_header *head; + struct inode *inode; + struct dentry *err; + struct ctl_table *table; + + err = ERR_PTR(-ENOENT); + table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + if (!table) + goto out; + + err = ERR_PTR(-ENOMEM); + inode = proc_sys_make_inode(dir, table); + if (!inode) + goto out; + + err = NULL; + dentry->d_op = &proc_sys_dentry_operations; + d_add(dentry, inode); + +out: + sysctl_head_finish(head); + return err; +} + +static ssize_t proc_sys_read(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct dentry *dentry = filp->f_dentry; + struct ctl_table_header *head; + struct ctl_table *table; + ssize_t 
error, res; + + table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + /* Has the sysctl entry disappeared on us? */ + error = -ENOENT; + if (!table) + goto out; + + /* Has the sysctl entry been replaced by a directory? */ + error = -EISDIR; + if (!table->proc_handler) + goto out; + + /* + * At this point we know that the sysctl was not unregistered + * and won't be until we finish. + */ + error = -EPERM; + if (sysctl_perm(table, MAY_READ)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = table->proc_handler(table, 0, filp, buf, &res, ppos); + if (!error) + error = res; +out: + sysctl_head_finish(head); + + return error; +} + +static ssize_t proc_sys_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct dentry *dentry = filp->f_dentry; + struct ctl_table_header *head; + struct ctl_table *table; + ssize_t error, res; + + table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + /* Has the sysctl entry disappeared on us? */ + error = -ENOENT; + if (!table) + goto out; + + /* Has the sysctl entry been replaced by a directory? */ + error = -EISDIR; + if (!table->proc_handler) + goto out; + + /* + * At this point we know that the sysctl was not unregistered + * and won't be until we finish. 
+ */ + error = -EPERM; + if (sysctl_perm(table, MAY_WRITE)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = table->proc_handler(table, 1, filp, (char __user *)buf, + &res, ppos); + if (!error) + error = res; +out: + sysctl_head_finish(head); + + return error; +} + + +static int proc_sys_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct ctl_table *table) +{ + struct ctl_table_header *head; + struct ctl_table *child_table = NULL; + struct dentry *child, *dir = filp->f_path.dentry; + struct inode *inode; + struct qstr qname; + ino_t ino = 0; + unsigned type = DT_UNKNOWN; + int ret; + + qname.name = table->procname; + qname.len = strlen(table->procname); + qname.hash = full_name_hash(qname.name, qname.len); + + /* Suppress duplicates. + * Only fill a directory entry if it is the value that + * an ordinary lookup of that name returns. Hide all + * others. + * + * If we ever cache this translation in the dcache + * I should do a dcache lookup first. But for now + * it is just simpler not to. 
+ */ + ret = 0; + child_table = do_proc_sys_lookup(dir, &qname, &head); + sysctl_head_finish(head); + if (child_table != table) + return 0; + + child = d_lookup(dir, &qname); + if (!child) { + struct dentry *new; + new = d_alloc(dir, &qname); + if (new) { + inode = proc_sys_make_inode(dir->d_inode, table); + if (!inode) + child = ERR_PTR(-ENOMEM); + else { + new->d_op = &proc_sys_dentry_operations; + d_add(new, inode); + } + if (child) + dput(new); + else + child = new; + } + } + if (!child || IS_ERR(child) || !child->d_inode) + goto end_instantiate; + inode = child->d_inode; + if (inode) { + ino = inode->i_ino; + type = inode->i_mode >> 12; + } + dput(child); +end_instantiate: + if (!ino) + ino= find_inode_number(dir, &qname); + if (!ino) + ino = 1; + return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); +} + +static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct ctl_table_header *head = NULL; + struct ctl_table *table; + unsigned long pos; + int ret; + + ret = -ENOTDIR; + if (!S_ISDIR(inode->i_mode)) + goto out; + + ret = 0; + /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ + if (filp->f_pos == 0) { + if (filldir(dirent, ".", 1, filp->f_pos, + inode->i_ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + } + if (filp->f_pos == 1) { + if (filldir(dirent, "..", 2, filp->f_pos, + parent_ino(dentry), DT_DIR) < 0) + goto out; + filp->f_pos++; + } + pos = 2; + + /* - Find each instance of the directory + * - Read all entries in each instance + * - Before returning an entry to user space lookup the entry + * by name and if I find a different entry don't return + * this one because it means it is a buried dup. + * For sysctl this should only happen for directory entries. 
+ */ + for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { + table = proc_sys_lookup_table(dentry, head->ctl_table); + + if (!table) + continue; + + for (; table->ctl_name || table->procname; table++, pos++) { + /* Can't do anything without a proc name */ + if (!table->procname) + continue; + + if (pos < filp->f_pos) + continue; + + if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) + goto out; + filp->f_pos = pos + 1; + } + } + ret = 1; +out: + sysctl_head_finish(head); + return ret; +} + +static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + /* + * sysctl entries that are not writeable, + * are _NOT_ writeable, capabilities or not. + */ + struct ctl_table_header *head; + struct ctl_table *table; + struct dentry *dentry; + int mode; + int depth; + int error; + + head = NULL; + depth = PROC_I(inode)->fd; + + /* First check the cached permissions, in case we don't have + * enough information to lookup the sysctl table entry. + */ + error = -EACCES; + mode = inode->i_mode; + + if (current->euid == 0) + mode >>= 6; + else if (in_group_p(0)) + mode >>= 3; + + if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) + error = 0; + + /* If we can't get a sysctl table entry the permission + * checks on the cached mode will have to be enough. 
+ */ + if (!nd || !depth) + goto out; + + dentry = nd->dentry; + table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + + /* If the entry does not exist deny permission */ + error = -EACCES; + if (!table) + goto out; + + /* Use the permissions on the sysctl table entry */ + error = sysctl_perm(table, mask); +out: + sysctl_head_finish(head); + return error; +} + +static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + + if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) + return -EPERM; + + error = inode_change_ok(inode, attr); + if (!error) { + error = security_inode_setattr(dentry, attr); + if (!error) + error = inode_setattr(inode, attr); + } + + return error; +} + +/* I'm lazy and don't distinguish between files and directories, + * until access time. + */ +static const struct file_operations proc_sys_file_operations = { + .read = proc_sys_read, + .write = proc_sys_write, + .readdir = proc_sys_readdir, +}; + +static struct inode_operations proc_sys_inode_operations = { + .lookup = proc_sys_lookup, + .permission = proc_sys_permission, + .setattr = proc_sys_setattr, +}; + +static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct ctl_table_header *head; + struct ctl_table *table; + table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + proc_sys_refresh_inode(dentry->d_inode, table); + sysctl_head_finish(head); + return !!table; +} + +static struct dentry_operations proc_sys_dentry_operations = { + .d_revalidate = proc_sys_revalidate, +}; + +static struct proc_dir_entry *proc_sys_root; + +int proc_sys_init(void) +{ + proc_sys_root = proc_mkdir("sys", NULL); + proc_sys_root->proc_iops = &proc_sys_inode_operations; + proc_sys_root->proc_fops = &proc_sys_file_operations; + proc_sys_root->nlink = 0; + return 0; +} diff --git a/fs/proc/root.c b/fs/proc/root.c index af154458b54..5834a744c2a 100644 --- a/fs/proc/root.c +++ 
b/fs/proc/root.c @@ -23,10 +23,6 @@ struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; -#ifdef CONFIG_SYSCTL -struct proc_dir_entry *proc_sys_root; -#endif - static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -71,13 +67,6 @@ void __init proc_root_init(void) #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif -#ifdef CONFIG_SYSCTL - proc_sys_root = proc_mkdir("sys", NULL); -#endif -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) - proc_mkdir("sys/fs", NULL); - proc_mkdir("sys/fs/binfmt_misc", NULL); -#endif proc_root_fs = proc_mkdir("fs", NULL); proc_root_driver = proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ @@ -90,6 +79,9 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); +#ifdef CONFIG_SYSCTL + proc_sys_init(); +#endif } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c index e4bf3456d07..fea20ceb8a5 100644 --- a/fs/smbfs/symlink.c +++ b/fs/smbfs/symlink.c @@ -6,7 +6,6 @@ * Please add a note about your changes to smbfs in the ChangeLog file. 
*/ -#include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fcntl.h> diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 0e97a4f79c3..bcc44084e00 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -16,7 +16,6 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/capability.h> -#include <linux/sched.h> #include <linux/bitops.h> #include <asm/byteorder.h> diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index a6c0ca9f48b..4890ddf1518 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -20,7 +20,6 @@ #include <linux/fs.h> #include <linux/ufs_fs.h> #include <linux/smp_lock.h> -#include <linux/sched.h> #include "swab.h" #include "util.h" diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 789a2559bd5..c6ad7c7e3ee 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -6,7 +6,6 @@ */ #include <linux/module.h> -#include <linux/sched.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/posix_acl_xattr.h> diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 004baf60061..ed2b16dff91 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -15,7 +15,6 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <linux/sched.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/highmem.h> diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 5c46c35a97a..cd6eaa44aa2 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -251,7 +251,7 @@ static ctl_table xfs_root_table[] = { void xfs_sysctl_register(void) { - xfs_table_header = register_sysctl_table(xfs_root_table, 0); + xfs_table_header = register_sysctl_table(xfs_root_table); } void |