From 6b2f3d1f769be5779b479c37800229d9a4809fc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Oct 2009 11:05:28 +0100 Subject: vfs: Implement proper O_SYNC semantics While Linux provided an O_SYNC flag basically since day 1, it took until Linux 2.4.0-test12pre2 to actually get it implemented for filesystems, since that day we had generic_osync_around with only minor changes and the great "For now, when the user asks for O_SYNC, we'll actually give O_DSYNC" comment. This patch intends to actually give us real O_SYNC semantics in addition to the O_DSYNC semantics. After Jan's O_SYNC patches which are required before this patch it's actually surprisingly simple, we just need to figure out when to set the datasync flag to vfs_fsync_range and when not. This patch renames the existing O_SYNC flag to O_DSYNC while keeping it's numerical value to keep binary compatibility, and adds a new real O_SYNC flag. To guarantee backwards compatiblity it is defined as expanding to both the O_DSYNC and the new additional binary flag (__O_SYNC) to make sure we are backwards-compatible when compiled against the new headers. This also means that all places that don't care about the differences can just check O_DSYNC and get the right behaviour for O_SYNC, too - only places that actuall care need to check __O_SYNC in addition. Drivers and network filesystems have been updated in a fail safe way to always do the full sync magic if O_DSYNC is set. The few places setting O_SYNC for lower layers are kept that way for now to stay failsafe. We enforce that O_DSYNC is set when __O_SYNC is set early in the open path to make sure we always get these sane options. Note that parisc really screwed up their headers as they already define a O_DSYNC that has always been a no-op. We try to repair it by using it for the new O_DSYNC and redefinining O_SYNC to send both the traditional O_SYNC numerical value _and_ the O_DSYNC one. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Grant Grundler Cc: "David S. Miller" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Al Viro Cc: Andreas Dilger Acked-by: Trond Myklebust Acked-by: Kyle McMartin Acked-by: Ulrich Drepper Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Jan Kara --- fs/afs/write.c | 5 +++-- fs/btrfs/file.c | 4 ++-- fs/cifs/dir.c | 3 ++- fs/cifs/file.c | 6 ++++-- fs/namei.c | 9 +++++++++ fs/nfs/file.c | 4 ++-- fs/nfs/write.c | 2 +- fs/ocfs2/file.c | 2 +- fs/sync.c | 5 +++-- fs/ubifs/file.c | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- 11 files changed, 29 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/afs/write.c b/fs/afs/write.c index c63a3c8beb7..6be1bc31616 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -692,8 +692,9 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, } /* return error values for O_SYNC and IS_SYNC() */ - if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { - ret = afs_fsync(iocb->ki_filp, dentry, 1); + if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_DSYNC) { + ret = afs_fsync(iocb->ki_filp, dentry, + (iocb->ki_filp->f_flags & __O_SYNC) ? 0 : 1); if (ret < 0) result = ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 06550affbd2..77f759302e1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -909,7 +909,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unsigned long last_index; int will_write; - will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || + will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || (file->f_flags & O_DIRECT)); nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, @@ -1076,7 +1076,7 @@ out_nolock: if (err) num_written = err; - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { trans = btrfs_start_transaction(root, 1); ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1f42f772865..6ccf7262d1b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode, posix_flags |= SMB_O_EXCL; if (oflags & O_TRUNC) posix_flags |= SMB_O_TRUNC; - if (oflags & O_SYNC) + /* be safe and imply O_SYNC for O_DSYNC */ + if (oflags & O_DSYNC) posix_flags |= SMB_O_SYNC; if (oflags & O_DIRECTORY) posix_flags |= SMB_O_DIRECTORY; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 429337eb7af..057e1dae12a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags) reopening a file. They had their effect on the original open */ if (flags & O_APPEND) posix_flags |= (fmode_t)O_APPEND; - if (flags & O_SYNC) - posix_flags |= (fmode_t)O_SYNC; + if (flags & O_DSYNC) + posix_flags |= (fmode_t)O_DSYNC; + if (flags & __O_SYNC) + posix_flags |= (fmode_t)__O_SYNC; if (flags & O_DIRECTORY) posix_flags |= (fmode_t)O_DIRECTORY; if (flags & O_NOFOLLOW) diff --git a/fs/namei.c b/fs/namei.c index d11f404667e..b83d38f614f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1678,6 +1678,15 @@ struct file *do_filp_open(int dfd, const char *pathname, int will_write; int flag = open_to_namei_flags(open_flag); + /* + * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only + * check for O_DSYNC if the need any syncing at all we enforce it's + * always set instead of having to deal with possibly weird behaviour + * for malicious applications setting only __O_SYNC. + */ + if (open_flag & __O_SYNC) + open_flag |= O_DSYNC; + if (!acc_mode) acc_mode = MAY_OPEN | ACC_MODE(flag); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f5fdd39e037..6b891328f33 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) { struct nfs_open_context *ctx; - if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) + if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) return 1; ctx = nfs_file_open_context(filp); if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) @@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); - /* Return error values for O_SYNC and IS_SYNC() */ + /* Return error values for O_DSYNC and IS_SYNC() */ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c84b5cc1a94..b1ce2ea9b93 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page, */ if (nfs_write_pageuptodate(page, inode) && inode->i_flock == NULL && - !(file->f_flags & O_SYNC)) { + !(file->f_flags & O_DSYNC)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index de059f49058..3d30a1c974a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2006,7 +2006,7 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); - if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { ret = filemap_fdatawrite_range(file->f_mapping, pos, pos + count - 1); if (ret < 0) diff --git a/fs/sync.c b/fs/sync.c index d104591b066..b75ca68dc08 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) */ int generic_write_sync(struct file *file, loff_t pos, loff_t count) { - if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) return 0; return vfs_fsync_range(file, file->f_path.dentry, pos, - pos + count - 1, 1); + pos + count - 1, + (file->f_flags & __O_SYNC) ? 0 : 1); } EXPORT_SYMBOL(generic_write_sync); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 1009adc8d60..eaa3d480bc2 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1401,7 +1401,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret < 0) return ret; - if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { + if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_DSYNC)) { err = ubifs_sync_wbufs_by_inode(c, inode); if (err) return err; diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 072050f8d34..339c52b1a43 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -811,7 +811,7 @@ write_retry: XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; int error2; -- cgit v1.2.3