aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c40
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/9p/vfs_inode_dotl.c2
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/afs/callback.c4
-rw-r--r--fs/afs/cmservice.c168
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/fsclient.c148
-rw-r--r--fs/afs/internal.h38
-rw-r--r--fs/afs/main.c1
-rw-r--r--fs/afs/rxrpc.c522
-rw-r--r--fs/afs/server.c11
-rw-r--r--fs/afs/vlclient.c7
-rw-r--r--fs/afs/vlocation.c4
-rw-r--r--fs/aio.c9
-rw-r--r--fs/attr.c50
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/binfmt_elf.c23
-rw-r--r--fs/block_dev.c18
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c24
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/tree-log.c4
-rw-r--r--fs/buffer.c3
-rw-r--r--fs/cachefiles/interface.c8
-rw-r--r--fs/cachefiles/internal.h3
-rw-r--r--fs/cachefiles/namei.c8
-rw-r--r--fs/ceph/acl.c11
-rw-r--r--fs/ceph/addr.c24
-rw-r--r--fs/ceph/file.c4
-rw-r--r--fs/ceph/inode.c19
-rw-r--r--fs/ceph/locks.c4
-rw-r--r--fs/ceph/mds_client.c30
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/strings.c2
-rw-r--r--fs/ceph/super.c49
-rw-r--r--fs/cifs/cifsfs.h10
-rw-r--r--fs/cifs/cifsproto.h3
-rw-r--r--fs/cifs/cifssmb.c27
-rw-r--r--fs/cifs/dir.c6
-rw-r--r--fs/cifs/file.c14
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/coda/file.c23
-rw-r--r--fs/compat.c16
-rw-r--r--fs/crypto/crypto.c11
-rw-r--r--fs/crypto/fname.c85
-rw-r--r--fs/crypto/keyinfo.c71
-rw-r--r--fs/dax.c254
-rw-r--r--fs/debugfs/file.c15
-rw-r--r--fs/debugfs/internal.h4
-rw-r--r--fs/devpts/inode.c71
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/dlm/lowcomms.c8
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/efivarfs/inode.c5
-rw-r--r--fs/efivarfs/super.c13
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/ext2/Kconfig1
-rw-r--r--fs/ext2/acl.c12
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/file.c77
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c112
-rw-r--r--fs/ext4/acl.c12
-rw-r--r--fs/ext4/dir.c8
-rw-r--r--fs/ext4/ext4.h35
-rw-r--r--fs/ext4/extents.c27
-rw-r--r--fs/ext4/file.c11
-rw-r--r--fs/ext4/fsync.c9
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c73
-rw-r--r--fs/ext4/ioctl.c11
-rw-r--r--fs/ext4/move_extent.c7
-rw-r--r--fs/ext4/namei.c22
-rw-r--r--fs/ext4/page-io.c4
-rw-r--r--fs/ext4/super.c126
-rw-r--r--fs/ext4/symlink.c10
-rw-r--r--fs/ext4/xattr.c340
-rw-r--r--fs/f2fs/acl.c18
-rw-r--r--fs/f2fs/acl.h1
-rw-r--r--fs/f2fs/checkpoint.c205
-rw-r--r--fs/f2fs/data.c131
-rw-r--r--fs/f2fs/debug.c17
-rw-r--r--fs/f2fs/dir.c116
-rw-r--r--fs/f2fs/f2fs.h175
-rw-r--r--fs/f2fs/file.c22
-rw-r--r--fs/f2fs/gc.c90
-rw-r--r--fs/f2fs/inline.c25
-rw-r--r--fs/f2fs/inode.c17
-rw-r--r--fs/f2fs/namei.c21
-rw-r--r--fs/f2fs/node.c40
-rw-r--r--fs/f2fs/node.h77
-rw-r--r--fs/f2fs/recovery.c124
-rw-r--r--fs/f2fs/segment.c219
-rw-r--r--fs/f2fs/segment.h11
-rw-r--r--fs/f2fs/super.c93
-rw-r--r--fs/f2fs/xattr.c39
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/namei_vfat.c19
-rw-r--r--fs/file.c34
-rw-r--r--fs/fuse/Kconfig1
-rw-r--r--fs/fuse/Makefile2
-rw-r--r--fs/fuse/acl.c99
-rw-r--r--fs/fuse/dev.c64
-rw-r--r--fs/fuse/dir.c132
-rw-r--r--fs/fuse/file.c70
-rw-r--r--fs/fuse/fuse_i.h37
-rw-r--r--fs/fuse/inode.c27
-rw-r--r--fs/fuse/xattr.c22
-rw-r--r--fs/gfs2/acl.c12
-rw-r--r--fs/gfs2/aops.c19
-rw-r--r--fs/gfs2/bmap.c6
-rw-r--r--fs/gfs2/dir.c20
-rw-r--r--fs/gfs2/file.c34
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/inode.c6
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/main.c4
-rw-r--r--fs/gfs2/meta_io.c35
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/posix_acl.c4
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/file.c6
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/hugetlbfs/inode.c14
-rw-r--r--fs/inode.c74
-rw-r--r--fs/internal.h22
-rw-r--r--fs/iomap.c89
-rw-r--r--fs/jbd2/journal.c131
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/jffs2/acl.c9
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jfs/acl.c6
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c3
-rw-r--r--fs/jfs/resize.c10
-rw-r--r--fs/kernfs/inode.c2
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/locks.c142
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/mbcache.c6
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c79
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/file.c25
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/auth.c6
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c39
-rw-r--r--fs/notify/group.c6
-rw-r--r--fs/notify/inotify/inotify_user.c16
-rw-r--r--fs/notify/notification.c35
-rw-r--r--fs/nsfs.c105
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/acl.c10
-rw-r--r--fs/ocfs2/aops.c10
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/file.c36
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/open.c17
-rw-r--r--fs/orangefs/acl.c15
-rw-r--r--fs/orangefs/dcache.c2
-rw-r--r--fs/orangefs/devorangefs-req.c164
-rw-r--r--fs/orangefs/dir.c4
-rw-r--r--fs/orangefs/downcall.h11
-rw-r--r--fs/orangefs/file.c49
-rw-r--r--fs/orangefs/inode.c14
-rw-r--r--fs/orangefs/namei.c52
-rw-r--r--fs/orangefs/orangefs-cache.c6
-rw-r--r--fs/orangefs/orangefs-debugfs.c765
-rw-r--r--fs/orangefs/orangefs-debugfs.h8
-rw-r--r--fs/orangefs/orangefs-dev-proto.h6
-rw-r--r--fs/orangefs/orangefs-kernel.h51
-rw-r--r--fs/orangefs/orangefs-mod.c86
-rw-r--r--fs/orangefs/orangefs-sysfs.c1287
-rw-r--r--fs/orangefs/orangefs-utils.c403
-rw-r--r--fs/orangefs/protocol.h25
-rw-r--r--fs/orangefs/super.c31
-rw-r--r--fs/orangefs/upcall.h18
-rw-r--r--fs/orangefs/waitqueue.c10
-rw-r--r--fs/overlayfs/copy_up.c22
-rw-r--r--fs/overlayfs/dir.c10
-rw-r--r--fs/overlayfs/inode.c2
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/pipe.c13
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/posix_acl.c60
-rw-r--r--fs/proc/array.c189
-rw-r--r--fs/proc/base.c101
-rw-r--r--fs/proc/fd.c8
-rw-r--r--fs/proc/fd.h2
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/meminfo.c211
-rw-r--r--fs/proc/proc_net.c13
-rw-r--r--fs/proc/proc_sysctl.c21
-rw-r--r--fs/proc/stat.c49
-rw-r--r--fs/proc/task_mmu.c10
-rw-r--r--fs/pstore/platform.c53
-rw-r--r--fs/pstore/pmsg.c35
-rw-r--r--fs/pstore/ram.c46
-rw-r--r--fs/pstore/ram_core.c96
-rw-r--r--fs/quota/quota.c18
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/super.c12
-rw-r--r--fs/reiserfs/xattr_acl.c8
-rw-r--r--fs/seq_file.c57
-rw-r--r--fs/splice.c677
-rw-r--r--fs/sysfs/group.c4
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/udf/file.c20
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/utimes.c19
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c325
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h35
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c135
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h25
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c136
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h12
-rw-r--r--fs/xfs/libxfs/xfs_btree.c59
-rw-r--r--fs/xfs/libxfs/xfs_btree.h28
-rw-r--r--fs/xfs/libxfs/xfs_defer.c79
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h10
-rw-r--r--fs/xfs/xfs_acl.c13
-rw-r--r--fs/xfs/xfs_aops.c31
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_bmap_util.c2
-rw-r--r--fs/xfs/xfs_buf_item.c9
-rw-r--r--fs/xfs/xfs_extent_busy.c2
-rw-r--r--fs/xfs/xfs_file.c128
-rw-r--r--fs/xfs/xfs_filestream.c13
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_icache.c14
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_iomap.c494
-rw-r--r--fs/xfs/xfs_iomap.h2
-rw-r--r--fs/xfs/xfs_iops.c92
-rw-r--r--fs/xfs/xfs_iops.h3
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c191
-rw-r--r--fs/xfs/xfs_mount.c14
-rw-r--r--fs/xfs/xfs_mount.h44
-rw-r--r--fs/xfs/xfs_rmap_item.c36
-rw-r--r--fs/xfs/xfs_rmap_item.h8
-rw-r--r--fs/xfs/xfs_super.c7
-rw-r--r--fs/xfs/xfs_super.h1
-rw-r--r--fs/xfs/xfs_sysfs.c47
-rw-r--r--fs/xfs/xfs_trace.h115
-rw-r--r--fs/xfs/xfs_trans.c3
-rw-r--r--fs/xfs/xfs_trans_extfree.c3
-rw-r--r--fs/xfs/xfs_xattr.c1
283 files changed, 6985 insertions, 5853 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 5b6a1743ea17..b3c2cc79c20d 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -276,32 +276,26 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
switch (handler->flags) {
case ACL_TYPE_ACCESS:
if (acl) {
- umode_t mode = inode->i_mode;
- retval = posix_acl_equiv_mode(acl, &mode);
- if (retval < 0)
+ struct iattr iattr;
+
+ retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
+ if (retval)
goto err_out;
- else {
- struct iattr iattr;
- if (retval == 0) {
- /*
- * ACL can be represented
- * by the mode bits. So don't
- * update ACL.
- */
- acl = NULL;
- value = NULL;
- size = 0;
- }
- /* Updte the mode bits */
- iattr.ia_mode = ((mode & S_IALLUGO) |
- (inode->i_mode & ~S_IALLUGO));
- iattr.ia_valid = ATTR_MODE;
- /* FIXME should we update ctime ?
- * What is the following setxattr update the
- * mode ?
+ if (!acl) {
+ /*
+ * ACL can be represented
+ * by the mode bits. So don't
+ * update ACL.
*/
- v9fs_vfs_setattr_dotl(dentry, &iattr);
+ value = NULL;
+ size = 0;
}
+ iattr.ia_valid = ATTR_MODE;
+ /* FIXME should we update ctime ?
+ * What is the following setxattr update the
+ * mode ?
+ */
+ v9fs_vfs_setattr_dotl(dentry, &iattr);
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8b1999b528e9..aa639bb1f289 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1094,7 +1094,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
struct p9_wstat wstat;
p9_debug(P9_DEBUG_VFS, "\n");
- retval = inode_change_ok(d_inode(dentry), iattr);
+ retval = setattr_prepare(dentry, iattr);
if (retval)
return retval;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index d8220efdd752..afaa4b6de801 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -558,7 +558,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
p9_debug(P9_DEBUG_VFS, "\n");
- retval = inode_change_ok(inode, iattr);
+ retval = setattr_prepare(dentry, iattr);
if (retval)
return retval;
diff --git a/fs/Kconfig b/fs/Kconfig
index 2bc7ad775842..4bd03a2b0518 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -79,6 +79,7 @@ config EXPORTFS_BLOCK_OPS
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
default y
+ select PERCPU_RWSEM
help
This option enables standard file locking support, required
for filesystems like NFS and for the flock() system
@@ -199,6 +200,9 @@ config HUGETLBFS
config HUGETLB_PAGE
def_bool HUGETLBFS
+config ARCH_HAS_GIGANTIC_PAGE
+ bool
+
source "fs/configfs/Kconfig"
source "fs/efivarfs/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index c7efddf6e038..4c09d93d9569 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -89,7 +89,7 @@ config BINFMT_SCRIPT
config BINFMT_FLAT
bool "Kernel support for flat binaries"
- depends on !MMU || M68K
+ depends on !MMU || ARM || M68K
depends on !FRV || BROKEN
help
Support uClinux FLAT format binaries.
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 335055d828e4..f57baaa511aa 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -303,7 +303,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
unsigned int ia_valid = attr->ia_valid;
int error;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
/*
* we can't change the UID or GID of any file -
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 0fdb0f5b2239..1aa243502c7f 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -219,7 +219,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
- error = inode_change_ok(inode,attr);
+ error = setattr_prepare(dentry, attr);
if (error)
goto out;
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 7ef637d7f3a5..1e9d2f84e5b5 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -461,8 +461,8 @@ static void afs_callback_updater(struct work_struct *work)
*/
int __init afs_callback_update_init(void)
{
- afs_callback_update_worker =
- create_singlethread_workqueue("kafs_callbackd");
+ afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
+ WQ_MEM_RECLAIM);
return afs_callback_update_worker ? 0 : -ENOMEM;
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 85737e96ab8b..2037e7a77a37 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -17,19 +17,12 @@
#include "internal.h"
#include "afs_cm.h"
-#if 0
-struct workqueue_struct *afs_cm_workqueue;
-#endif /* 0 */
-
-static int afs_deliver_cb_init_call_back_state(struct afs_call *,
- struct sk_buff *, bool);
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
- struct sk_buff *, bool);
-static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
- struct sk_buff *, bool);
+static int afs_deliver_cb_init_call_back_state(struct afs_call *);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
+static int afs_deliver_cb_probe(struct afs_call *);
+static int afs_deliver_cb_callback(struct afs_call *);
+static int afs_deliver_cb_probe_uuid(struct afs_call *);
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *);
static void afs_cm_destructor(struct afs_call *);
/*
@@ -134,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call)
* received. The step number here must match the final number in
* afs_deliver_cb_callback().
*/
- if (call->unmarshall == 6) {
+ if (call->unmarshall == 5) {
ASSERT(call->server && call->count && call->request);
afs_break_callbacks(call->server, call->count, call->request);
}
@@ -168,27 +161,27 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
/*
* deliver request data to a CB.CallBack call
*/
-static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_callback(struct afs_call *call)
{
+ struct sockaddr_rxrpc srx;
struct afs_callback *cb;
struct afs_server *server;
- struct in_addr addr;
__be32 *bp;
u32 tmp;
int ret, loop;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
+ rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
call->offset = 0;
call->unmarshall++;
/* extract the FID array and its count in two steps */
case 1:
_debug("extract FID count");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -205,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
case 2:
_debug("extract FID array");
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count * 3 * 4);
+ ret = afs_extract_data(call, call->buffer,
+ call->count * 3 * 4, true);
if (ret < 0)
return ret;
@@ -232,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
/* extract the callback array and its count in two steps */
case 3:
_debug("extract CB count");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -242,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
return -EBADMSG;
call->offset = 0;
call->unmarshall++;
- if (tmp == 0)
- goto empty_cb_array;
case 4:
_debug("extract CB array");
- ret = afs_extract_data(call, skb, last, call->request,
- call->count * 3 * 4);
+ ret = afs_extract_data(call, call->buffer,
+ call->count * 3 * 4, false);
if (ret < 0)
return ret;
@@ -261,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
cb->type = ntohl(*bp++);
}
- empty_cb_array:
call->offset = 0;
call->unmarshall++;
- case 5:
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
-
/* Record that the message was unmarshalled successfully so
* that the call destructor can know do the callback breaking
* work, even if the final ACK isn't received.
@@ -278,17 +263,15 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
* updated also.
*/
call->unmarshall++;
- case 6:
+ case 5:
break;
}
-
call->state = AFS_CALL_REPLYING;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- memcpy(&addr, &ip_hdr(skb)->saddr, 4);
- server = afs_find_server(&addr);
+ server = afs_find_server(&srx);
if (!server)
return -ENOTCONN;
call->server = server;
@@ -315,17 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
/*
* deliver request data to a CB.InitCallBackState call
*/
-static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
- struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
+ struct sockaddr_rxrpc srx;
struct afs_server *server;
- struct in_addr addr;
int ret;
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
+
+ rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
- ret = afs_data_complete(call, skb, last);
+ ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
@@ -334,8 +317,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- memcpy(&addr, &ip_hdr(skb)->saddr, 4);
- server = afs_find_server(&addr);
+ server = afs_find_server(&srx);
if (!server)
return -ENOTCONN;
call->server = server;
@@ -348,27 +330,68 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
/*
* deliver request data to a CB.InitCallBackState3 call
*/
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
- struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
{
+ struct sockaddr_rxrpc srx;
struct afs_server *server;
- struct in_addr addr;
+ struct afs_uuid *r;
+ unsigned loop;
+ __be32 *b;
+ int ret;
+
+ _enter("");
+
+ rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+
+ _enter("{%u}", call->unmarshall);
- _enter(",{%u},%d", skb->len, last);
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
+ if (!call->buffer)
+ return -ENOMEM;
+ call->unmarshall++;
- /* There are some arguments that we ignore */
- afs_data_consumed(call, skb);
- if (!last)
- return -EAGAIN;
+ case 1:
+ _debug("extract UUID");
+ ret = afs_extract_data(call, call->buffer,
+ 11 * sizeof(__be32), false);
+ switch (ret) {
+ case 0: break;
+ case -EAGAIN: return 0;
+ default: return ret;
+ }
+
+ _debug("unmarshall UUID");
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+ if (!call->request)
+ return -ENOMEM;
+
+ b = call->buffer;
+ r = call->request;
+ r->time_low = ntohl(b[0]);
+ r->time_mid = ntohl(b[1]);
+ r->time_hi_and_version = ntohl(b[2]);
+ r->clock_seq_hi_and_reserved = ntohl(b[3]);
+ r->clock_seq_low = ntohl(b[4]);
+
+ for (loop = 0; loop < 6; loop++)
+ r->node[loop] = ntohl(b[loop + 5]);
+
+ call->offset = 0;
+ call->unmarshall++;
+
+ case 2:
+ break;
+ }
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- memcpy(&addr, &ip_hdr(skb)->saddr, 4);
- server = afs_find_server(&addr);
+ server = afs_find_server(&srx);
if (!server)
return -ENOTCONN;
call->server = server;
@@ -393,14 +416,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
/*
* deliver request data to a CB.Probe call
*/
-static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_probe(struct afs_call *call)
{
int ret;
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
- ret = afs_data_complete(call, skb, last);
+ ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
@@ -426,7 +448,6 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
_enter("");
-
if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
reply.match = htonl(0);
else
@@ -439,19 +460,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
/*
* deliver request data to a CB.ProbeUuid call
*/
-static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_probe_uuid(struct afs_call *call)
{
struct afs_uuid *r;
unsigned loop;
__be32 *b;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
@@ -463,8 +479,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
case 1:
_debug("extract UUID");
- ret = afs_extract_data(call, skb, last, call->buffer,
- 11 * sizeof(__be32));
+ ret = afs_extract_data(call, call->buffer,
+ 11 * sizeof(__be32), false);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
@@ -491,16 +507,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
call->unmarshall++;
case 2:
- _debug("trailer");
- if (skb->len != 0)
- return -EBADMSG;
break;
}
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
-
call->state = AFS_CALL_REPLYING;
INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
@@ -574,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
/*
* deliver request data to a CB.TellMeAboutYourself call
*/
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
{
int ret;
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
- ret = afs_data_complete(call, skb, last);
+ ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index d91a9c9cfbd0..3191dff2c156 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -36,8 +36,8 @@ static int afs_init_lock_manager(void)
if (!afs_lock_manager) {
mutex_lock(&afs_lock_manager_mutex);
if (!afs_lock_manager) {
- afs_lock_manager =
- create_singlethread_workqueue("kafs_lockd");
+ afs_lock_manager = alloc_workqueue("kafs_lockd",
+ WQ_MEM_RECLAIM, 0);
if (!afs_lock_manager)
ret = -ENOMEM;
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 9312b92e54be..96f4d764d1a6 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
/*
* deliver reply data to an FS.FetchStatus
*/
-static int afs_deliver_fs_fetch_status(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server,
/*
* deliver reply data to an FS.FetchData
*/
-static int afs_deliver_fs_fetch_data(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
@@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
void *buffer;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
@@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
* client) */
case 1:
_debug("extract data length (MSW)");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
/* extract the returned data length */
case 2:
_debug("extract data length");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
_debug("extract data");
if (call->count > 0) {
page = call->reply3;
- buffer = kmap_atomic(page);
- ret = afs_extract_data(call, skb, last, buffer,
- call->count);
- kunmap_atomic(buffer);
+ buffer = kmap(page);
+ ret = afs_extract_data(call, buffer,
+ call->count, true);
+ kunmap(buffer);
if (ret < 0)
return ret;
}
@@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
/* extract the metadata */
case 4:
- ret = afs_extract_data(call, skb, last, call->buffer,
- (21 + 3 + 6) * 4);
+ ret = afs_extract_data(call, call->buffer,
+ (21 + 3 + 6) * 4, false);
if (ret < 0)
return ret;
@@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
call->unmarshall++;
case 5:
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
break;
}
if (call->count < PAGE_SIZE) {
_debug("clear");
page = call->reply3;
- buffer = kmap_atomic(page);
+ buffer = kmap(page);
memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap_atomic(buffer);
+ kunmap(buffer);
}
_leave(" = 0 [done]");
@@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server,
/*
* deliver reply data to an FS.GiveUpCallBacks
*/
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
{
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
/* shouldn't be any reply data */
- return afs_data_complete(call, skb, last);
+ return afs_extract_data(call, NULL, 0, false);
}
/*
@@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
/*
* deliver reply data to an FS.CreateFile or an FS.MakeDir
*/
-static int afs_deliver_fs_create_vnode(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_create_vnode(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server,
/*
* deliver reply data to an FS.RemoveFile or FS.RemoveDir
*/
-static int afs_deliver_fs_remove(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_remove(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server,
/*
* deliver reply data to an FS.Link
*/
-static int afs_deliver_fs_link(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_link(struct afs_call *call)
{
struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server,
/*
* deliver reply data to an FS.Symlink
*/
-static int afs_deliver_fs_symlink(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_symlink(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server,
/*
* deliver reply data to an FS.Rename
*/
-static int afs_deliver_fs_rename(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_rename(struct afs_call *call)
{
struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server,
/*
* deliver reply data to an FS.StoreData
*/
-static int afs_deliver_fs_store_data(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_data(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
/*
* deliver reply data to an FS.StoreStatus
*/
-static int afs_deliver_fs_store_status(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_status(struct afs_call *call)
{
afs_dataversion_t *store_version;
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
/*
* deliver reply data to an FS.GetVolumeStatus
*/
-static int afs_deliver_fs_get_volume_status(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_get_volume_status(struct afs_call *call)
{
const __be32 *bp;
char *p;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
@@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the returned status record */
case 1:
_debug("extract status");
- ret = afs_extract_data(call, skb, last, call->buffer,
- 12 * 4);
+ ret = afs_extract_data(call, call->buffer,
+ 12 * 4, true);
if (ret < 0)
return ret;
@@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the volume name length */
case 2:
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 3:
_debug("extract volname");
if (call->count > 0) {
- ret = afs_extract_data(call, skb, last, call->reply3,
- call->count);
+ ret = afs_extract_data(call, call->reply3,
+ call->count, true);
if (ret < 0)
return ret;
}
@@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
call->count = 4 - (call->count & 3);
case 4:
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count);
+ ret = afs_extract_data(call, call->buffer,
+ call->count, true);
if (ret < 0)
return ret;
@@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the offline message length */
case 5:
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 6:
_debug("extract offline");
if (call->count > 0) {
- ret = afs_extract_data(call, skb, last, call->reply3,
- call->count);
+ ret = afs_extract_data(call, call->reply3,
+ call->count, true);
if (ret < 0)
return ret;
}
@@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
call->count = 4 - (call->count & 3);
case 7:
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count);
+ ret = afs_extract_data(call, call->buffer,
+ call->count, true);
if (ret < 0)
return ret;
@@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the message of the day length */
case 8:
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 9:
_debug("extract motd");
if (call->count > 0) {
- ret = afs_extract_data(call, skb, last, call->reply3,
- call->count);
+ ret = afs_extract_data(call, call->reply3,
+ call->count, true);
if (ret < 0)
return ret;
}
@@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
call->unmarshall++;
/* extract the message of the day padding */
- if ((call->count & 3) == 0) {
- call->unmarshall++;
- goto no_motd_padding;
- }
- call->count = 4 - (call->count & 3);
+ call->count = (4 - (call->count & 3)) & 3;
case 10:
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count);
+ ret = afs_extract_data(call, call->buffer,
+ call->count, false);
if (ret < 0)
return ret;
call->offset = 0;
call->unmarshall++;
- no_motd_padding:
-
case 11:
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
break;
}
@@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server,
/*
* deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
*/
-static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
{
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index df976b2a7f40..5497c8496055 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -13,13 +13,13 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/skbuff.h>
#include <linux/rxrpc.h>
#include <linux/key.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/fscache.h>
#include <linux/backing-dev.h>
+#include <net/af_rxrpc.h>
#include "afs.h"
#include "afs_vl.h"
@@ -56,7 +56,7 @@ struct afs_mount_params {
*/
struct afs_wait_mode {
/* RxRPC received message notification */
- void (*rx_wakeup)(struct afs_call *call);
+ rxrpc_notify_rx_t notify_rx;
/* synchronous call waiter and call dispatched notification */
int (*wait)(struct afs_call *call);
@@ -75,10 +75,8 @@ struct afs_call {
const struct afs_call_type *type; /* type of call */
const struct afs_wait_mode *wait_mode; /* completion wait mode */
wait_queue_head_t waitq; /* processes awaiting completion */
- void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
struct work_struct async_work; /* asynchronous work processor */
struct work_struct work; /* actual work processor */
- struct sk_buff_head rx_queue; /* received packets */
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_server *server; /* server affected by incoming CM call */
@@ -92,6 +90,7 @@ struct afs_call {
void *reply4; /* reply buffer (fourth part) */
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
+ size_t offset; /* offset into received data store */
enum { /* call state */
AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
@@ -99,21 +98,18 @@ struct afs_call {
AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */
AFS_CALL_REPLYING, /* replying to incoming call */
AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */
- AFS_CALL_COMPLETE, /* successfully completed */
- AFS_CALL_BUSY, /* server was busy */
- AFS_CALL_ABORTED, /* call was aborted */
- AFS_CALL_ERROR, /* call failed due to error */
+ AFS_CALL_COMPLETE, /* Completed or failed */
} state;
int error; /* error code */
+ u32 abort_code; /* Remote abort ID or 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
- unsigned reply_size; /* current size of reply */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
- unsigned offset; /* offset into received data store */
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
+ bool need_attention; /* T if RxRPC poked us */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
__be32 operation_ID; /* operation ID for an incoming call */
@@ -128,8 +124,7 @@ struct afs_call_type {
/* deliver request or reply data to an call
* - returning an error will cause the call to be aborted
*/
- int (*deliver)(struct afs_call *call, struct sk_buff *skb,
- bool last);
+ int (*deliver)(struct afs_call *call);
/* map an abort code to an error number */
int (*abort_to_error)(u32 abort_code);
@@ -607,29 +602,22 @@ extern void afs_proc_cell_remove(struct afs_cell *);
/*
* rxrpc.c
*/
+extern struct socket *afs_socket;
+
extern int afs_open_socket(void);
extern void afs_close_socket(void);
-extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
const struct afs_wait_mode *);
extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
size_t, size_t);
extern void afs_flat_call_destructor(struct afs_call *);
-extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
extern void afs_send_empty_reply(struct afs_call *);
extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
- size_t);
+extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
-static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static inline int afs_transfer_reply(struct afs_call *call)
{
- if (skb->len > 0)
- return -EBADMSG;
- afs_data_consumed(call, skb);
- if (!last)
- return -EAGAIN;
- return 0;
+ return afs_extract_data(call, call->buffer, call->reply_max, false);
}
/*
@@ -654,7 +642,7 @@ do { \
extern struct afs_server *afs_lookup_server(struct afs_cell *,
const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct in_addr *);
+extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *);
extern void afs_put_server(struct afs_server *);
extern void __exit afs_purge_servers(void);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 35de0c04729f..0b187ef3b5b7 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/sched.h>
+#include <linux/random.h>
#include "internal.h"
MODULE_DESCRIPTION("AFS Client File System");
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 14d04c848465..59bdaa7527b6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -16,34 +16,36 @@
#include "internal.h"
#include "afs_cm.h"
-static struct socket *afs_socket; /* my RxRPC socket */
+struct socket *afs_socket; /* my RxRPC socket */
static struct workqueue_struct *afs_async_calls;
+static struct afs_call *afs_spare_incoming_call;
static atomic_t afs_outstanding_calls;
-static atomic_t afs_outstanding_skbs;
-static void afs_wake_up_call_waiter(struct afs_call *);
+static void afs_free_call(struct afs_call *);
+static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static int afs_wait_for_call_to_complete(struct afs_call *);
-static void afs_wake_up_async_call(struct afs_call *);
+static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static int afs_dont_wait_for_call_to_complete(struct afs_call *);
-static void afs_process_async_call(struct afs_call *);
-static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
-static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static int afs_deliver_cm_op_id(struct afs_call *);
/* synchronous call management */
const struct afs_wait_mode afs_sync_call = {
- .rx_wakeup = afs_wake_up_call_waiter,
+ .notify_rx = afs_wake_up_call_waiter,
.wait = afs_wait_for_call_to_complete,
};
/* asynchronous call management */
const struct afs_wait_mode afs_async_call = {
- .rx_wakeup = afs_wake_up_async_call,
+ .notify_rx = afs_wake_up_async_call,
.wait = afs_dont_wait_for_call_to_complete,
};
/* asynchronous incoming call management */
static const struct afs_wait_mode afs_async_incoming_call = {
- .rx_wakeup = afs_wake_up_async_call,
+ .notify_rx = afs_wake_up_async_call,
};
/* asynchronous incoming call initial processing */
@@ -53,17 +55,9 @@ static const struct afs_call_type afs_RXCMxxxx = {
.abort_to_error = afs_abort_to_error,
};
-static void afs_collect_incoming_call(struct work_struct *);
+static void afs_charge_preallocation(struct work_struct *);
-static struct sk_buff_head afs_incoming_calls;
-static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
-
-static void afs_async_workfn(struct work_struct *work)
-{
- struct afs_call *call = container_of(work, struct afs_call, async_work);
-
- call->async_workfn(call);
-}
+static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
static int afs_wait_atomic_t(atomic_t *p)
{
@@ -83,10 +77,8 @@ int afs_open_socket(void)
_enter("");
- skb_queue_head_init(&afs_incoming_calls);
-
ret = -ENOMEM;
- afs_async_calls = create_singlethread_workqueue("kafsd");
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
if (!afs_async_calls)
goto error_0;
@@ -110,13 +102,15 @@ int afs_open_socket(void)
if (ret < 0)
goto error_2;
+ rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
+ afs_rx_discard_new_call);
+
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
goto error_2;
- rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
-
afs_socket = socket;
+ afs_charge_preallocation(NULL);
_leave(" = 0");
return 0;
@@ -136,52 +130,28 @@ void afs_close_socket(void)
{
_enter("");
+ if (afs_spare_incoming_call) {
+ atomic_inc(&afs_outstanding_calls);
+ afs_free_call(afs_spare_incoming_call);
+ afs_spare_incoming_call = NULL;
+ }
+
+ _debug("outstanding %u", atomic_read(&afs_outstanding_calls));
wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
TASK_UNINTERRUPTIBLE);
_debug("no outstanding calls");
+ flush_workqueue(afs_async_calls);
+ kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+ flush_workqueue(afs_async_calls);
sock_release(afs_socket);
_debug("dework");
destroy_workqueue(afs_async_calls);
-
- ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
_leave("");
}
/*
- * Note that the data in a socket buffer is now consumed.
- */
-void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
-{
- if (!skb) {
- _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
- dump_stack();
- } else {
- _debug("DLVR %p{%u} [%d]",
- skb, skb->mark, atomic_read(&afs_outstanding_skbs));
- rxrpc_kernel_data_consumed(call->rxcall, skb);
- }
-}
-
-/*
- * free a socket buffer
- */
-static void afs_free_skb(struct sk_buff *skb)
-{
- if (!skb) {
- _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
- dump_stack();
- } else {
- _debug("FREE %p{%u} [%d]",
- skb, skb->mark, atomic_read(&afs_outstanding_skbs));
- if (atomic_dec_return(&afs_outstanding_skbs) == -1)
- BUG();
- rxrpc_kernel_free_skb(skb);
- }
-}
-
-/*
* free a call
*/
static void afs_free_call(struct afs_call *call)
@@ -191,7 +161,6 @@ static void afs_free_call(struct afs_call *call)
ASSERTCMP(call->rxcall, ==, NULL);
ASSERT(!work_pending(&call->async_work));
- ASSERT(skb_queue_empty(&call->rx_queue));
ASSERT(call->type->name != NULL);
kfree(call->request);
@@ -207,7 +176,7 @@ static void afs_free_call(struct afs_call *call)
static void afs_end_call_nofree(struct afs_call *call)
{
if (call->rxcall) {
- rxrpc_kernel_end_call(call->rxcall);
+ rxrpc_kernel_end_call(afs_socket, call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
@@ -227,7 +196,7 @@ static void afs_end_call(struct afs_call *call)
* allocate a call with flat request and reply buffers
*/
struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
- size_t request_size, size_t reply_size)
+ size_t request_size, size_t reply_max)
{
struct afs_call *call;
@@ -241,7 +210,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
call->type = type;
call->request_size = request_size;
- call->reply_max = reply_size;
+ call->reply_max = reply_max;
if (request_size) {
call->request = kmalloc(request_size, GFP_NOFS);
@@ -249,14 +218,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
goto nomem_free;
}
- if (reply_size) {
- call->buffer = kmalloc(reply_size, GFP_NOFS);
+ if (reply_max) {
+ call->buffer = kmalloc(reply_max, GFP_NOFS);
if (!call->buffer)
goto nomem_free;
}
init_waitqueue_head(&call->waitq);
- skb_queue_head_init(&call->rx_queue);
return call;
nomem_free:
@@ -325,8 +293,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
* returns from sending the request */
if (first + loop >= last)
call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(call->rxcall, msg,
- to - offset);
+ ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+ msg, to - offset);
kunmap(pages[loop]);
if (ret < 0)
break;
@@ -354,7 +322,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct msghdr msg;
struct kvec iov[1];
int ret;
- struct sk_buff *skb;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -366,8 +333,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
atomic_read(&afs_outstanding_calls));
call->wait_mode = wait_mode;
- call->async_workfn = afs_process_async_call;
- INIT_WORK(&call->async_work, afs_async_workfn);
+ INIT_WORK(&call->async_work, afs_process_async_call);
memset(&srx, 0, sizeof(srx));
srx.srx_family = AF_RXRPC;
@@ -380,7 +346,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
/* create a call */
rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
- (unsigned long) call, gfp);
+ (unsigned long) call, gfp,
+ wait_mode->notify_rx);
call->key = NULL;
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -406,7 +373,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
* request */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+ ret = rxrpc_kernel_send_data(afs_socket, rxcall,
+ &msg, call->request_size);
if (ret < 0)
goto error_do_abort;
@@ -421,9 +389,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
return wait_mode->wait(call);
error_do_abort:
- rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
- while ((skb = skb_dequeue(&call->rx_queue)))
- afs_free_skb(skb);
+ rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
error_kill_call:
afs_end_call(call);
_leave(" = %d", ret);
@@ -431,140 +397,77 @@ error_kill_call:
}
/*
- * Handles intercepted messages that were arriving in the socket's Rx queue.
- *
- * Called from the AF_RXRPC call processor in waitqueue process context. For
- * each call, it is guaranteed this will be called in order of packet to be
- * delivered.
- */
-static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
- struct sk_buff *skb)
-{
- struct afs_call *call = (struct afs_call *) user_call_ID;
-
- _enter("%p,,%u", call, skb->mark);
-
- _debug("ICPT %p{%u} [%d]",
- skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-
- ASSERTCMP(sk, ==, afs_socket->sk);
- atomic_inc(&afs_outstanding_skbs);
-
- if (!call) {
- /* its an incoming call for our callback service */
- skb_queue_tail(&afs_incoming_calls, skb);
- queue_work(afs_wq, &afs_collect_incoming_call_work);
- } else {
- /* route the messages directly to the appropriate call */
- skb_queue_tail(&call->rx_queue, skb);
- call->wait_mode->rx_wakeup(call);
- }
-
- _leave("");
-}
-
-/*
* deliver messages to a call
*/
static void afs_deliver_to_call(struct afs_call *call)
{
- struct sk_buff *skb;
- bool last;
u32 abort_code;
int ret;
- _enter("");
-
- while ((call->state == AFS_CALL_AWAIT_REPLY ||
- call->state == AFS_CALL_AWAIT_OP_ID ||
- call->state == AFS_CALL_AWAIT_REQUEST ||
- call->state == AFS_CALL_AWAIT_ACK) &&
- (skb = skb_dequeue(&call->rx_queue))) {
- switch (skb->mark) {
- case RXRPC_SKB_MARK_DATA:
- _debug("Rcv DATA");
- last = rxrpc_kernel_is_data_last(skb);
- ret = call->type->deliver(call, skb, last);
- switch (ret) {
- case -EAGAIN:
- if (last) {
- _debug("short data");
- goto unmarshal_error;
- }
- break;
- case 0:
- ASSERT(last);
- if (call->state == AFS_CALL_AWAIT_REPLY)
- call->state = AFS_CALL_COMPLETE;
- break;
- case -ENOTCONN:
- abort_code = RX_CALL_DEAD;
- goto do_abort;
- case -ENOTSUPP:
- abort_code = RX_INVALID_OPERATION;
- goto do_abort;
- default:
- unmarshal_error:
- abort_code = RXGEN_CC_UNMARSHAL;
- if (call->state != AFS_CALL_AWAIT_REPLY)
- abort_code = RXGEN_SS_UNMARSHAL;
- do_abort:
- rxrpc_kernel_abort_call(call->rxcall,
- abort_code);
- call->error = ret;
- call->state = AFS_CALL_ERROR;
- break;
+ _enter("%s", call->type->name);
+
+ while (call->state == AFS_CALL_AWAIT_REPLY ||
+ call->state == AFS_CALL_AWAIT_OP_ID ||
+ call->state == AFS_CALL_AWAIT_REQUEST ||
+ call->state == AFS_CALL_AWAIT_ACK
+ ) {
+ if (call->state == AFS_CALL_AWAIT_ACK) {
+ size_t offset = 0;
+ ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+ NULL, 0, &offset, false,
+ &call->abort_code);
+ if (ret == -EINPROGRESS || ret == -EAGAIN)
+ return;
+ if (ret == 1) {
+ call->state = AFS_CALL_COMPLETE;
+ goto done;
}
- break;
- case RXRPC_SKB_MARK_FINAL_ACK:
- _debug("Rcv ACK");
- call->state = AFS_CALL_COMPLETE;
- break;
- case RXRPC_SKB_MARK_BUSY:
- _debug("Rcv BUSY");
- call->error = -EBUSY;
- call->state = AFS_CALL_BUSY;
- break;
- case RXRPC_SKB_MARK_REMOTE_ABORT:
- abort_code = rxrpc_kernel_get_abort_code(skb);
- call->error = call->type->abort_to_error(abort_code);
- call->state = AFS_CALL_ABORTED;
- _debug("Rcv ABORT %u -> %d", abort_code, call->error);
- break;
- case RXRPC_SKB_MARK_LOCAL_ABORT:
- abort_code = rxrpc_kernel_get_abort_code(skb);
- call->error = call->type->abort_to_error(abort_code);
- call->state = AFS_CALL_ABORTED;
- _debug("Loc ABORT %u -> %d", abort_code, call->error);
- break;
- case RXRPC_SKB_MARK_NET_ERROR:
- call->error = -rxrpc_kernel_get_error_number(skb);
- call->state = AFS_CALL_ERROR;
- _debug("Rcv NET ERROR %d", call->error);
- break;
- case RXRPC_SKB_MARK_LOCAL_ERROR:
- call->error = -rxrpc_kernel_get_error_number(skb);
- call->state = AFS_CALL_ERROR;
- _debug("Rcv LOCAL ERROR %d", call->error);
- break;
- default:
- BUG();
- break;
+ return;
}
- afs_free_skb(skb);
- }
-
- /* make sure the queue is empty if the call is done with (we might have
- * aborted the call early because of an unmarshalling error) */
- if (call->state >= AFS_CALL_COMPLETE) {
- while ((skb = skb_dequeue(&call->rx_queue)))
- afs_free_skb(skb);
- if (call->incoming)
- afs_end_call(call);
+ ret = call->type->deliver(call);
+ switch (ret) {
+ case 0:
+ if (call->state == AFS_CALL_AWAIT_REPLY)
+ call->state = AFS_CALL_COMPLETE;
+ goto done;
+ case -EINPROGRESS:
+ case -EAGAIN:
+ goto out;
+ case -ENOTCONN:
+ abort_code = RX_CALL_DEAD;
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ abort_code, -ret, "KNC");
+ goto do_abort;
+ case -ENOTSUPP:
+ abort_code = RX_INVALID_OPERATION;
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ abort_code, -ret, "KIV");
+ goto do_abort;
+ case -ENODATA:
+ case -EBADMSG:
+ case -EMSGSIZE:
+ default:
+ abort_code = RXGEN_CC_UNMARSHAL;
+ if (call->state != AFS_CALL_AWAIT_REPLY)
+ abort_code = RXGEN_SS_UNMARSHAL;
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ abort_code, EBADMSG, "KUM");
+ goto do_abort;
+ }
}
+done:
+ if (call->state == AFS_CALL_COMPLETE && call->incoming)
+ afs_end_call(call);
+out:
_leave("");
+ return;
+
+do_abort:
+ call->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ goto done;
}
/*
@@ -572,7 +475,7 @@ static void afs_deliver_to_call(struct afs_call *call)
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
- struct sk_buff *skb;
+ const char *abort_why;
int ret;
DECLARE_WAITQUEUE(myself, current);
@@ -584,15 +487,18 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
set_current_state(TASK_INTERRUPTIBLE);
/* deliver any messages that are in the queue */
- if (!skb_queue_empty(&call->rx_queue)) {
+ if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+ call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
continue;
}
+ abort_why = "KWC";
ret = call->error;
- if (call->state >= AFS_CALL_COMPLETE)
+ if (call->state == AFS_CALL_COMPLETE)
break;
+ abort_why = "KWI";
ret = -EINTR;
if (signal_pending(current))
break;
@@ -605,9 +511,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
/* kill the call */
if (call->state < AFS_CALL_COMPLETE) {
_debug("call incomplete");
- rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
- while ((skb = skb_dequeue(&call->rx_queue)))
- afs_free_skb(skb);
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ RX_CALL_DEAD, -ret, abort_why);
}
_debug("call complete");
@@ -619,17 +524,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
/*
* wake up a waiting call
*/
-static void afs_wake_up_call_waiter(struct afs_call *call)
+static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
{
+ struct afs_call *call = (struct afs_call *)call_user_ID;
+
+ call->need_attention = true;
wake_up(&call->waitq);
}
/*
* wake up an asynchronous call
*/
-static void afs_wake_up_async_call(struct afs_call *call)
+static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
{
- _enter("");
+ struct afs_call *call = (struct afs_call *)call_user_ID;
+
+ call->need_attention = true;
queue_work(afs_async_calls, &call->async_work);
}
@@ -647,8 +559,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
/*
* delete an asynchronous call
*/
-static void afs_delete_async_call(struct afs_call *call)
+static void afs_delete_async_call(struct work_struct *work)
{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
_enter("");
afs_free_call(call);
@@ -658,17 +572,19 @@ static void afs_delete_async_call(struct afs_call *call)
/*
* perform processing on an asynchronous call
- * - on a multiple-thread workqueue this work item may try to run on several
- * CPUs at the same time
*/
-static void afs_process_async_call(struct afs_call *call)
+static void afs_process_async_call(struct work_struct *work)
{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
_enter("");
- if (!skb_queue_empty(&call->rx_queue))
+ if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+ call->need_attention = false;
afs_deliver_to_call(call);
+ }
- if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
+ if (call->state == AFS_CALL_COMPLETE && call->wait_mode) {
if (call->wait_mode->async_complete)
call->wait_mode->async_complete(call->reply,
call->error);
@@ -679,122 +595,93 @@ static void afs_process_async_call(struct afs_call *call)
/* we can't just delete the call because the work item may be
* queued */
- call->async_workfn = afs_delete_async_call;
+ call->async_work.func = afs_delete_async_call;
queue_work(afs_async_calls, &call->async_work);
}
_leave("");
}
-/*
- * Empty a socket buffer into a flat reply buffer.
- */
-int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
{
- size_t len = skb->len;
-
- if (len > call->reply_max - call->reply_size) {
- _leave(" = -EBADMSG [%zu > %u]",
- len, call->reply_max - call->reply_size);
- return -EBADMSG;
- }
+ struct afs_call *call = (struct afs_call *)user_call_ID;
- if (len > 0) {
- if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
- len) < 0)
- BUG();
- call->reply_size += len;
- }
-
- afs_data_consumed(call, skb);
- if (!last)
- return -EAGAIN;
-
- if (call->reply_size != call->reply_max) {
- _leave(" = -EBADMSG [%u != %u]",
- call->reply_size, call->reply_max);
- return -EBADMSG;
- }
- return 0;
+ call->rxcall = rxcall;
}
/*
- * accept the backlog of incoming calls
+ * Charge the incoming call preallocation.
*/
-static void afs_collect_incoming_call(struct work_struct *work)
+static void afs_charge_preallocation(struct work_struct *work)
{
- struct rxrpc_call *rxcall;
- struct afs_call *call = NULL;
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&afs_incoming_calls))) {
- _debug("new call");
-
- /* don't need the notification */
- afs_free_skb(skb);
+ struct afs_call *call = afs_spare_incoming_call;
+ for (;;) {
if (!call) {
call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
- if (!call) {
- rxrpc_kernel_reject_call(afs_socket);
- return;
- }
+ if (!call)
+ break;
- call->async_workfn = afs_process_async_call;
- INIT_WORK(&call->async_work, afs_async_workfn);
+ INIT_WORK(&call->async_work, afs_process_async_call);
call->wait_mode = &afs_async_incoming_call;
call->type = &afs_RXCMxxxx;
init_waitqueue_head(&call->waitq);
- skb_queue_head_init(&call->rx_queue);
call->state = AFS_CALL_AWAIT_OP_ID;
-
- _debug("CALL %p{%s} [%d]",
- call, call->type->name,
- atomic_read(&afs_outstanding_calls));
- atomic_inc(&afs_outstanding_calls);
}
- rxcall = rxrpc_kernel_accept_call(afs_socket,
- (unsigned long) call);
- if (!IS_ERR(rxcall)) {
- call->rxcall = rxcall;
- call = NULL;
- }
+ if (rxrpc_kernel_charge_accept(afs_socket,
+ afs_wake_up_async_call,
+ afs_rx_attach,
+ (unsigned long)call,
+ GFP_KERNEL) < 0)
+ break;
+ call = NULL;
}
+ afs_spare_incoming_call = call;
+}
+
+/*
+ * Discard a preallocated call when a socket is shut down.
+ */
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ struct afs_call *call = (struct afs_call *)user_call_ID;
- if (call)
- afs_free_call(call);
+ atomic_inc(&afs_outstanding_calls);
+ call->rxcall = NULL;
+ afs_free_call(call);
+}
+
+/*
+ * Notification of an incoming call.
+ */
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ atomic_inc(&afs_outstanding_calls);
+ queue_work(afs_wq, &afs_charge_preallocation_work);
}
/*
* Grab the operation ID from an incoming cache manager call. The socket
* buffer is discarded on error or if we don't yet have sufficient data.
*/
-static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cm_op_id(struct afs_call *call)
{
- size_t len = skb->len;
- void *oibuf = (void *) &call->operation_ID;
+ int ret;
- _enter("{%u},{%zu},%d", call->offset, len, last);
+ _enter("{%zu}", call->offset);
ASSERTCMP(call->offset, <, 4);
/* the operation ID forms the first four bytes of the request data */
- len = min_t(size_t, len, 4 - call->offset);
- if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
- BUG();
- if (!pskb_pull(skb, len))
- BUG();
- call->offset += len;
-
- if (call->offset < 4) {
- afs_data_consumed(call, skb);
- _leave(" = -EAGAIN");
- return -EAGAIN;
- }
+ ret = afs_extract_data(call, &call->operation_ID, 4, true);
+ if (ret < 0)
+ return ret;
call->state = AFS_CALL_AWAIT_REQUEST;
+ call->offset = 0;
/* ask the cache manager to route the call (it'll change the call type
* if successful) */
@@ -803,7 +690,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
/* pass responsibility for the remainer of this message off to the
* cache manager op */
- return call->type->deliver(call, skb, last);
+ return call->type->deliver(call);
}
/*
@@ -823,14 +710,15 @@ void afs_send_empty_reply(struct afs_call *call)
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
- switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+ switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) {
case 0:
_leave(" [replied]");
return;
case -ENOMEM:
_debug("oom");
- rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ RX_USER_ABORT, ENOMEM, "KOO");
default:
afs_end_call(call);
_leave(" [error]");
@@ -859,7 +747,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
- n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
+ n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len);
if (n >= 0) {
/* Success */
_leave(" [replied]");
@@ -868,7 +756,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
if (n == -ENOMEM) {
_debug("oom");
- rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ RX_USER_ABORT, ENOMEM, "KOO");
}
afs_end_call(call);
_leave(" [error]");
@@ -877,25 +766,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
/*
* Extract a piece of data from the received data socket buffers.
*/
-int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
- bool last, void *buf, size_t count)
+int afs_extract_data(struct afs_call *call, void *buf, size_t count,
+ bool want_more)
{
- size_t len = skb->len;
+ int ret;
- _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+ _enter("{%s,%zu},,%zu,%d",
+ call->type->name, call->offset, count, want_more);
- ASSERTCMP(call->offset, <, count);
+ ASSERTCMP(call->offset, <=, count);
- len = min_t(size_t, len, count - call->offset);
- if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
- !pskb_pull(skb, len))
- BUG();
- call->offset += len;
+ ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+ buf, count, &call->offset,
+ want_more, &call->abort_code);
+ if (ret == 0 || ret == -EAGAIN)
+ return ret;
- if (call->offset < count) {
- afs_data_consumed(call, skb);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ if (ret == 1) {
+ switch (call->state) {
+ case AFS_CALL_AWAIT_REPLY:
+ call->state = AFS_CALL_COMPLETE;
+ break;
+ case AFS_CALL_AWAIT_REQUEST:
+ call->state = AFS_CALL_REPLYING;
+ break;
+ default:
+ break;
+ }
+ return 0;
}
- return 0;
+
+ if (ret == -ECONNABORTED)
+ call->error = call->type->abort_to_error(call->abort_code);
+ else
+ call->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ return ret;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f342acf3547d..d4066ab7dd55 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -178,13 +178,18 @@ server_in_two_cells:
/*
* look up a server by its IP address
*/
-struct afs_server *afs_find_server(const struct in_addr *_addr)
+struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
{
struct afs_server *server = NULL;
struct rb_node *p;
- struct in_addr addr = *_addr;
+ struct in_addr addr = srx->transport.sin.sin_addr;
- _enter("%pI4", &addr.s_addr);
+ _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr);
+
+ if (srx->transport.family != AF_INET) {
+ WARN(true, "AFS does not yet support non-IPv4 addresses\n");
+ return NULL;
+ }
read_lock(&afs_servers_lock);
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index f94d1abdc3eb..94bcd97d22b8 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code)
/*
* deliver reply data to a VL.GetEntryByXXX call
*/
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
{
struct afs_cache_vlocation *entry;
__be32 *bp;
u32 tmp;
int loop, ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 52976785a32c..45a86396fd2d 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -594,8 +594,8 @@ static void afs_vlocation_reaper(struct work_struct *work)
*/
int __init afs_vlocation_update_init(void)
{
- afs_vlocation_update_worker =
- create_singlethread_workqueue("kafs_vlupdated");
+ afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
+ WQ_MEM_RECLAIM, 0);
return afs_vlocation_update_worker ? 0 : -ENOMEM;
}
diff --git a/fs/aio.c b/fs/aio.c
index 4fe81d1c60f9..1157e13a36d6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -274,14 +274,17 @@ __initcall(aio_setup);
static void put_aio_ring_file(struct kioctx *ctx)
{
struct file *aio_ring_file = ctx->aio_ring_file;
+ struct address_space *i_mapping;
+
if (aio_ring_file) {
truncate_setsize(aio_ring_file->f_inode, 0);
/* Prevent further access to the kioctx from migratepages */
- spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
- aio_ring_file->f_inode->i_mapping->private_data = NULL;
+ i_mapping = aio_ring_file->f_inode->i_mapping;
+ spin_lock(&i_mapping->private_lock);
+ i_mapping->private_data = NULL;
ctx->aio_ring_file = NULL;
- spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
+ spin_unlock(&i_mapping->private_lock);
fput(aio_ring_file);
}
diff --git a/fs/attr.c b/fs/attr.c
index 42bb42bb3c72..a19a64d41e7e 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -17,19 +17,22 @@
#include <linux/ima.h>
/**
- * inode_change_ok - check if attribute changes to an inode are allowed
- * @inode: inode to check
+ * setattr_prepare - check if attribute changes to a dentry are allowed
+ * @dentry: dentry to check
* @attr: attributes to change
*
* Check if we are allowed to change the attributes contained in @attr
- * in the given inode. This includes the normal unix access permission
- * checks, as well as checks for rlimits and others.
+ * in the given dentry. This includes the normal unix access permission
+ * checks, as well as checks for rlimits and others. The function also clears
+ * the SGID bit from the mode if the user may not set it. File capabilities and
+ * IMA extended attributes are also cleared if ATTR_KILL_PRIV is set.
*
* Should be called as the first thing in ->setattr implementations,
* possibly after taking additional locks.
*/
-int inode_change_ok(const struct inode *inode, struct iattr *attr)
+int setattr_prepare(struct dentry *dentry, struct iattr *attr)
{
+ struct inode *inode = d_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
/*
@@ -44,7 +47,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* If force is set do it anyway. */
if (ia_valid & ATTR_FORCE)
- return 0;
+ goto kill_priv;
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
@@ -77,9 +80,19 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
return -EPERM;
}
+kill_priv:
+ /* User has permission for the change */
+ if (ia_valid & ATTR_KILL_PRIV) {
+ int error;
+
+ error = security_inode_killpriv(dentry);
+ if (error)
+ return error;
+ }
+
return 0;
}
-EXPORT_SYMBOL(inode_change_ok);
+EXPORT_SYMBOL(setattr_prepare);
/**
* inode_newsize_ok - may this inode be truncated to a given size
@@ -202,6 +215,21 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
return -EPERM;
}
+ /*
+ * If utimes(2) and friends are called with times == NULL (or both
+ * times are UTIME_NOW), then we need to check for write permission
+ */
+ if (ia_valid & ATTR_TOUCH) {
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (!inode_owner_or_capable(inode)) {
+ error = inode_permission(inode, MAY_WRITE);
+ if (error)
+ return error;
+ }
+ }
+
if ((ia_valid & ATTR_MODE)) {
umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
@@ -217,13 +245,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
if (!(ia_valid & ATTR_MTIME_SET))
attr->ia_mtime = now;
if (ia_valid & ATTR_KILL_PRIV) {
- attr->ia_valid &= ~ATTR_KILL_PRIV;
- ia_valid &= ~ATTR_KILL_PRIV;
error = security_inode_need_killpriv(dentry);
- if (error > 0)
- error = security_inode_killpriv(dentry);
- if (error)
+ if (error < 0)
return error;
+ if (error == 0)
+ ia_valid = attr->ia_valid &= ~ATTR_KILL_PRIV;
}
/*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 431fd7ee3488..e44271dfceb6 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current_uid();
- wq->gid = current_gid();
+ wq->uid = current_real_cred()->uid;
+ wq->gid = current_real_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7da05b159ade..bfe9f9994935 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -789,7 +789,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
* Will be set to real fs blocksize later.
*
* Linux 2.4.10 and later refuse to read blocks smaller than
- * the hardsect size for the device. But we also need to read at
+ * the logical block size for the device. But we also need to read at
* least 1k to get the second 512 bytes of the volume.
* -WD 10-26-01
*/
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e5495f37c6ed..2472af2798c7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1624,20 +1624,12 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
-#ifndef PR_REG_SIZE
-#define PR_REG_SIZE(S) sizeof(S)
-#endif
-
#ifndef PRSTATUS_SIZE
-#define PRSTATUS_SIZE(S) sizeof(S)
-#endif
-
-#ifndef PR_REG_PTR
-#define PR_REG_PTR(S) (&((S)->pr_reg))
+#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif
#ifndef SET_PR_FPVALID
-#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
static int fill_thread_core_info(struct elf_thread_core_info *t,
@@ -1645,6 +1637,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
long signr, size_t *total)
{
unsigned int i;
+ unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
/*
* NT_PRSTATUS is the one special case, because the regset data
@@ -1653,12 +1646,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
* We assume that regset 0 is NT_PRSTATUS.
*/
fill_prstatus(&t->prstatus, t->task, signr);
- (void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, PR_REG_SIZE(t->prstatus.pr_reg),
- PR_REG_PTR(&t->prstatus), NULL);
+ (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
+ &t->prstatus.pr_reg, NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- PRSTATUS_SIZE(t->prstatus), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1688,7 +1680,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
regset->core_note_type,
size, data);
else {
- SET_PR_FPVALID(&t->prstatus, 1);
+ SET_PR_FPVALID(&t->prstatus,
+ 1, regset_size);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 08ae99343d92..376e4e426324 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -180,9 +180,6 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct inode *inode = bdev_file_inode(file);
- if (IS_DAX(inode))
- return dax_do_io(iocb, inode, iter, blkdev_get_block,
- NULL, DIO_SKIP_DIO_COUNT);
return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter,
blkdev_get_block, NULL, NULL,
DIO_SKIP_DIO_COUNT);
@@ -302,14 +299,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
error = sb->s_op->thaw_super(sb);
else
error = thaw_super(sb);
- if (error) {
+ if (error)
bdev->bd_fsfreeze_count++;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
- }
out:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return 0;
+ return error;
}
EXPORT_SYMBOL(thaw_bdev);
@@ -1275,7 +1269,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
- bdev->bd_inode->i_flags = 0;
if (!partno) {
ret = -ENXIO;
@@ -1303,11 +1296,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret) {
+ if (!ret)
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
- if (!bdev_dax_capable(bdev))
- bdev->bd_inode->i_flags &= ~S_DAX;
- }
/*
* If the device is invalidated, rescan partition
@@ -1342,8 +1332,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
- if (!bdev_dax_capable(bdev))
- bdev->bd_inode->i_flags &= ~S_DAX;
}
} else {
if (bdev->bd_contains == bdev) {
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 53bb7af4e5f0..247b8dfaf6e5 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -79,11 +79,9 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- ret = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (ret < 0)
+ ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (ret)
return ret;
- if (ret == 0)
- acl = NULL;
}
ret = 0;
break;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 33fe03551105..e62fd50237e4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3161,7 +3161,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
struct btrfs_trans_handle *trans, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
-int btrfs_inode_check_errors(struct inode *inode);
extern const struct dentry_operations btrfs_dentry_operations;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_inode_set_ops(struct inode *inode);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fea31a4a6e36..4843cb994835 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2040,7 +2040,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* flags for any errors that might have happened while doing
* writeback of file data.
*/
- ret = btrfs_inode_check_errors(inode);
+ ret = filemap_check_errors(inode->i_mapping);
inode_unlock(inode);
goto out;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d06c6a288512..994fe5af160b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5072,7 +5072,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (btrfs_root_readonly(root))
return -EROFS;
- err = inode_change_ok(inode, attr);
+ err = setattr_prepare(dentry, attr);
if (err)
return err;
@@ -8412,7 +8412,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
if (!bio)
return -ENOMEM;
- bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
+ bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio));
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
@@ -8450,7 +8450,8 @@ next_block:
start_sector, GFP_NOFS);
if (!bio)
goto out_err;
- bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
+ bio_set_op_attrs(bio, bio_op(orig_bio),
+ bio_flags(orig_bio));
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
@@ -8618,7 +8619,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
goto out;
/* If this is a write we don't need to check anymore */
- if (iov_iter_rw(iter) == WRITE)
+ if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
return 0;
/*
* Check to make sure we don't have duplicate iov_base's in this
@@ -10543,21 +10544,6 @@ out_inode:
}
-/* Inspired by filemap_check_errors() */
-int btrfs_inode_check_errors(struct inode *inode)
-{
- int ret = 0;
-
- if (test_bit(AS_ENOSPC, &inode->i_mapping->flags) &&
- test_and_clear_bit(AS_ENOSPC, &inode->i_mapping->flags))
- ret = -ENOSPC;
- if (test_bit(AS_EIO, &inode->i_mapping->flags) &&
- test_and_clear_bit(AS_EIO, &inode->i_mapping->flags))
- ret = -EIO;
-
- return ret;
-}
-
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a87675ffd02b..1379e59277e2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4329,7 +4329,7 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
int ret;
struct send_ctx *sctx = ctx;
struct fs_path *p;
- posix_acl_xattr_header dummy_acl;
+ struct posix_acl_xattr_header dummy_acl;
p = fs_path_alloc();
if (!p)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ef9c55bc7907..8a84ebd8e7cc 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3961,7 +3961,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
* i_mapping flags, so that the next fsync won't get
* an outdated io error too.
*/
- btrfs_inode_check_errors(inode);
+ filemap_check_errors(inode->i_mapping);
*ordered_io_error = true;
break;
}
@@ -4198,7 +4198,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
* without writing to the log tree and the fsync must report the
* file data write error and not commit the current transaction.
*/
- ret = btrfs_inode_check_errors(inode);
+ ret = filemap_check_errors(inode->i_mapping);
if (ret)
ctx->io_err = ret;
process:
diff --git a/fs/buffer.c b/fs/buffer.c
index 9c8eb9b6db6a..7dad8713fac8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1078,7 +1078,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}
-struct buffer_head *
+static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
@@ -1109,7 +1109,6 @@ __getblk_slow(struct block_device *bdev, sector_t block,
free_more_memory();
}
}
-EXPORT_SYMBOL(__getblk_slow);
/*
* The relationship between dirty buffers and dirty pages:
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index ce5f345d70f5..e7f16a77a22a 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -253,6 +253,8 @@ static void cachefiles_drop_object(struct fscache_object *_object)
struct cachefiles_object *object;
struct cachefiles_cache *cache;
const struct cred *saved_cred;
+ struct inode *inode;
+ blkcnt_t i_blocks = 0;
ASSERT(_object);
@@ -279,6 +281,10 @@ static void cachefiles_drop_object(struct fscache_object *_object)
_object != cache->cache.fsdef
) {
_debug("- retire object OBJ%x", object->fscache.debug_id);
+ inode = d_backing_inode(object->dentry);
+ if (inode)
+ i_blocks = inode->i_blocks;
+
cachefiles_begin_secure(cache, &saved_cred);
cachefiles_delete_object(cache, object);
cachefiles_end_secure(cache, saved_cred);
@@ -292,7 +298,7 @@ static void cachefiles_drop_object(struct fscache_object *_object)
/* note that the object is now inactive */
if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags))
- cachefiles_mark_object_inactive(cache, object);
+ cachefiles_mark_object_inactive(cache, object, i_blocks);
dput(object->dentry);
object->dentry = NULL;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 2fcde1a34b7c..cd1effee8a49 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -160,7 +160,8 @@ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
* namei.c
*/
extern void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
- struct cachefiles_object *object);
+ struct cachefiles_object *object,
+ blkcnt_t i_blocks);
extern int cachefiles_delete_object(struct cachefiles_cache *cache,
struct cachefiles_object *object);
extern int cachefiles_walk_to_object(struct cachefiles_object *parent,
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 6eb3dec2adbb..339c910da916 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -262,10 +262,9 @@ requeue:
* Mark an object as being inactive.
*/
void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
- struct cachefiles_object *object)
+ struct cachefiles_object *object,
+ blkcnt_t i_blocks)
{
- blkcnt_t i_blocks = d_backing_inode(object->dentry)->i_blocks;
-
write_lock(&cache->active_lock);
rb_erase(&object->active_node, &cache->active_nodes);
clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
@@ -708,7 +707,8 @@ mark_active_timed_out:
check_error:
_debug("check error %d", ret);
- cachefiles_mark_object_inactive(cache, object);
+ cachefiles_mark_object_inactive(
+ cache, object, d_backing_inode(object->dentry)->i_blocks);
release_dentry:
dput(object->dentry);
object->dentry = NULL;
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 4f67227f69a5..987044bca1c2 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -95,11 +95,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- ret = posix_acl_equiv_mode(acl, &new_mode);
- if (ret < 0)
+ ret = posix_acl_update_mode(inode, &new_mode, &acl);
+ if (ret)
goto out;
- if (ret == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
@@ -127,6 +125,11 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
goto out_free;
}
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
+ ret = -EROFS;
+ goto out_free;
+ }
+
if (new_mode != old_mode) {
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index d5b6f959a3c3..ef3ebd780aff 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -175,9 +175,8 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
static int ceph_releasepage(struct page *page, gfp_t g)
{
- dout("%p releasepage %p idx %lu\n", page->mapping->host,
- page, page->index);
- WARN_ON(PageDirty(page));
+ dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
+ page, page->index, PageDirty(page) ? "" : "not ");
/* Can we release the page from the cache? */
if (!ceph_release_fscache_page(page, g))
@@ -298,14 +297,6 @@ unlock:
kfree(osd_data->pages);
}
-static void ceph_unlock_page_vector(struct page **pages, int num_pages)
-{
- int i;
-
- for (i = 0; i < num_pages; i++)
- unlock_page(pages[i]);
-}
-
/*
* start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code.
@@ -370,6 +361,10 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
dout("start_read %p add_to_page_cache failed %p\n",
inode, page);
nr_pages = i;
+ if (nr_pages > 0) {
+ len = nr_pages << PAGE_SHIFT;
+ break;
+ }
goto out_pages;
}
pages[i] = page;
@@ -386,8 +381,11 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
return nr_pages;
out_pages:
- ceph_unlock_page_vector(pages, nr_pages);
- ceph_release_page_vector(pages, nr_pages);
+ for (i = 0; i < nr_pages; ++i) {
+ ceph_fscache_readpage_cancel(inode, pages[i]);
+ unlock_page(pages[i]);
+ }
+ ceph_put_page_vector(pages, nr_pages, false);
out:
ceph_osdc_put_request(req);
return ret;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 0f5375d8e030..395c7fcb1cea 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -902,10 +902,10 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
return ret;
if (write) {
- ret = invalidate_inode_pages2_range(inode->i_mapping,
+ int ret2 = invalidate_inode_pages2_range(inode->i_mapping,
pos >> PAGE_SHIFT,
(pos + count) >> PAGE_SHIFT);
- if (ret < 0)
+ if (ret2 < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
flags = CEPH_OSD_FLAG_ORDERSNAP |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e70b4f535c79..da00b11d4a7a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1899,13 +1899,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int inode_dirty_flags = 0;
bool lock_snap_rwsem = false;
- if (ceph_snap(inode) != CEPH_NOSNAP)
- return -EROFS;
-
- err = inode_change_ok(inode, attr);
- if (err != 0)
- return err;
-
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;
@@ -2118,7 +2111,17 @@ out_put:
*/
int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
- return __ceph_setattr(d_inode(dentry), attr);
+ struct inode *inode = d_inode(dentry);
+ int err;
+
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ err = setattr_prepare(dentry, attr);
+ if (err != 0)
+ return err;
+
+ return __ceph_setattr(inode, attr);
}
/*
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index a2cb0c254060..6806dbeaee19 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -210,8 +210,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
/* No mandatory locks */
- if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
+ if (fl->fl_type & LOCK_MAND)
+ return -EOPNOTSUPP;
dout("ceph_flock, fl_file: %p", fl->fl_file);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f72d4ae303b2..815acd1a56d4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -370,6 +370,7 @@ const char *ceph_session_state_name(int s)
case CEPH_MDS_SESSION_CLOSING: return "closing";
case CEPH_MDS_SESSION_RESTARTING: return "restarting";
case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
+ case CEPH_MDS_SESSION_REJECTED: return "rejected";
default: return "???";
}
}
@@ -1150,8 +1151,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
while (!list_empty(&ci->i_cap_flush_list)) {
cf = list_first_entry(&ci->i_cap_flush_list,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- list_add(&cf->i_list, &to_remove);
+ list_move(&cf->i_list, &to_remove);
}
spin_lock(&mdsc->cap_dirty_lock);
@@ -1378,7 +1378,7 @@ static int request_close_session(struct ceph_mds_client *mdsc,
if (!msg)
return -ENOMEM;
ceph_con_send(&session->s_con, msg);
- return 0;
+ return 1;
}
/*
@@ -2131,6 +2131,10 @@ static int __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
+ if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
+ err = -EACCES;
+ goto out_session;
+ }
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING)
__open_session(mdsc, session);
@@ -2652,6 +2656,15 @@ static void handle_session(struct ceph_mds_session *session,
wake_up_session_caps(session, 0);
break;
+ case CEPH_SESSION_REJECT:
+ WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING);
+ pr_info("mds%d rejected session\n", session->s_mds);
+ session->s_state = CEPH_MDS_SESSION_REJECTED;
+ cleanup_session_requests(mdsc, session);
+ remove_session_caps(session);
+ wake = 2; /* for good measure */
+ break;
+
default:
pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
WARN_ON(1);
@@ -3557,11 +3570,11 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
/*
* true if all sessions are closed, or we force unmount
*/
-static bool done_closing_sessions(struct ceph_mds_client *mdsc)
+static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
{
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return true;
- return atomic_read(&mdsc->num_sessions) == 0;
+ return atomic_read(&mdsc->num_sessions) <= skipped;
}
/*
@@ -3572,6 +3585,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_session *session;
int i;
+ int skipped = 0;
dout("close_sessions\n");
@@ -3583,7 +3597,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
continue;
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
- __close_session(mdsc, session);
+ if (__close_session(mdsc, session) <= 0)
+ skipped++;
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
mutex_lock(&mdsc->mutex);
@@ -3591,7 +3606,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
mutex_unlock(&mdsc->mutex);
dout("waiting for sessions to close\n");
- wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
+ wait_event_timeout(mdsc->session_close_wq,
+ done_closing_sessions(mdsc, skipped),
ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining sessions */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 6b3679737d4a..3c6f77b7bb02 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -121,6 +121,7 @@ enum {
CEPH_MDS_SESSION_CLOSING = 5,
CEPH_MDS_SESSION_RESTARTING = 6,
CEPH_MDS_SESSION_RECONNECTING = 7,
+ CEPH_MDS_SESSION_REJECTED = 8,
};
struct ceph_mds_session {
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 89e6bc321df3..913dea163d5c 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -43,6 +43,8 @@ const char *ceph_session_op_name(int op)
case CEPH_SESSION_RECALL_STATE: return "recall_state";
case CEPH_SESSION_FLUSHMSG: return "flushmsg";
case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
+ case CEPH_SESSION_FORCE_RO: return "force_ro";
+ case CEPH_SESSION_REJECT: return "reject";
}
return "???";
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e247f6f0feb7..a29ffce98187 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -396,10 +396,12 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
*/
dev_name_end = strchr(dev_name, '/');
if (dev_name_end) {
- fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
- if (!fsopt->server_path) {
- err = -ENOMEM;
- goto out;
+ if (strlen(dev_name_end) > 1) {
+ fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
+ if (!fsopt->server_path) {
+ err = -ENOMEM;
+ goto out;
+ }
}
} else {
dev_name_end = dev_name + strlen(dev_name);
@@ -788,15 +790,10 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
struct inode *inode = req->r_target_inode;
req->r_target_inode = NULL;
dout("open_root_inode success\n");
- if (ceph_ino(inode) == CEPH_INO_ROOT &&
- fsc->sb->s_root == NULL) {
- root = d_make_root(inode);
- if (!root) {
- root = ERR_PTR(-ENOMEM);
- goto out;
- }
- } else {
- root = d_obtain_root(inode);
+ root = d_make_root(inode);
+ if (!root) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
}
ceph_init_dentry(root);
dout("open_root_inode success, root dentry is %p\n", root);
@@ -825,17 +822,24 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
mutex_lock(&fsc->client->mount_mutex);
if (!fsc->sb->s_root) {
+ const char *path;
err = __ceph_open_session(fsc->client, started);
if (err < 0)
goto out;
- dout("mount opening root\n");
- root = open_root_dentry(fsc, "", started);
+ if (!fsc->mount_options->server_path) {
+ path = "";
+ dout("mount opening path \\t\n");
+ } else {
+ path = fsc->mount_options->server_path + 1;
+ dout("mount opening path %s\n", path);
+ }
+ root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto out;
}
- fsc->sb->s_root = root;
+ fsc->sb->s_root = dget(root);
first = 1;
err = ceph_fs_debugfs_init(fsc);
@@ -843,19 +847,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
goto fail;
}
- if (!fsc->mount_options->server_path) {
- root = fsc->sb->s_root;
- dget(root);
- } else {
- const char *path = fsc->mount_options->server_path + 1;
- dout("mount opening path %s\n", path);
- root = open_root_dentry(fsc, path, started);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto fail;
- }
- }
-
fsc->mount_state = CEPH_MOUNT_MOUNTED;
dout("mount success\n");
mutex_unlock(&fsc->client->mount_mutex);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9dcf974acc47..c9c00a862036 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -41,6 +41,16 @@ cifs_uniqueid_to_ino_t(u64 fileid)
}
+static inline void cifs_set_time(struct dentry *dentry, unsigned long time)
+{
+ dentry->d_fsdata = (void *) time;
+}
+
+static inline unsigned long cifs_get_time(struct dentry *dentry)
+{
+ return (unsigned long) dentry->d_fsdata;
+}
+
extern struct file_system_type cifs_fs_type;
extern const struct address_space_operations cifs_addr_ops;
extern const struct address_space_operations cifs_addr_ops_smallbuf;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 95dab43646f0..4ead72a001f9 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -392,8 +392,7 @@ extern int CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
unsigned int *nbytes, char **buf,
int *return_buf_type);
extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms,
- unsigned int *nbytes, const char *buf,
- const char __user *ubuf, const int long_op);
+ unsigned int *nbytes, const char *buf);
extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
unsigned int *nbytes, struct kvec *iov, const int nvec);
extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d47197ea4ab6..f82d2823622f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1228,7 +1228,6 @@ OldOpenRetry:
inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
- /* long_op set to 1 to allow for oplock break timeouts */
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *)pSMBr, &bytes_returned, 0);
cifs_stats_inc(&tcon->stats.cifs_stats.num_opens);
@@ -1768,8 +1767,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
int
CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms,
- unsigned int *nbytes, const char *buf,
- const char __user *ubuf, const int long_op)
+ unsigned int *nbytes, const char *buf)
{
int rc = -EACCES;
WRITE_REQ *pSMB = NULL;
@@ -1838,12 +1836,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms,
cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
if (buf)
memcpy(pSMB->Data, buf, bytes_sent);
- else if (ubuf) {
- if (copy_from_user(pSMB->Data, ubuf, bytes_sent)) {
- cifs_buf_release(pSMB);
- return -EFAULT;
- }
- } else if (count != 0) {
+ else if (count != 0) {
/* No buffer */
cifs_buf_release(pSMB);
return -EINVAL;
@@ -1867,7 +1860,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms,
}
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, long_op);
+ (struct smb_hdr *) pSMBr, &bytes_returned, 0);
cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
if (rc) {
cifs_dbg(FYI, "Send error in write = %d\n", rc);
@@ -3334,7 +3327,7 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
#ifdef CONFIG_CIFS_POSIX
/*Convert an Access Control Entry from wire format to local POSIX xattr format*/
-static void cifs_convert_ace(posix_acl_xattr_entry *ace,
+static void cifs_convert_ace(struct posix_acl_xattr_entry *ace,
struct cifs_posix_ace *cifs_ace)
{
/* u8 cifs fields do not need le conversion */
@@ -3358,7 +3351,7 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen,
__u16 count;
struct cifs_posix_ace *pACE;
struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)src;
- posix_acl_xattr_header *local_acl = (posix_acl_xattr_header *)trgt;
+ struct posix_acl_xattr_header *local_acl = (void *)trgt;
if (le16_to_cpu(cifs_acl->version) != CIFS_ACL_VERSION)
return -EOPNOTSUPP;
@@ -3396,9 +3389,11 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen,
} else if (size > buflen) {
return -ERANGE;
} else /* buffer big enough */ {
+ struct posix_acl_xattr_entry *ace = (void *)(local_acl + 1);
+
local_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
for (i = 0; i < count ; i++) {
- cifs_convert_ace(&local_acl->a_entries[i], pACE);
+ cifs_convert_ace(&ace[i], pACE);
pACE++;
}
}
@@ -3406,7 +3401,7 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen,
}
static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace,
- const posix_acl_xattr_entry *local_ace)
+ const struct posix_acl_xattr_entry *local_ace)
{
__u16 rc = 0; /* 0 = ACL converted ok */
@@ -3431,7 +3426,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
{
__u16 rc = 0;
struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)parm_data;
- posix_acl_xattr_header *local_acl = (posix_acl_xattr_header *)pACL;
+ struct posix_acl_xattr_header *local_acl = (void *)pACL;
int count;
int i;
@@ -3459,7 +3454,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
}
for (i = 0; i < count; i++) {
rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i],
- &local_acl->a_entries[i]);
+ (struct posix_acl_xattr_entry *)(local_acl + 1) + i); /* entry i; entries follow the header */
if (rc != 0) {
/* ACE not converted */
break;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4716c54dbfc6..789ff1df2d8d 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -40,7 +40,7 @@ renew_parental_timestamps(struct dentry *direntry)
/* BB check if there is a way to get the kernel to do this or if we
really need this */
do {
- direntry->d_time = jiffies;
+ cifs_set_time(direntry, jiffies);
direntry = direntry->d_parent;
} while (!IS_ROOT(direntry));
}
@@ -802,7 +802,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
} else if (rc == -ENOENT) {
rc = 0;
- direntry->d_time = jiffies;
+ cifs_set_time(direntry, jiffies);
d_add(direntry, NULL);
/* if it was once a directory (but how can we tell?) we could do
shrink_dcache_parent(direntry); */
@@ -862,7 +862,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
return 0;
- if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
+ if (time_after(jiffies, cifs_get_time(direntry) + HZ) || !lookupCacheEnabled)
return 0;
return 1;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 579e41b350a2..42b99af74e0a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2478,7 +2478,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
size_t cur_len;
unsigned long nr_pages, num_pages, i;
struct cifs_writedata *wdata;
- struct iov_iter saved_from;
+ struct iov_iter saved_from = *from;
loff_t saved_offset = offset;
pid_t pid;
struct TCP_Server_Info *server;
@@ -2489,7 +2489,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
pid = current->tgid;
server = tlink_tcon(open_file->tlink)->ses->server;
- memcpy(&saved_from, from, sizeof(struct iov_iter));
do {
unsigned int wsize, credits;
@@ -2551,8 +2550,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
kref_put(&wdata->refcount,
cifs_uncached_writedata_release);
if (rc == -EAGAIN) {
- memcpy(from, &saved_from,
- sizeof(struct iov_iter));
+ *from = saved_from;
iov_iter_advance(from, offset - saved_offset);
continue;
}
@@ -2576,7 +2574,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
struct cifs_sb_info *cifs_sb;
struct cifs_writedata *wdata, *tmp;
struct list_head wdata_list;
- struct iov_iter saved_from;
+ struct iov_iter saved_from = *from;
int rc;
/*
@@ -2597,8 +2595,6 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
if (!tcon->ses->server->ops->async_writev)
return -ENOSYS;
- memcpy(&saved_from, from, sizeof(struct iov_iter));
-
rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
open_file, cifs_sb, &wdata_list);
@@ -2631,13 +2627,11 @@ restart_loop:
/* resend call if it's a retryable error */
if (rc == -EAGAIN) {
struct list_head tmp_list;
- struct iov_iter tmp_from;
+ struct iov_iter tmp_from = saved_from;
INIT_LIST_HEAD(&tmp_list);
list_del_init(&wdata->list);
- memcpy(&tmp_from, &saved_from,
- sizeof(struct iov_iter));
iov_iter_advance(&tmp_from,
wdata->offset - iocb->ki_pos);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b87efd0c92d6..7ab5be7944aa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1951,7 +1951,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
cifs_dbg(FYI, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time %ld jiffies %ld\n",
full_path, inode, inode->i_count.counter,
- dentry, dentry->d_time, jiffies);
+ dentry, cifs_get_time(dentry), jiffies);
if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
@@ -2154,7 +2154,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
- rc = inode_change_ok(inode, attrs);
+ rc = setattr_prepare(direntry, attrs);
if (rc < 0)
goto out;
@@ -2294,7 +2294,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
- rc = inode_change_ok(inode, attrs);
+ rc = setattr_prepare(direntry, attrs);
if (rc < 0) {
free_xid(xid);
return rc;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 062c2375549a..d031af8d3d4d 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -399,7 +399,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
io_parms.offset = 0;
io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
- rc = CIFSSMBWrite(xid, &io_parms, pbytes_written, pbuf, NULL, 0);
+ rc = CIFSSMBWrite(xid, &io_parms, pbytes_written, pbuf);
CIFSSMBClose(xid, tcon, fid.netfid);
return rc;
}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index f47c7483863b..8415d4f8d1a1 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
static ssize_t
-coda_file_splice_read(struct file *coda_file, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- ssize_t (*splice_read)(struct file *, loff_t *,
- struct pipe_inode_info *, size_t, unsigned int);
- struct coda_file_info *cfi;
- struct file *host_file;
-
- cfi = CODA_FTOC(coda_file);
- BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- host_file = cfi->cfi_container;
-
- splice_read = host_file->f_op->splice_read;
- if (!splice_read)
- splice_read = default_file_splice_read;
-
- return splice_read(host_file, ppos, pipe, count, flags);
-}
-
-static ssize_t
coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *coda_file = iocb->ki_filp;
@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
- .splice_read = coda_file_splice_read,
+ .splice_read = generic_file_splice_read,
};
diff --git a/fs/compat.c b/fs/compat.c
index be6e48b0a46c..bd064a2c3550 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -54,20 +54,6 @@
#include <asm/ioctls.h>
#include "internal.h"
-int compat_log = 1;
-
-int compat_printk(const char *fmt, ...)
-{
- va_list ap;
- int ret;
- if (!compat_log)
- return 0;
- va_start(ap, fmt);
- ret = vprintk(fmt, ap);
- va_end(ap);
- return ret;
-}
-
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
@@ -562,7 +548,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
goto out;
ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV || nr_segs < 0)
+ if (nr_segs > UIO_MAXIOV)
goto out;
if (nr_segs > fast_segs) {
ret = -ENOMEM;
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index c502c116924c..61057b7dbddb 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -28,7 +28,6 @@
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/fscrypto.h>
-#include <linux/ecryptfs.h>
static unsigned int num_prealloc_crypto_pages = 32;
static unsigned int num_prealloc_crypto_ctxs = 128;
@@ -128,11 +127,11 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
EXPORT_SYMBOL(fscrypt_get_ctx);
/**
- * fscrypt_complete() - The completion callback for page encryption
- * @req: The asynchronous encryption request context
- * @res: The result of the encryption operation
+ * page_crypt_complete() - completion callback for page crypto
+ * @req: The asynchronous cipher request context
+ * @res: The result of the cipher operation
*/
-static void fscrypt_complete(struct crypto_async_request *req, int res)
+static void page_crypt_complete(struct crypto_async_request *req, int res)
{
struct fscrypt_completion_result *ecr = req->data;
@@ -170,7 +169,7 @@ static int do_page_crypto(struct inode *inode,
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- fscrypt_complete, &ecr);
+ page_crypt_complete, &ecr);
BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
memcpy(xts_tweak, &index, sizeof(index));
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 5d6d49113efa..9a28133ac3b8 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -10,21 +10,16 @@
* This has not yet undergone a rigorous security audit.
*/
-#include <keys/encrypted-type.h>
-#include <keys/user-type.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/fscrypto.h>
-static u32 size_round_up(size_t size, size_t blksize)
-{
- return ((size + blksize - 1) / blksize) * blksize;
-}
-
/**
- * dir_crypt_complete() -
+ * fname_crypt_complete() - completion callback for filename crypto
+ * @req: The asynchronous cipher request context
+ * @res: The result of the cipher operation
*/
-static void dir_crypt_complete(struct crypto_async_request *req, int res)
+static void fname_crypt_complete(struct crypto_async_request *req, int res)
{
struct fscrypt_completion_result *ecr = req->data;
@@ -35,11 +30,11 @@ static void dir_crypt_complete(struct crypto_async_request *req, int res)
}
/**
- * fname_encrypt() -
+ * fname_encrypt() - encrypt a filename
*
- * This function encrypts the input filename, and returns the length of the
- * ciphertext. Errors are returned as negative numbers. We trust the caller to
- * allocate sufficient memory to oname string.
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
static int fname_encrypt(struct inode *inode,
const struct qstr *iname, struct fscrypt_str *oname)
@@ -60,10 +55,9 @@ static int fname_encrypt(struct inode *inode,
if (iname->len <= 0 || iname->len > lim)
return -EIO;
- ciphertext_len = (iname->len < FS_CRYPTO_BLOCK_SIZE) ?
- FS_CRYPTO_BLOCK_SIZE : iname->len;
- ciphertext_len = size_round_up(ciphertext_len, padding);
- ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len;
+ ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE);
+ ciphertext_len = round_up(ciphertext_len, padding);
+ ciphertext_len = min(ciphertext_len, lim);
if (ciphertext_len <= sizeof(buf)) {
workbuf = buf;
@@ -84,7 +78,7 @@ static int fname_encrypt(struct inode *inode,
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- dir_crypt_complete, &ecr);
+ fname_crypt_complete, &ecr);
/* Copy the input */
memcpy(workbuf, iname->name, iname->len);
@@ -105,20 +99,22 @@ static int fname_encrypt(struct inode *inode,
}
kfree(alloc_buf);
skcipher_request_free(req);
- if (res < 0)
+ if (res < 0) {
printk_ratelimited(KERN_ERR
"%s: Error (error code %d)\n", __func__, res);
+ return res;
+ }
oname->len = ciphertext_len;
- return res;
+ return 0;
}
-/*
- * fname_decrypt()
- * This function decrypts the input filename, and returns
- * the length of the plaintext.
- * Errors are returned as negative numbers.
- * We trust the caller to allocate sufficient memory to oname string.
+/**
+ * fname_decrypt() - decrypt a filename
+ *
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
static int fname_decrypt(struct inode *inode,
const struct fscrypt_str *iname,
@@ -146,7 +142,7 @@ static int fname_decrypt(struct inode *inode,
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- dir_crypt_complete, &ecr);
+ fname_crypt_complete, &ecr);
/* Initialize IV */
memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
@@ -168,7 +164,7 @@ static int fname_decrypt(struct inode *inode,
}
oname->len = strnlen(oname->name, iname->len);
- return oname->len;
+ return 0;
}
static const char *lookup_table =
@@ -231,9 +227,8 @@ u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
if (ci)
padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
- if (ilen < FS_CRYPTO_BLOCK_SIZE)
- ilen = FS_CRYPTO_BLOCK_SIZE;
- return size_round_up(ilen, padding);
+ ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE);
+ return round_up(ilen, padding);
}
EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
@@ -279,6 +274,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer);
/**
* fscrypt_fname_disk_to_usr() - converts a filename from disk space to user
* space
+ *
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
int fscrypt_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
@@ -287,13 +286,12 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
{
const struct qstr qname = FSTR_TO_QSTR(iname);
char buf[24];
- int ret;
if (fscrypt_is_dot_dotdot(&qname)) {
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
- return oname->len;
+ return 0;
}
if (iname->len < FS_CRYPTO_BLOCK_SIZE)
@@ -303,9 +301,9 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
return fname_decrypt(inode, iname, oname);
if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) {
- ret = digest_encode(iname->name, iname->len, oname->name);
- oname->len = ret;
- return ret;
+ oname->len = digest_encode(iname->name, iname->len,
+ oname->name);
+ return 0;
}
if (hash) {
memcpy(buf, &hash, 4);
@@ -315,15 +313,18 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
}
memcpy(buf + 8, iname->name + iname->len - 16, 16);
oname->name[0] = '_';
- ret = digest_encode(buf, 24, oname->name + 1);
- oname->len = ret + 1;
- return ret + 1;
+ oname->len = 1 + digest_encode(buf, 24, oname->name + 1);
+ return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
/**
* fscrypt_fname_usr_to_disk() - converts a filename from user space to disk
* space
+ *
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
int fscrypt_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
@@ -333,7 +334,7 @@ int fscrypt_fname_usr_to_disk(struct inode *inode,
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
- return oname->len;
+ return 0;
}
if (inode->i_crypt_info)
return fname_encrypt(inode, iname, oname);
@@ -367,10 +368,10 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
if (dir->i_crypt_info) {
ret = fscrypt_fname_alloc_buffer(dir, iname->len,
&fname->crypto_buf);
- if (ret < 0)
+ if (ret)
return ret;
ret = fname_encrypt(dir, iname, &fname->crypto_buf);
- if (ret < 0)
+ if (ret)
goto errout;
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 1ac263eddc4e..82f0285f5d08 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -8,11 +8,8 @@
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
-#include <keys/encrypted-type.h>
#include <keys/user-type.h>
-#include <linux/random.h>
#include <linux/scatterlist.h>
-#include <uapi/linux/keyctl.h>
#include <linux/fscrypto.h>
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
@@ -139,6 +136,38 @@ out:
return res;
}
+static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
+ const char **cipher_str_ret, int *keysize_ret)
+{
+ if (S_ISREG(inode->i_mode)) {
+ if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) {
+ *cipher_str_ret = "xts(aes)";
+ *keysize_ret = FS_AES_256_XTS_KEY_SIZE;
+ return 0;
+ }
+ pr_warn_once("fscrypto: unsupported contents encryption mode "
+ "%d for inode %lu\n",
+ ci->ci_data_mode, inode->i_ino);
+ return -ENOKEY;
+ }
+
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+ if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) {
+ *cipher_str_ret = "cts(cbc(aes))";
+ *keysize_ret = FS_AES_256_CTS_KEY_SIZE;
+ return 0;
+ }
+ pr_warn_once("fscrypto: unsupported filenames encryption mode "
+ "%d for inode %lu\n",
+ ci->ci_filename_mode, inode->i_ino);
+ return -ENOKEY;
+ }
+
+ pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n",
+ (inode->i_mode & S_IFMT), inode->i_ino);
+ return -ENOKEY;
+}
+
static void put_crypt_info(struct fscrypt_info *ci)
{
if (!ci)
@@ -155,8 +184,8 @@ int get_crypt_info(struct inode *inode)
struct fscrypt_context ctx;
struct crypto_skcipher *ctfm;
const char *cipher_str;
+ int keysize;
u8 raw_key[FS_MAX_KEY_SIZE];
- u8 mode;
int res;
res = fscrypt_initialize();
@@ -179,13 +208,19 @@ retry:
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode))
return res;
+ ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
} else if (res != sizeof(ctx)) {
return -EINVAL;
}
- res = 0;
+
+ if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
+ return -EINVAL;
+
+ if (ctx.flags & ~FS_POLICY_FLAGS_VALID)
+ return -EINVAL;
crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
@@ -198,27 +233,11 @@ retry:
crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
- if (S_ISREG(inode->i_mode))
- mode = crypt_info->ci_data_mode;
- else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- mode = crypt_info->ci_filename_mode;
- else
- BUG();
-
- switch (mode) {
- case FS_ENCRYPTION_MODE_AES_256_XTS:
- cipher_str = "xts(aes)";
- break;
- case FS_ENCRYPTION_MODE_AES_256_CTS:
- cipher_str = "cts(cbc(aes))";
- break;
- default:
- printk_once(KERN_WARNING
- "%s: unsupported key mode %d (ino %u)\n",
- __func__, mode, (unsigned) inode->i_ino);
- res = -ENOKEY;
+
+ res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize);
+ if (res)
goto out;
- }
+
if (fscrypt_dummy_context_enabled(inode)) {
memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
goto got_key;
@@ -253,7 +272,7 @@ got_key:
crypt_info->ci_ctfm = ctfm;
crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_skcipher_setkey(ctfm, raw_key, fscrypt_key_size(mode));
+ res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
if (res)
goto out;
diff --git a/fs/dax.c b/fs/dax.c
index 993dc6fe0416..014defd2e744 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
/*
* We use lowest available bit in exceptional entry for locking, other two
@@ -580,14 +582,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
return VM_FAULT_LOCKED;
}
-static int copy_user_bh(struct page *to, struct inode *inode,
- struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+ struct page *to, unsigned long vaddr)
{
struct blk_dax_ctl dax = {
- .sector = to_sector(bh, inode),
- .size = bh->b_size,
+ .sector = sector,
+ .size = size,
};
- struct block_device *bdev = bh->b_bdev;
void *vto;
if (dax_map_atomic(bdev, &dax) < 0)
@@ -790,14 +791,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static int dax_insert_mapping(struct address_space *mapping,
- struct buffer_head *bh, void **entryp,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+ struct block_device *bdev, sector_t sector, size_t size,
+ void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long)vmf->virtual_address;
- struct block_device *bdev = bh->b_bdev;
struct blk_dax_ctl dax = {
- .sector = to_sector(bh, mapping->host),
- .size = bh->b_size,
+ .sector = sector,
+ .size = size,
};
void *ret;
void *entry = *entryp;
@@ -868,7 +868,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (vmf->cow_page) {
struct page *new_page = vmf->cow_page;
if (buffer_written(&bh))
- error = copy_user_bh(new_page, inode, &bh, vaddr);
+ error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+ bh.b_size, new_page, vaddr);
else
clear_user_highpage(new_page, vaddr);
if (error)
@@ -898,7 +899,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
/* Filesystem should not return unwritten buffers to us! */
WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
- error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+ error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+ bh.b_size, &entry, vma, vmf);
unlock_entry:
put_locked_mapping_entry(mapping, vmf->pgoff, entry);
out:
@@ -1034,7 +1036,7 @@ int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (!write && !buffer_mapped(&bh)) {
spinlock_t *ptl;
pmd_t entry;
- struct page *zero_page = get_huge_zero_page();
+ struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm);
if (unlikely(!zero_page)) {
dax_pmd_dbg(&bh, address, "no zero page");
@@ -1241,3 +1243,229 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
return dax_zero_page_range(inode, from, length, get_block);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ struct iomap *iomap)
+{
+ struct iov_iter *iter = data;
+ loff_t end = pos + length, done = 0;
+ ssize_t ret = 0;
+
+ if (iov_iter_rw(iter) == READ) {
+ end = min(end, i_size_read(inode));
+ if (pos >= end)
+ return 0;
+
+ if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+ return iov_iter_zero(min(length, end - pos), iter);
+ }
+
+ if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+ return -EIO;
+
+ while (pos < end) {
+ unsigned offset = pos & (PAGE_SIZE - 1);
+ struct blk_dax_ctl dax = { 0 };
+ ssize_t map_len;
+
+ dax.sector = iomap->blkno +
+ (((pos & PAGE_MASK) - iomap->offset) >> 9);
+ dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+ map_len = dax_map_atomic(iomap->bdev, &dax);
+ if (map_len < 0) {
+ ret = map_len;
+ break;
+ }
+
+ dax.addr += offset;
+ map_len -= offset;
+ if (map_len > end - pos)
+ map_len = end - pos;
+
+ if (iov_iter_rw(iter) == WRITE)
+ map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+ else
+ map_len = copy_to_iter(dax.addr, map_len, iter);
+ dax_unmap_atomic(iomap->bdev, &dax);
+ if (map_len <= 0) {
+ ret = map_len ? map_len : -EFAULT;
+ break;
+ }
+
+ pos += map_len;
+ length -= map_len;
+ done += map_len;
+ }
+
+ return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb: The control block for this I/O
+ * @iter: The addresses to do I/O from or to
+ * @ops: iomap ops passed from the file system
+ *
+ * This function performs read and write operations to directly mapped
+ * persistent memory. The caller needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+ unsigned flags = 0;
+
+ if (iov_iter_rw(iter) == WRITE)
+ flags |= IOMAP_WRITE;
+
+ /*
+ * Yes, even DAX files can have page cache attached to them: A zeroed
+ * page is inserted into the pagecache when we have to serve a write
+ * fault on a hole. It should never be dirtied and can simply be
+ * dropped from the pagecache once we get real data for the page.
+ *
+ * XXX: This is racy against mmap, and there's nothing we can do about
+ * it. We'll eventually need to shift this down even further so that
+ * we can check if we allocated blocks over a hole first.
+ */
+ if (mapping->nrpages) {
+ ret = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT,
+ (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(ret);
+ }
+
+ while (iov_iter_count(iter)) {
+ ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
+ iter, iomap_dax_actor);
+ if (ret <= 0)
+ break;
+ pos += ret;
+ done += ret;
+ }
+
+ iocb->ki_pos += done;
+ return done ? done : ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Assumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops)
+{
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned long vaddr = (unsigned long)vmf->virtual_address;
+ loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+ sector_t sector;
+ struct iomap iomap = { 0 };
+ unsigned flags = 0;
+ int error, major = 0;
+ void *entry;
+
+ /*
+ * Check whether offset isn't beyond end of file now. Caller is supposed
+ * to hold locks serializing us with truncate / punch hole so this is
+ * a reliable test.
+ */
+ if (pos >= i_size_read(inode))
+ return VM_FAULT_SIGBUS;
+
+ entry = grab_mapping_entry(mapping, vmf->pgoff);
+ if (IS_ERR(entry)) {
+ error = PTR_ERR(entry);
+ goto out;
+ }
+
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+ flags |= IOMAP_WRITE;
+
+ /*
+ * Note that we don't bother to use iomap_apply here: DAX requires
+ * the file system block size to be equal to the page size, which means
+ * that we never have to deal with more than a single extent here.
+ */
+ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+ if (error)
+ goto unlock_entry;
+ if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+ error = -EIO; /* fs corruption? */
+ goto unlock_entry;
+ }
+
+ sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+ if (vmf->cow_page) {
+ switch (iomap.type) {
+ case IOMAP_HOLE:
+ case IOMAP_UNWRITTEN:
+ clear_user_highpage(vmf->cow_page, vaddr);
+ break;
+ case IOMAP_MAPPED:
+ error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+ vmf->cow_page, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ break;
+ }
+
+ if (error)
+ goto unlock_entry;
+ if (!radix_tree_exceptional_entry(entry)) {
+ vmf->page = entry;
+ return VM_FAULT_LOCKED;
+ }
+ vmf->entry = entry;
+ return VM_FAULT_DAX_LOCKED;
+ }
+
+ switch (iomap.type) {
+ case IOMAP_MAPPED:
+ if (iomap.flags & IOMAP_F_NEW) {
+ count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ major = VM_FAULT_MAJOR;
+ }
+ error = dax_insert_mapping(mapping, iomap.bdev, sector,
+ PAGE_SIZE, &entry, vma, vmf);
+ break;
+ case IOMAP_UNWRITTEN:
+ case IOMAP_HOLE:
+ if (!(vmf->flags & FAULT_FLAG_WRITE))
+ return dax_load_hole(mapping, entry, vmf);
+ /*FALLTHRU*/
+ default:
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ break;
+ }
+
+ unlock_entry:
+ put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
+ if (error == -ENOMEM)
+ return VM_FAULT_OOM | major;
+ /* -EBUSY is fine, somebody else faulted on the same PTE */
+ if (error < 0 && error != -EBUSY)
+ return VM_FAULT_SIGBUS | major;
+ return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 592059f88e04..354e2ab62031 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -97,9 +97,6 @@ EXPORT_SYMBOL_GPL(debugfs_use_file_finish);
#define F_DENTRY(filp) ((filp)->f_path.dentry)
-#define REAL_FOPS_DEREF(dentry) \
- ((const struct file_operations *)(dentry)->d_fsdata)
-
static int open_proxy_open(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
@@ -112,7 +109,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
goto out;
}
- real_fops = REAL_FOPS_DEREF(dentry);
+ real_fops = debugfs_real_fops(filp);
real_fops = fops_get(real_fops);
if (!real_fops) {
/* Huh? Module did not clean up after itself at exit? */
@@ -143,7 +140,7 @@ static ret_type full_proxy_ ## name(proto) \
{ \
const struct dentry *dentry = F_DENTRY(filp); \
const struct file_operations *real_fops = \
- REAL_FOPS_DEREF(dentry); \
+ debugfs_real_fops(filp); \
int srcu_idx; \
ret_type r; \
\
@@ -176,7 +173,7 @@ static unsigned int full_proxy_poll(struct file *filp,
struct poll_table_struct *wait)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+ const struct file_operations *real_fops = debugfs_real_fops(filp);
int srcu_idx;
unsigned int r = 0;
@@ -193,7 +190,7 @@ static unsigned int full_proxy_poll(struct file *filp,
static int full_proxy_release(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+ const struct file_operations *real_fops = debugfs_real_fops(filp);
const struct file_operations *proxy_fops = filp->f_op;
int r = 0;
@@ -209,7 +206,7 @@ static int full_proxy_release(struct inode *inode, struct file *filp)
replace_fops(filp, d_inode(dentry)->i_fop);
kfree((void *)proxy_fops);
fops_put(real_fops);
- return 0;
+ return r;
}
static void __full_proxy_fops_init(struct file_operations *proxy_fops,
@@ -241,7 +238,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
goto out;
}
- real_fops = REAL_FOPS_DEREF(dentry);
+ real_fops = debugfs_real_fops(filp);
real_fops = fops_get(real_fops);
if (!real_fops) {
/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index bba52634b995..b3e8443a1f47 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -19,8 +19,4 @@ extern const struct file_operations debugfs_noop_file_operations;
extern const struct file_operations debugfs_open_proxy_file_operations;
extern const struct file_operations debugfs_full_proxy_file_operations;
-struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode,
- struct dentry *parent, void *data,
- const struct file_operations *fops);
-
#endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 79a5941c2474..442d1a7e671b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -272,13 +272,8 @@ static int mknod_ptmx(struct super_block *sb)
struct dentry *root = sb->s_root;
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- kuid_t root_uid;
- kgid_t root_gid;
-
- root_uid = make_kuid(current_user_ns(), 0);
- root_gid = make_kgid(current_user_ns(), 0);
- if (!uid_valid(root_uid) || !gid_valid(root_gid))
- return -EINVAL;
+ kuid_t ptmx_uid = current_fsuid();
+ kgid_t ptmx_gid = current_fsgid();
inode_lock(d_inode(root));
@@ -309,8 +304,8 @@ static int mknod_ptmx(struct super_block *sb)
mode = S_IFCHR|opts->ptmxmode;
init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
- inode->i_uid = root_uid;
- inode->i_gid = root_gid;
+ inode->i_uid = ptmx_uid;
+ inode->i_gid = ptmx_gid;
d_add(dentry, inode);
@@ -336,7 +331,6 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- sync_filesystem(sb);
err = parse_mount_options(data, PARSE_REMOUNT, opts);
/*
@@ -395,6 +389,7 @@ static int
devpts_fill_super(struct super_block *s, void *data, int silent)
{
struct inode *inode;
+ int error;
s->s_iflags &= ~SB_I_NODEV;
s->s_blocksize = 1024;
@@ -403,10 +398,16 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
s->s_op = &devpts_sops;
s->s_time_gran = 1;
+ error = -ENOMEM;
s->s_fs_info = new_pts_fs_info(s);
if (!s->s_fs_info)
goto fail;
+ error = parse_mount_options(data, PARSE_MOUNT, &DEVPTS_SB(s)->mount_opts);
+ if (error)
+ goto fail;
+
+ error = -ENOMEM;
inode = new_inode(s);
if (!inode)
goto fail;
@@ -418,13 +419,21 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
set_nlink(inode, 2);
s->s_root = d_make_root(inode);
- if (s->s_root)
- return 0;
+ if (!s->s_root) {
+ pr_err("get root dentry failed\n");
+ goto fail;
+ }
- pr_err("get root dentry failed\n");
+ error = mknod_ptmx(s);
+ if (error)
+ goto fail_dput;
+ return 0;
+fail_dput:
+ dput(s->s_root);
+ s->s_root = NULL;
fail:
- return -ENOMEM;
+ return error;
}
/*
@@ -436,43 +445,15 @@ fail:
static struct dentry *devpts_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- int error;
- struct pts_mount_opts opts;
- struct super_block *s;
-
- error = parse_mount_options(data, PARSE_MOUNT, &opts);
- if (error)
- return ERR_PTR(error);
-
- s = sget(fs_type, NULL, set_anon_super, flags, NULL);
- if (IS_ERR(s))
- return ERR_CAST(s);
-
- if (!s->s_root) {
- error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
- if (error)
- goto out_undo_sget;
- s->s_flags |= MS_ACTIVE;
- }
-
- memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
-
- error = mknod_ptmx(s);
- if (error)
- goto out_undo_sget;
-
- return dget(s->s_root);
-
-out_undo_sget:
- deactivate_locked_super(s);
- return ERR_PTR(error);
+ return mount_nodev(fs_type, flags, data, devpts_fill_super);
}
static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
- ida_destroy(&fsi->allocated_ptys);
+ if (fsi)
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7c3ce73cb617..fb9aa16a7727 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if ((dio->op == REQ_OP_READ) &&
((offset + transferred) > dio->i_size))
transferred = dio->i_size - offset;
+ /* ignore EFAULT if some IO has been done */
+ if (unlikely(ret == -EFAULT) && transferred)
+ ret = 0;
}
if (ret == 0)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 963016c8f3d1..609998de533e 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1656,16 +1656,12 @@ void dlm_lowcomms_stop(void)
mutex_lock(&connections_lock);
dlm_allow_conn = 0;
foreach_conn(stop_conn);
+ clean_writequeues();
+ foreach_conn(free_conn);
mutex_unlock(&connections_lock);
work_stop();
- mutex_lock(&connections_lock);
- clean_writequeues();
-
- foreach_conn(free_conn);
-
- mutex_unlock(&connections_lock);
kmem_cache_destroy(con_cache);
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3f2575ddd45e..ddccec3124d7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -927,7 +927,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
}
mutex_unlock(&crypt_stat->cs_mutex);
- rc = inode_change_ok(inode, ia);
+ rc = setattr_prepare(dentry, ia);
if (rc)
goto out;
if (ia->ia_valid & ATTR_SIZE) {
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 1d73fc6dba13..cbb50cadcffc 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -105,7 +105,10 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
inode->i_private = var;
- efivar_entry_add(var, &efivarfs_list);
+ err = efivar_entry_add(var, &efivarfs_list);
+ if (err)
+ goto out;
+
d_instantiate(dentry, inode);
dget(dentry);
out:
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 688ccc16b702..d7a7c53803c1 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -157,12 +157,14 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
goto fail_inode;
}
+ efivar_entry_size(entry, &size);
+ err = efivar_entry_add(entry, &efivarfs_list);
+ if (err)
+ goto fail_inode;
+
/* copied by the above to local storage in the dentry. */
kfree(name);
- efivar_entry_size(entry, &size);
- efivar_entry_add(entry, &efivarfs_list);
-
inode_lock(inode);
inode->i_private = entry;
i_size_write(inode, size + sizeof(entry->var.Attributes));
@@ -182,7 +184,10 @@ fail:
static int efivarfs_destroy(struct efivar_entry *entry, void *data)
{
- efivar_entry_remove(entry);
+ int err = efivar_entry_remove(entry);
+
+ if (err)
+ return err;
kfree(entry);
return 0;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 9dc4c6dbf3c9..5e68daee5fe4 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1034,7 +1034,7 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
if (unlikely(error))
return error;
- error = inode_change_ok(inode, iattr);
+ error = setattr_prepare(dentry, iattr);
if (unlikely(error))
return error;
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e12d9..36bea5adcaba 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
config EXT2_FS
tristate "Second extended fs support"
+ select FS_IOMAP if FS_DAX
help
Ext2 is a standard Linux file system for hard disks.
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 42f1d1814083..e725aa0890e0 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -190,15 +190,11 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
- else {
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- if (error == 0)
- acl = NULL;
- }
+ inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(inode);
}
break;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f92226c..37e2be784ac7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
/* inode.c */
extern const struct address_space_operations ext2_aops;
extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
/* namei.c */
extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 538f77616f3c..a0e1478dfd04 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ ssize_t ret;
+
+ if (!iov_iter_count(to))
+ return 0; /* skip atime */
+
+ inode_lock_shared(inode);
+ ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+ inode_unlock_shared(inode);
+
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out_unlock;
+ ret = file_update_time(file);
+ if (ret)
+ goto out_unlock;
+
+ ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+ if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+ i_size_write(inode, iocb->ki_pos);
+ mark_inode_dirty(inode);
+ }
+
+out_unlock:
+ inode_unlock(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+
/*
* The lock ordering for ext2 DAX fault paths is:
*
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
down_read(&ei->dax_sem);
- ret = dax_fault(vma, vmf, ext2_get_block);
+ ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
up_read(&ei->dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
return ret;
}
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(iocb->ki_filp->f_mapping->host))
+ return ext2_dax_read_iter(iocb, to);
+#endif
+ return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(iocb->ki_filp->f_mapping->host))
+ return ext2_dax_write_iter(iocb, from);
+#endif
+ return generic_file_write_iter(iocb, from);
+}
+
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
+ .read_iter = ext2_file_read_iter,
+ .write_iter = ext2_file_write_iter,
.unlocked_ioctl = ext2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext2_compat_ioctl,
@@ -172,6 +234,7 @@ const struct file_operations ext2_file_operations = {
.open = dquot_file_open,
.release = ext2_release_file,
.fsync = ext2_fsync,
+ .get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
};
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index efe5fb21c533..04e73a99902b 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -465,6 +465,11 @@ struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext2_get_group_desc(sb, group, &bh2);
+ if (!gdp) {
+ if (++group == sbi->s_groups_count)
+ group = 0;
+ continue;
+ }
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh) {
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09919f3..6831534924e1 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/fiemap.h>
+#include <linux/iomap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include "ext2.h"
@@ -618,7 +619,7 @@ static void ext2_splice_branch(struct inode *inode,
*/
static int ext2_get_blocks(struct inode *inode,
sector_t iblock, unsigned long maxblocks,
- struct buffer_head *bh_result,
+ u32 *bno, bool *new, bool *boundary,
int create)
{
int err = -EIO;
@@ -644,7 +645,6 @@ static int ext2_get_blocks(struct inode *inode,
/* Simplest case - block found, no allocation needed */
if (!partial) {
first_block = le32_to_cpu(chain[depth - 1].key);
- clear_buffer_new(bh_result); /* What's this do? */
count++;
/*map more blocks*/
while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +699,6 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock(&ei->truncate_mutex);
if (err)
goto cleanup;
- clear_buffer_new(bh_result);
goto got_it;
}
}
@@ -733,6 +732,16 @@ static int ext2_get_blocks(struct inode *inode,
}
if (IS_DAX(inode)) {
+ int i;
+
+ /*
+ * We must unmap blocks before zeroing so that writeback cannot
+ * overwrite zeros with stale data from block device page cache.
+ */
+ for (i = 0; i < count; i++) {
+ unmap_underlying_metadata(inode->i_sb->s_bdev,
+ le32_to_cpu(chain[depth-1].key) + i);
+ }
/*
* block must be initialised before we put it in the tree
* so that it's not found by another thread before it's
@@ -745,15 +754,16 @@ static int ext2_get_blocks(struct inode *inode,
mutex_unlock(&ei->truncate_mutex);
goto cleanup;
}
- } else
- set_buffer_new(bh_result);
+ } else {
+ *new = true;
+ }
ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
mutex_unlock(&ei->truncate_mutex);
got_it:
- map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+ *bno = le32_to_cpu(chain[depth-1].key);
if (count > blocks_to_boundary)
- set_buffer_boundary(bh_result);
+ *boundary = true;
err = count;
/* Clean up and exit */
partial = chain + depth - 1; /* the whole chain */
@@ -765,19 +775,82 @@ cleanup:
return err;
}
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
- int ret = ext2_get_blocks(inode, iblock, max_blocks,
- bh_result, create);
- if (ret > 0) {
- bh_result->b_size = (ret << inode->i_blkbits);
- ret = 0;
+ bool new = false, boundary = false;
+ u32 bno;
+ int ret;
+
+ ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+ create);
+ if (ret <= 0)
+ return ret;
+
+ map_bh(bh_result, inode->i_sb, bno);
+ bh_result->b_size = (ret << inode->i_blkbits);
+ if (new)
+ set_buffer_new(bh_result);
+ if (boundary)
+ set_buffer_boundary(bh_result);
+ return 0;
+
+}
+
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned flags, struct iomap *iomap)
+{
+ unsigned int blkbits = inode->i_blkbits;
+ unsigned long first_block = offset >> blkbits;
+ unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+ bool new = false, boundary = false;
+ u32 bno;
+ int ret;
+
+ ret = ext2_get_blocks(inode, first_block, max_blocks,
+ &bno, &new, &boundary, flags & IOMAP_WRITE);
+ if (ret < 0)
+ return ret;
+
+ iomap->flags = 0;
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = (u64)first_block << blkbits;
+
+ if (ret == 0) {
+ iomap->type = IOMAP_HOLE;
+ iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->length = 1 << blkbits;
+ } else {
+ iomap->type = IOMAP_MAPPED;
+ iomap->blkno = (sector_t)bno << (blkbits - 9);
+ iomap->length = (u64)ret << blkbits;
+ iomap->flags |= IOMAP_F_MERGED;
}
- return ret;
+ if (new)
+ iomap->flags |= IOMAP_F_NEW;
+ return 0;
}
+static int
+ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
+{
+ if (iomap->type == IOMAP_MAPPED &&
+ written < length &&
+ (flags & IOMAP_WRITE))
+ ext2_write_failed(inode->i_mapping, offset + length);
+ return 0;
+}
+
+struct iomap_ops ext2_iomap_ops = {
+ .iomap_begin = ext2_iomap_begin,
+ .iomap_end = ext2_iomap_end,
+};
+#endif /* CONFIG_FS_DAX */
+
int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
@@ -863,11 +936,10 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
loff_t offset = iocb->ki_pos;
ssize_t ret;
- if (IS_DAX(inode))
- ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
- DIO_LOCKING);
- else
- ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
+ if (WARN_ON_ONCE(IS_DAX(inode)))
+ return -EIO;
+
+ ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
if (ret < 0 && iov_iter_rw(iter) == WRITE)
ext2_write_failed(mapping, offset + count);
return ret;
@@ -1580,7 +1652,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
struct inode *inode = d_inode(dentry);
int error;
- error = inode_change_ok(inode, iattr);
+ error = setattr_prepare(dentry, iattr);
if (error)
return error;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c6601a476c02..dfa519979038 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -193,15 +193,11 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
case ACL_TYPE_ACCESS:
name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
- else {
- inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- if (error == 0)
- acl = NULL;
- }
+ inode->i_ctime = ext4_current_time(inode);
+ ext4_mark_inode_dirty(handle, inode);
}
break;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 67415e0e6af0..e8b365000d73 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -260,11 +260,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Directory is encrypted */
err = fscrypt_fname_disk_to_usr(inode,
0, 0, &de_name, &fstr);
+ de_name = fstr;
fstr.len = save_len;
- if (err < 0)
+ if (err)
goto errout;
if (!dir_emit(ctx,
- fstr.name, err,
+ de_name.name, de_name.len,
le32_to_cpu(de->inode),
get_dtype(sb, de->file_type)))
goto done;
@@ -627,7 +628,7 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
int buf_size)
{
struct ext4_dir_entry_2 *de;
- int nlen, rlen;
+ int rlen;
unsigned int offset = 0;
char *top;
@@ -637,7 +638,6 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
if (ext4_check_dir_entry(dir, NULL, de, bh,
buf, buf_size, offset))
return -EFSCORRUPTED;
- nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ea31931386ec..282a51b07c57 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -262,6 +262,9 @@ struct ext4_io_submit {
(s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
+#define EXT4_MAX_BLOCKS(size, offset, blkbits) \
+ ((EXT4_BLOCK_ALIGN(size + offset, blkbits) >> blkbits) - (offset >> \
+ blkbits))
/* Translate a block number to a cluster number */
#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
@@ -1117,9 +1120,15 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
-#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
-#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */
+#define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota,
+ * enable enforcement for hidden
+ * quota files */
+#define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable
+ * enforcement for hidden quota
+ * files */
+#define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
+ * enforcement */
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
@@ -1636,26 +1645,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
* Feature set definitions
*/
-/* Use the ext4_{has,set,clear}_feature_* helpers; these will be removed */
-#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
-#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
-#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
-#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
-#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
-#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
-#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
-#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
-#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
-
#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d7ccb7f51dfc..c930a0110fb4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4679,6 +4679,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
unsigned int credits;
loff_t epos;
+ BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
map.m_lblk = offset;
map.m_len = len;
/*
@@ -4693,13 +4694,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, len);
- /*
- * We can only call ext_depth() on extent based inodes
- */
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- depth = ext_depth(inode);
- else
- depth = -1;
+ depth = ext_depth(inode);
retry:
while (ret >= 0 && len) {
@@ -4966,13 +4961,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
trace_ext4_fallocate_enter(inode, offset, len, mode);
lblk = offset >> blkbits;
- /*
- * We can't just convert len to max_blocks because
- * If blocksize = 4096 offset = 3072 and len = 2048
- */
- max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- - lblk;
+ max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
@@ -5035,12 +5025,8 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
unsigned int credits, blkbits = inode->i_blkbits;
map.m_lblk = offset >> blkbits;
- /*
- * We can't just convert len to max_blocks because
- * If blocksize = 4096 offset = 3072 and len = 2048
- */
- max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
- map.m_lblk);
+ max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
+
/*
* This is somewhat ugly but the idea is clear: When transaction is
* reserved, everything goes into it. Otherwise we rather start several
@@ -5734,6 +5720,9 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
+ } else {
+ ext4_ext_drop_refs(path);
+ kfree(path);
}
ret = ext4_es_remove_extent(inode, offset_lblk,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 93072dbcb3ac..2a822d30e73f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -91,9 +91,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(iocb->ki_filp);
- struct blk_plug plug;
int o_direct = iocb->ki_flags & IOCB_DIRECT;
int unaligned_aio = 0;
int overwrite = 0;
@@ -134,18 +132,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (o_direct) {
size_t length = iov_iter_count(from);
loff_t pos = iocb->ki_pos;
- blk_start_plug(&plug);
/* check whether we do a DIO overwrite or not */
if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
+ pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, len;
map.m_lblk = pos >> blkbits;
- map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
- - map.m_lblk;
+ map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits);
len = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0);
@@ -171,8 +167,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret > 0)
ret = generic_write_sync(iocb, ret);
- if (o_direct)
- blk_finish_plug(&plug);
return ret;
@@ -703,6 +697,7 @@ const struct file_operations ext4_file_operations = {
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
+ .get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5c4372512ef7..88effb1053c7 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -61,6 +61,13 @@ static int ext4_sync_parent(struct inode *inode)
break;
iput(inode);
inode = next;
+ /*
+ * The directory inode may have gone through rmdir by now. But
+ * the inode itself and its blocks are still allocated (we hold
+ * a reference to the inode so it didn't go through
+ * ext4_evict_inode()) and so we are safe to flush metadata
+ * blocks and the inode.
+ */
ret = sync_mapping_buffers(inode->i_mapping);
if (ret)
break;
@@ -107,7 +114,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (!journal) {
ret = __generic_file_fsync(file, start, end, datasync);
- if (!ret && !hlist_empty(&inode->i_dentry))
+ if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
goto issue_flush;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9e66cd1d7b78..170421edfdfe 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -802,7 +802,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
} else
inode_init_owner(inode, dir, mode);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ if (ext4_has_feature_project(sb) &&
ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
ei->i_projid = EXT4_I(dir)->i_projid;
else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c6ea25a190f8..9c064727ed62 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -647,11 +647,19 @@ found:
/*
* We have to zeroout blocks before inserting them into extent
* status tree. Otherwise someone could look them up there and
- * use them before they are really zeroed.
+ * use them before they are really zeroed. We also have to
+ * unmap metadata before zeroing as otherwise writeback can
+ * overwrite zeros with stale data from block device.
*/
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
+ ext4_lblk_t i;
+
+ for (i = 0; i < map->m_len; i++) {
+ unmap_underlying_metadata(inode->i_sb->s_bdev,
+ map->m_pblk + i);
+ }
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
@@ -1649,6 +1657,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (invalidate) {
+ if (page_mapped(page))
+ clear_page_dirty_for_io(page);
block_invalidatepage(page, 0, PAGE_SIZE);
ClearPageUptodate(page);
}
@@ -3526,35 +3536,31 @@ out:
static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
- int unlocked = 0;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
ssize_t ret;
- if (ext4_should_dioread_nolock(inode)) {
- /*
- * Nolock dioread optimization may be dynamically disabled
- * via ext4_inode_block_unlocked_dio(). Check inode's state
- * while holding extra i_dio_count ref.
- */
- inode_dio_begin(inode);
- smp_mb();
- if (unlikely(ext4_test_inode_state(inode,
- EXT4_STATE_DIOREAD_LOCK)))
- inode_dio_end(inode);
- else
- unlocked = 1;
- }
+ /*
+ * Shared inode_lock is enough for us - it protects against concurrent
+ * writes & truncates and since we take care of writing back page cache,
+ * we are protected against page writeback as well.
+ */
+ inode_lock_shared(inode);
if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
- NULL, unlocked ? 0 : DIO_LOCKING);
+ ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
} else {
+ size_t count = iov_iter_count(iter);
+
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+ iocb->ki_pos + count);
+ if (ret)
+ goto out_unlock;
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
iter, ext4_dio_get_block,
- NULL, NULL,
- unlocked ? 0 : DIO_LOCKING);
+ NULL, NULL, 0);
}
- if (unlocked)
- inode_dio_end(inode);
+out_unlock:
+ inode_unlock_shared(inode);
return ret;
}
@@ -3890,7 +3896,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
}
/*
- * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * ext4_punch_hole: punches a hole in a file by releasing the blocks
* associated with the given offset and length
*
* @inode: File inode
@@ -3919,7 +3925,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
ret = filemap_write_and_wait_range(mapping, offset,
offset + length - 1);
if (ret)
@@ -4414,7 +4420,7 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
int ext4_get_projid(struct inode *inode, kprojid_t *projid)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+ if (!ext4_has_feature_project(inode->i_sb))
return -EOPNOTSUPP;
*projid = EXT4_I(inode)->i_projid;
return 0;
@@ -4481,7 +4487,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ if (ext4_has_feature_project(sb) &&
EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
@@ -4814,14 +4820,14 @@ static int ext4_do_update_inode(handle_t *handle,
* Fix up interoperability with old kernels. Otherwise, old inodes get
* re-used with the upper 16 bits of the uid/gid intact
*/
- if (!ei->i_dtime) {
+ if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+ raw_inode->i_uid_high = 0;
+ raw_inode->i_gid_high = 0;
+ } else {
raw_inode->i_uid_high =
cpu_to_le16(high_16_bits(i_uid));
raw_inode->i_gid_high =
cpu_to_le16(high_16_bits(i_gid));
- } else {
- raw_inode->i_uid_high = 0;
- raw_inode->i_gid_high = 0;
}
} else {
raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
@@ -4885,8 +4891,7 @@ static int ext4_do_update_inode(handle_t *handle,
}
}
- BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ BUG_ON(!ext4_has_feature_project(inode->i_sb) &&
i_projid != EXT4_DEF_PROJID);
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
@@ -5073,7 +5078,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 1bb7df5e4536..bf5ae8ebbc97 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -19,8 +19,6 @@
#include "ext4_jbd2.h"
#include "ext4.h"
-#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
-
/**
* Swap memory between @a and @b for @len bytes.
*
@@ -310,8 +308,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
struct ext4_inode *raw_inode;
struct dquot *transfer_to[MAXQUOTAS] = { };
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ if (!ext4_has_feature_project(sb)) {
if (projid != EXT4_DEF_PROJID)
return -EOPNOTSUPP;
else
@@ -772,6 +769,9 @@ resizefs_out:
#ifdef CONFIG_EXT4_FS_ENCRYPTION
struct fscrypt_policy policy;
+ if (!ext4_has_feature_encrypt(sb))
+ return -EOPNOTSUPP;
+
if (copy_from_user(&policy,
(struct fscrypt_policy __user *)arg,
sizeof(policy)))
@@ -842,8 +842,7 @@ resizefs_out:
ext4_get_inode_flags(ei);
fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ if (ext4_has_feature_project(inode->i_sb)) {
fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
EXT4_I(inode)->i_projid);
}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index a920c5d29fac..6fc14def0c70 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -598,6 +598,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
return -EOPNOTSUPP;
}
+ if (ext4_encrypted_inode(orig_inode) ||
+ ext4_encrypted_inode(donor_inode)) {
+ ext4_msg(orig_inode->i_sb, KERN_ERR,
+ "Online defrag not supported for encrypted files");
+ return -EOPNOTSUPP;
+ }
+
/* Protect orig and donor inodes against a truncate */
lock_two_nondirectories(orig_inode, donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ddc309e8471e..a73a9196b929 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -639,7 +639,7 @@ static struct stats dx_show_leaf(struct inode *dir,
res = fscrypt_fname_alloc_buffer(
dir, len,
&fname_crypto_str);
- if (res < 0)
+ if (res)
printk(KERN_WARNING "Error "
"allocating crypto "
"buffer--skipping "
@@ -647,7 +647,7 @@ static struct stats dx_show_leaf(struct inode *dir,
res = fscrypt_fname_disk_to_usr(dir,
0, 0, &de_name,
&fname_crypto_str);
- if (res < 0) {
+ if (res) {
printk(KERN_WARNING "Error "
"converting filename "
"from disk to usr"
@@ -1011,7 +1011,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
hinfo->minor_hash, &de_name,
&fname_crypto_str);
- if (err < 0) {
+ if (err) {
count = err;
goto errout;
}
@@ -2044,33 +2044,31 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
frame->entries = entries;
frame->at = entries;
frame->bh = bh;
- bh = bh2;
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
goto out_frames;
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
if (retval)
goto out_frames;
- de = do_split(handle,dir, &bh, frame, &fname->hinfo);
+ de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
if (IS_ERR(de)) {
retval = PTR_ERR(de);
goto out_frames;
}
- dx_release(frames);
- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
- brelse(bh);
- return retval;
+ retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
out_frames:
/*
* Even if the block split failed, we have to properly write
* out all the changes we did so far. Otherwise we can end up
* with corrupted filesystem.
*/
- ext4_mark_inode_dirty(handle, dir);
+ if (retval)
+ ext4_mark_inode_dirty(handle, dir);
dx_release(frames);
+ brelse(bh2);
return retval;
}
@@ -3144,7 +3142,7 @@ static int ext4_symlink(struct inode *dir,
istr.name = (const unsigned char *) symname;
istr.len = len;
err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
- if (err < 0)
+ if (err)
goto err_drop_inode;
sd->len = cpu_to_le16(ostr.len);
disk_link.name = (char *) sd;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a6132a730967..b4cbee936cf8 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -405,14 +405,12 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
{
struct page *data_page = NULL;
struct inode *inode = page->mapping->host;
- unsigned block_start, blocksize;
+ unsigned block_start;
struct buffer_head *bh, *head;
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
- blocksize = 1 << inode->i_blkbits;
-
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3ec8708989ca..6db81fbcbaa6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -78,6 +78,8 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
+static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ unsigned int journal_inum);
/*
* Lock ordering
@@ -1267,7 +1269,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
@@ -1327,6 +1329,7 @@ static const match_table_t tokens = {
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+ {Opt_prjquota, "prjquota"},
{Opt_barrier, "barrier=%u"},
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
@@ -1546,8 +1549,11 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
MOPT_SET | MOPT_Q},
+ {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
+ MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
+ MOPT_CLEAR | MOPT_Q},
{Opt_usrjquota, 0, MOPT_Q},
{Opt_grpjquota, 0, MOPT_Q},
{Opt_offusrjquota, 0, MOPT_Q},
@@ -1836,13 +1842,17 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
#ifdef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) &&
- (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
- "mount options ignored.");
- clear_opt(sb, USRQUOTA);
- clear_opt(sb, GRPQUOTA);
- } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
+ ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
+ "Cannot enable project quota enforcement.");
+ return 0;
+ }
+ if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@@ -2741,7 +2751,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
- sb_start_write(sb);
for (group = elr->lr_next_group; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
@@ -2768,8 +2777,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
- sb_end_write(sb);
-
return ret;
}
@@ -2834,19 +2841,43 @@ cont_thread:
mutex_unlock(&eli->li_list_mtx);
goto exit_thread;
}
-
list_for_each_safe(pos, n, &eli->li_request_list) {
+ int err = 0;
+ int progress = 0;
elr = list_entry(pos, struct ext4_li_request,
lr_request);
- if (time_after_eq(jiffies, elr->lr_next_sched)) {
- if (ext4_run_li_request(elr) != 0) {
- /* error, remove the lazy_init job */
- ext4_remove_li_request(elr);
- continue;
+ if (time_before(jiffies, elr->lr_next_sched)) {
+ if (time_before(elr->lr_next_sched, next_wakeup))
+ next_wakeup = elr->lr_next_sched;
+ continue;
+ }
+ if (down_read_trylock(&elr->lr_super->s_umount)) {
+ if (sb_start_write_trylock(elr->lr_super)) {
+ progress = 1;
+ /*
+ * We hold sb->s_umount, sb can not
+ * be removed from the list, it is
+ * now safe to drop li_list_mtx
+ */
+ mutex_unlock(&eli->li_list_mtx);
+ err = ext4_run_li_request(elr);
+ sb_end_write(elr->lr_super);
+ mutex_lock(&eli->li_list_mtx);
+ n = pos->next;
}
+ up_read((&elr->lr_super->s_umount));
+ }
+ /* error, remove the lazy_init job */
+ if (err) {
+ ext4_remove_li_request(elr);
+ continue;
+ }
+ if (!progress) {
+ elr->lr_next_sched = jiffies +
+ (prandom_u32()
+ % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
}
-
if (time_before(elr->lr_next_sched, next_wakeup))
next_wakeup = elr->lr_next_sched;
}
@@ -3179,6 +3210,8 @@ int ext4_calculate_overhead(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ struct inode *j_inode;
+ unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0;
char *buf = (char *) get_zeroed_page(GFP_NOFS);
@@ -3209,10 +3242,23 @@ int ext4_calculate_overhead(struct super_block *sb)
memset(buf, 0, PAGE_SIZE);
cond_resched();
}
- /* Add the internal journal blocks as well */
+
+ /*
+ * Add the internal journal blocks whether the journal has been
+ * loaded or not
+ */
if (sbi->s_journal && !sbi->journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
-
+ else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
+ j_inode = ext4_get_journal_inode(sb, j_inum);
+ if (j_inode) {
+ j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
+ overhead += EXT4_NUM_B2C(sbi, j_blocks);
+ iput(j_inode);
+ } else {
+ ext4_msg(sb, KERN_ERR, "can't get journal size");
+ }
+ }
sbi->s_overhead = overhead;
smp_wmb();
free_page((unsigned long) buf);
@@ -4208,18 +4254,16 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
write_unlock(&journal->j_state_lock);
}
-static journal_t *ext4_get_journal(struct super_block *sb,
- unsigned int journal_inum)
+static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ unsigned int journal_inum)
{
struct inode *journal_inode;
- journal_t *journal;
-
- BUG_ON(!ext4_has_feature_journal(sb));
-
- /* First, test for the existence of a valid inode on disk. Bad
- * things happen if we iget() an unused inode, as the subsequent
- * iput() will try to delete it. */
+ /*
+ * Test for the existence of a valid inode on disk. Bad things
+ * happen if we iget() an unused inode, as the subsequent iput()
+ * will try to delete it.
+ */
journal_inode = ext4_iget(sb, journal_inum);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
@@ -4239,6 +4283,20 @@ static journal_t *ext4_get_journal(struct super_block *sb,
iput(journal_inode);
return NULL;
}
+ return journal_inode;
+}
+
+static journal_t *ext4_get_journal(struct super_block *sb,
+ unsigned int journal_inum)
+{
+ struct inode *journal_inode;
+ journal_t *journal;
+
+ BUG_ON(!ext4_has_feature_journal(sb));
+
+ journal_inode = ext4_get_journal_inode(sb, journal_inum);
+ if (!journal_inode)
+ return NULL;
journal = jbd2_journal_init_inode(journal_inode);
if (!journal) {
@@ -5250,12 +5308,18 @@ static int ext4_enable_quotas(struct super_block *sb)
le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
+ bool quota_mopt[EXT4_MAXQUOTAS] = {
+ test_opt(sb, USRQUOTA),
+ test_opt(sb, GRPQUOTA),
+ test_opt(sb, PRJQUOTA),
+ };
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
- DQUOT_USAGE_ENABLED);
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 128ea78b8958..557b3b0d668c 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -30,7 +30,6 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
char *caddr, *paddr = NULL;
struct fscrypt_str cstr, pstr;
struct fscrypt_symlink_data *sd;
- loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 max_size = inode->i_sb->s_blocksize;
@@ -49,7 +48,6 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
if (IS_ERR(cpage))
return ERR_CAST(cpage);
caddr = page_address(cpage);
- caddr[size] = 0;
}
/* Symlink is encrypted */
@@ -65,16 +63,14 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
if (res)
goto errout;
+ paddr = pstr.name;
res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
- if (res < 0)
+ if (res)
goto errout;
- paddr = pstr.name;
-
/* Null-terminate the name */
- if (res <= pstr.len)
- paddr[res] = '\0';
+ paddr[pstr.len] = '\0';
if (cpage)
put_page(cpage);
set_delayed_call(done, kfree_link, paddr);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2eb935ca5d9e..c15d63389957 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -199,6 +199,8 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
}
while (!IS_LAST_ENTRY(entry)) {
+ if (entry->e_value_block != 0)
+ return -EFSCORRUPTED;
if (entry->e_value_size != 0 &&
(value_start + le16_to_cpu(entry->e_value_offs) <
(void *)e + sizeof(__u32) ||
@@ -641,7 +643,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
@@ -661,7 +663,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
/* Compute min_offs and last. */
last = s->first;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
@@ -669,7 +671,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
}
free = min_offs - ((void *)last - s->base) - sizeof(__u32);
if (!s->not_found) {
- if (!s->here->e_value_block && s->here->e_value_size) {
+ if (s->here->e_value_size) {
size_t size = le32_to_cpu(s->here->e_value_size);
free += EXT4_XATTR_SIZE(size);
}
@@ -691,7 +693,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
s->here->e_name_len = name_len;
memcpy(s->here->e_name, i->name, name_len);
} else {
- if (!s->here->e_value_block && s->here->e_value_size) {
+ if (s->here->e_value_size) {
void *first_val = s->base + min_offs;
size_t offs = le16_to_cpu(s->here->e_value_offs);
void *val = s->base + offs;
@@ -725,8 +727,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
last = s->first;
while (!IS_LAST_ENTRY(last)) {
size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block &&
- last->e_value_size && o < offs)
+ if (last->e_value_size && o < offs)
last->e_value_offs =
cpu_to_le16(o + size);
last = EXT4_XATTR_NEXT(last);
@@ -1318,18 +1319,19 @@ retry:
*/
static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
int value_offs_shift, void *to,
- void *from, size_t n, int blocksize)
+ void *from, size_t n)
{
struct ext4_xattr_entry *last = entry;
int new_offs;
+ /* We always shift xattr headers further thus offsets get lower */
+ BUG_ON(value_offs_shift > 0);
+
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
- BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
- > blocksize);
last->e_value_offs = cpu_to_le16(new_offs);
}
}
@@ -1338,6 +1340,141 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
}
/*
+ * Move xattr pointed to by 'entry' from inode into external xattr block
+ */
+static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
+ struct ext4_inode *raw_inode,
+ struct ext4_xattr_entry *entry)
+{
+ struct ext4_xattr_ibody_find *is = NULL;
+ struct ext4_xattr_block_find *bs = NULL;
+ char *buffer = NULL, *b_entry_name = NULL;
+ size_t value_offs, value_size;
+ struct ext4_xattr_info i = {
+ .value = NULL,
+ .value_len = 0,
+ .name_index = entry->e_name_index,
+ };
+ struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int error;
+
+ value_offs = le16_to_cpu(entry->e_value_offs);
+ value_size = le32_to_cpu(entry->e_value_size);
+
+ is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
+ bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
+ buffer = kmalloc(value_size, GFP_NOFS);
+ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
+ if (!is || !bs || !buffer || !b_entry_name) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ is->s.not_found = -ENODATA;
+ bs->s.not_found = -ENODATA;
+ is->iloc.bh = NULL;
+ bs->bh = NULL;
+
+ /* Save the entry name and the entry value */
+ memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ memcpy(b_entry_name, entry->e_name, entry->e_name_len);
+ b_entry_name[entry->e_name_len] = '\0';
+ i.name = b_entry_name;
+
+ error = ext4_get_inode_loc(inode, &is->iloc);
+ if (error)
+ goto out;
+
+ error = ext4_xattr_ibody_find(inode, &i, is);
+ if (error)
+ goto out;
+
+ /* Remove the chosen entry from the inode */
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
+ if (error)
+ goto out;
+
+ i.name = b_entry_name;
+ i.value = buffer;
+ i.value_len = value_size;
+ error = ext4_xattr_block_find(inode, &i, bs);
+ if (error)
+ goto out;
+
+ /* Add entry which was removed from the inode into the block */
+ error = ext4_xattr_block_set(handle, inode, &i, bs);
+ if (error)
+ goto out;
+ error = 0;
+out:
+ kfree(b_entry_name);
+ kfree(buffer);
+ if (is)
+ brelse(is->iloc.bh);
+ kfree(is);
+ kfree(bs);
+
+ return error;
+}
+
+static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
+ struct ext4_inode *raw_inode,
+ int isize_diff, size_t ifree,
+ size_t bfree, int *total_ino)
+{
+ struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ struct ext4_xattr_entry *small_entry;
+ struct ext4_xattr_entry *entry;
+ struct ext4_xattr_entry *last;
+ unsigned int entry_size; /* EA entry size */
+ unsigned int total_size; /* EA entry size + value size */
+ unsigned int min_total_size;
+ int error;
+
+ while (isize_diff > ifree) {
+ entry = NULL;
+ small_entry = NULL;
+ min_total_size = ~0U;
+ last = IFIRST(header);
+ /* Find the entry best suited to be pushed into EA block */
+ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+ total_size =
+ EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
+ EXT4_XATTR_LEN(last->e_name_len);
+ if (total_size <= bfree &&
+ total_size < min_total_size) {
+ if (total_size + ifree < isize_diff) {
+ small_entry = last;
+ } else {
+ entry = last;
+ min_total_size = total_size;
+ }
+ }
+ }
+
+ if (entry == NULL) {
+ if (small_entry == NULL)
+ return -ENOSPC;
+ entry = small_entry;
+ }
+
+ entry_size = EXT4_XATTR_LEN(entry->e_name_len);
+ total_size = entry_size +
+ EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
+ error = ext4_xattr_move_to_block(handle, inode, raw_inode,
+ entry);
+ if (error)
+ return error;
+
+ *total_ino -= entry_size;
+ ifree += total_size;
+ bfree -= total_size;
+ }
+
+ return 0;
+}
+
+/*
* Expand an inode by new_extra_isize bytes when EAs are present.
* Returns 0 on success or negative error number on failure.
*/
@@ -1345,14 +1482,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_entry *entry, *last, *first;
struct buffer_head *bh = NULL;
- struct ext4_xattr_ibody_find *is = NULL;
- struct ext4_xattr_block_find *bs = NULL;
- char *buffer = NULL, *b_entry_name = NULL;
- size_t min_offs, free;
+ size_t min_offs;
+ size_t ifree, bfree;
int total_ino;
- void *base, *start, *end;
+ void *base, *end;
int error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
int isize_diff; /* How much do we need to grow i_extra_isize */
@@ -1368,34 +1502,24 @@ retry:
goto out;
header = IHDR(inode, raw_inode);
- entry = IFIRST(header);
/*
* Check if enough free space is available in the inode to shift the
* entries ahead by new_extra_isize.
*/
- base = start = entry;
+ base = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
min_offs = end - base;
- last = entry;
total_ino = sizeof(struct ext4_xattr_ibody_header);
error = xattr_check_inode(inode, header, end);
if (error)
goto cleanup;
- free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
- if (free >= isize_diff) {
- entry = IFIRST(header);
- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- - new_extra_isize, (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
- (void *)header, total_ino,
- inode->i_sb->s_blocksize);
- EXT4_I(inode)->i_extra_isize = new_extra_isize;
- goto out;
- }
+ ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
+ if (ifree >= isize_diff)
+ goto shift;
/*
* Enough free space isn't available in the inode, check if
@@ -1413,146 +1537,44 @@ retry:
goto cleanup;
}
base = BHDR(bh);
- first = BFIRST(bh);
end = bh->b_data + bh->b_size;
min_offs = end - base;
- free = ext4_xattr_free_space(first, &min_offs, base, NULL);
- if (free < isize_diff) {
+ bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
+ NULL);
+ if (bfree + ifree < isize_diff) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
brelse(bh);
goto retry;
}
- error = -1;
+ error = -ENOSPC;
goto cleanup;
}
} else {
- free = inode->i_sb->s_blocksize;
+ bfree = inode->i_sb->s_blocksize;
}
- while (isize_diff > 0) {
- size_t offs, size, entry_size;
- struct ext4_xattr_entry *small_entry = NULL;
- struct ext4_xattr_info i = {
- .value = NULL,
- .value_len = 0,
- };
- unsigned int total_size; /* EA entry size + value size */
- unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
- unsigned int min_total_size = ~0U;
-
- is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
- bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
- if (!is || !bs) {
- error = -ENOMEM;
- goto cleanup;
- }
-
- is->s.not_found = -ENODATA;
- bs->s.not_found = -ENODATA;
- is->iloc.bh = NULL;
- bs->bh = NULL;
-
- last = IFIRST(header);
- /* Find the entry best suited to be pushed into EA block */
- entry = NULL;
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- total_size =
- EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
- EXT4_XATTR_LEN(last->e_name_len);
- if (total_size <= free && total_size < min_total_size) {
- if (total_size < isize_diff) {
- small_entry = last;
- } else {
- entry = last;
- min_total_size = total_size;
- }
- }
- }
-
- if (entry == NULL) {
- if (small_entry) {
- entry = small_entry;
- } else {
- if (!tried_min_extra_isize &&
- s_min_extra_isize) {
- tried_min_extra_isize++;
- new_extra_isize = s_min_extra_isize;
- kfree(is); is = NULL;
- kfree(bs); bs = NULL;
- brelse(bh);
- goto retry;
- }
- error = -1;
- goto cleanup;
- }
- }
- offs = le16_to_cpu(entry->e_value_offs);
- size = le32_to_cpu(entry->e_value_size);
- entry_size = EXT4_XATTR_LEN(entry->e_name_len);
- i.name_index = entry->e_name_index,
- buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
- b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
- if (!buffer || !b_entry_name) {
- error = -ENOMEM;
- goto cleanup;
+ error = ext4_xattr_make_inode_space(handle, inode, raw_inode,
+ isize_diff, ifree, bfree,
+ &total_ino);
+ if (error) {
+ if (error == -ENOSPC && !tried_min_extra_isize &&
+ s_min_extra_isize) {
+ tried_min_extra_isize++;
+ new_extra_isize = s_min_extra_isize;
+ brelse(bh);
+ goto retry;
}
- /* Save the entry name and the entry value */
- memcpy(buffer, (void *)IFIRST(header) + offs,
- EXT4_XATTR_SIZE(size));
- memcpy(b_entry_name, entry->e_name, entry->e_name_len);
- b_entry_name[entry->e_name_len] = '\0';
- i.name = b_entry_name;
-
- error = ext4_get_inode_loc(inode, &is->iloc);
- if (error)
- goto cleanup;
-
- error = ext4_xattr_ibody_find(inode, &i, is);
- if (error)
- goto cleanup;
-
- /* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
- if (error)
- goto cleanup;
- total_ino -= entry_size;
-
- entry = IFIRST(header);
- if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
- shift_bytes = isize_diff;
- else
- shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
- /* Adjust the offsets and shift the remaining entries ahead */
- ext4_xattr_shift_entries(entry, -shift_bytes,
- (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
- EXT4_I(inode)->i_extra_isize + shift_bytes,
- (void *)header, total_ino, inode->i_sb->s_blocksize);
-
- isize_diff -= shift_bytes;
- EXT4_I(inode)->i_extra_isize += shift_bytes;
- header = IHDR(inode, raw_inode);
-
- i.name = b_entry_name;
- i.value = buffer;
- i.value_len = size;
- error = ext4_xattr_block_find(inode, &i, bs);
- if (error)
- goto cleanup;
-
- /* Add entry which was removed from the inode into the block */
- error = ext4_xattr_block_set(handle, inode, &i, bs);
- if (error)
- goto cleanup;
- kfree(b_entry_name);
- kfree(buffer);
- b_entry_name = NULL;
- buffer = NULL;
- brelse(is->iloc.bh);
- kfree(is);
- kfree(bs);
+ goto cleanup;
}
+shift:
+ /* Adjust the offsets and shift the remaining entries ahead */
+ ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize
+ - new_extra_isize, (void *)raw_inode +
+ EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
+ (void *)header, total_ino);
+ EXT4_I(inode)->i_extra_isize = new_extra_isize;
brelse(bh);
out:
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
@@ -1560,12 +1582,6 @@ out:
return 0;
cleanup:
- kfree(b_entry_name);
- kfree(buffer);
- if (is)
- brelse(is->iloc.bh);
- kfree(is);
- kfree(bs);
brelse(bh);
/*
* We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
@@ -1734,7 +1750,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
*name++;
}
- if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+ if (entry->e_value_size != 0) {
__le32 *value = (__le32 *)((char *)header +
le16_to_cpu(entry->e_value_offs));
for (n = (le32_to_cpu(entry->e_value_size) +
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 4dcc9e28dc5c..6fe23af509e1 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -109,14 +109,16 @@ fail:
return ERR_PTR(-EINVAL);
}
-static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
+static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
+ const struct posix_acl *acl, size_t *size)
{
struct f2fs_acl_header *f2fs_acl;
struct f2fs_acl_entry *entry;
int i;
- f2fs_acl = f2fs_kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
- sizeof(struct f2fs_acl_entry), GFP_NOFS);
+ f2fs_acl = f2fs_kmalloc(sbi, sizeof(struct f2fs_acl_header) +
+ acl->a_count * sizeof(struct f2fs_acl_entry),
+ GFP_NOFS);
if (!f2fs_acl)
return ERR_PTR(-ENOMEM);
@@ -175,7 +177,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
if (retval > 0) {
- value = f2fs_kmalloc(retval, GFP_F2FS_ZERO);
+ value = f2fs_kmalloc(F2FS_I_SB(inode), retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value,
@@ -210,12 +212,10 @@ static int __f2fs_set_acl(struct inode *inode, int type,
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
set_acl_inode(inode, inode->i_mode);
- if (error == 0)
- acl = NULL;
}
break;
@@ -230,7 +230,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
}
if (acl) {
- value = f2fs_acl_to_disk(acl, &size);
+ value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
if (IS_ERR(value)) {
clear_inode_flag(inode, FI_ACL_MODE);
return (int)PTR_ERR(value);
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index b2334d11dae8..2c685185c24d 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -41,7 +41,6 @@ extern int f2fs_set_acl(struct inode *, struct posix_acl *, int);
extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
struct page *);
#else
-#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
#define f2fs_set_acl NULL
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f94d01e7d001..7e9b504bd8b2 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -28,7 +28,7 @@ struct kmem_cache *inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
- set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_bios(sbi);
@@ -267,7 +267,6 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
- struct blk_plug plug;
long diff, written;
/* collect a number of dirty meta pages and write together */
@@ -280,9 +279,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
diff = nr_pages_to_write(sbi, META, wbc);
- blk_start_plug(&plug);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
- blk_finish_plug(&plug);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -388,6 +385,9 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
+#ifdef CONFIG_MIGRATION
+ .migratepage = f2fs_migrate_page,
+#endif
};
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -491,7 +491,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
spin_lock(&im->ino_lock);
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(FAULT_ORPHAN)) {
+ if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
return -ENOSPC;
}
@@ -531,8 +531,20 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
+ struct node_info ni;
+ int err = acquire_orphan_inode(sbi);
+
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
+ return err;
+ }
- inode = f2fs_iget(sbi->sb, ino);
+ __add_ino_entry(sbi, ino, ORPHAN_INO);
+
+ inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
/*
* there should be a bug that we can't find the entry
@@ -546,6 +558,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
+
+ get_node_info(sbi, ino, &ni);
+
+ /* ENOMEM was fully retried in f2fs_evict_inode. */
+ if (ni.blk_addr != NULL_ADDR) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
+ return -EIO;
+ }
+ __remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
}
@@ -554,7 +578,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
int err;
- if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
+ if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -578,7 +602,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
- clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
+ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
return 0;
}
@@ -639,45 +663,55 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
}
}
-static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
- block_t cp_addr, unsigned long long *version)
+static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
+ struct f2fs_checkpoint **cp_block, struct page **cp_page,
+ unsigned long long *version)
{
- struct page *cp_page_1, *cp_page_2 = NULL;
unsigned long blk_size = sbi->blocksize;
- struct f2fs_checkpoint *cp_block;
- unsigned long long cur_version = 0, pre_version = 0;
- size_t crc_offset;
+ size_t crc_offset = 0;
__u32 crc = 0;
- /* Read the 1st cp block in this CP pack */
- cp_page_1 = get_meta_page(sbi, cp_addr);
+ *cp_page = get_meta_page(sbi, cp_addr);
+ *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
- /* get the version number */
- cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
- crc_offset = le32_to_cpu(cp_block->checksum_offset);
- if (crc_offset >= blk_size)
- goto invalid_cp1;
+ crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
+ if (crc_offset >= blk_size) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid crc_offset: %zu", crc_offset);
+ return -EINVAL;
+ }
- crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
- if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
- goto invalid_cp1;
+ crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
+ + crc_offset)));
+ if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
+ return -EINVAL;
+ }
- pre_version = cur_cp_version(cp_block);
+ *version = cur_cp_version(*cp_block);
+ return 0;
+}
- /* Read the 2nd cp block in this CP pack */
- cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
- cp_page_2 = get_meta_page(sbi, cp_addr);
+static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+ block_t cp_addr, unsigned long long *version)
+{
+ struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
+ struct f2fs_checkpoint *cp_block = NULL;
+ unsigned long long cur_version = 0, pre_version = 0;
+ int err;
- cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
- crc_offset = le32_to_cpu(cp_block->checksum_offset);
- if (crc_offset >= blk_size)
- goto invalid_cp2;
+ err = get_checkpoint_version(sbi, cp_addr, &cp_block,
+ &cp_page_1, version);
+ if (err)
+ goto invalid_cp1;
+ pre_version = *version;
- crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
- if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
+ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+ err = get_checkpoint_version(sbi, cp_addr, &cp_block,
+ &cp_page_2, version);
+ if (err)
goto invalid_cp2;
-
- cur_version = cur_cp_version(cp_block);
+ cur_version = *version;
if (cur_version == pre_version) {
*version = cur_version;
@@ -972,10 +1006,40 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
finish_wait(&sbi->cp_wait, &wait);
}
+static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+
+ spin_lock(&sbi->cp_lock);
+
+ if (cpc->reason == CP_UMOUNT)
+ __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+
+ if (cpc->reason == CP_FASTBOOT)
+ __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+
+ if (orphan_num)
+ __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+
+ /* set this flag to activate crc|cp_ver for recovery */
+ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
+
+ spin_unlock(&sbi->cp_lock);
+}
+
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
@@ -984,19 +1048,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
int i;
int cp_payload_blks = __cp_payload(sbi);
- block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
- bool invalidate = false;
struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
- /*
- * This avoids to conduct wrong roll-forward operations and uses
- * metapages, so should be called prior to sync_meta_pages below.
- */
- if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk))
- invalidate = true;
-
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
@@ -1036,10 +1091,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
+ spin_lock(&sbi->cp_lock);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
- set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+ __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
- clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+ __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+ spin_unlock(&sbi->cp_lock);
orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@@ -1054,23 +1111,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
cp_payload_blks + data_sum_blocks +
orphan_blocks);
- if (cpc->reason == CP_UMOUNT)
- set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
- else
- clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
-
- if (cpc->reason == CP_FASTBOOT)
- set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
- else
- clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
-
- if (orphan_num)
- set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
- else
- clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
-
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+ /* update ckpt flag for checkpoint */
+ update_ckpt_flags(sbi, cpc);
/* update SIT/NAT bitmap */
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
@@ -1137,14 +1179,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
- /*
- * invalidate meta page which is used temporarily for zeroing out
- * block at the end of warm node chain.
- */
- if (invalidate)
- invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
- discard_blk);
-
release_ino_entry(sbi, false);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1152,6 +1186,17 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
+ clear_sbi_flag(sbi, SBI_NEED_CP);
+
+ /*
+ * redirty superblock if metadata like node page or inode cache is
+ * updated during writing checkpoint.
+ */
+ if (get_pages(sbi, F2FS_DIRTY_NODES) ||
+ get_pages(sbi, F2FS_DIRTY_IMETA))
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+
+ f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
return 0;
}
@@ -1190,6 +1235,18 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
+ /* this is the case of multiple fstrims without any changes */
+ if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
+ f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
+ f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
+ f2fs_bug_on(sbi, prefree_segments(sbi));
+ flush_sit_entries(sbi, cpc);
+ clear_prefree_segments(sbi, cpc);
+ f2fs_wait_all_discard_bio(sbi);
+ unblock_operations(sbi);
+ goto out;
+ }
+
/*
* update checkpoint pack index
* Increase the version number so that
@@ -1205,6 +1262,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
+ f2fs_wait_all_discard_bio(sbi);
+
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ccb401eebc11..0d0177c9149c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -34,6 +34,11 @@ static void f2fs_read_end_io(struct bio *bio)
struct bio_vec *bvec;
int i;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO))
+ bio->bi_error = -EIO;
+#endif
+
if (f2fs_bio_encrypted(bio)) {
if (bio->bi_error) {
fscrypt_release_ctx(bio->bi_private);
@@ -626,11 +631,13 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret = 0;
map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
- map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from));
- map.m_next_pgofs = NULL;
+ map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
+ if (map.m_len > map.m_lblk)
+ map.m_len -= map.m_lblk;
+ else
+ map.m_len = 0;
- if (f2fs_encrypted_inode(inode))
- return 0;
+ map.m_next_pgofs = NULL;
if (iocb->ki_flags & IOCB_DIRECT) {
ret = f2fs_convert_inline_inode(inode);
@@ -672,6 +679,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
bool allocated = false;
block_t blkaddr;
+ if (!maxblocks)
+ return 0;
+
map->m_len = 0;
map->m_flags = 0;
@@ -783,6 +793,7 @@ skip:
err = reserve_new_blocks(&dn, prealloc);
if (err)
goto sync_out;
+ allocated = dn.node_changed;
map->m_len += dn.ofs_in_node - ofs_in_node;
if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
@@ -966,8 +977,8 @@ out:
return ret;
}
-struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
+static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
+ unsigned nr_pages)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct fscrypt_ctx *ctx = NULL;
@@ -1284,7 +1295,7 @@ write:
if (!wbc->for_reclaim)
need_balance_fs = true;
- else if (has_not_enough_free_secs(sbi, 0))
+ else if (has_not_enough_free_secs(sbi, 0, 0))
goto redirty_out;
err = -EAGAIN;
@@ -1344,6 +1355,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int cycled;
int range_whole = 0;
int tag;
+ int nwritten = 0;
pagevec_init(&pvec, 0);
@@ -1418,6 +1430,8 @@ continue_unlock:
done_index = page->index + 1;
done = 1;
break;
+ } else {
+ nwritten++;
}
if (--wbc->nr_to_write <= 0 &&
@@ -1439,6 +1453,10 @@ continue_unlock:
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
+ if (nwritten)
+ f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
+ NULL, 0, DATA, WRITE);
+
return ret;
}
@@ -1480,7 +1498,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
* if some pages were truncated, we cannot guarantee its mapping->host
* to detect pending bios.
*/
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_inode(inode);
return ret;
@@ -1518,8 +1535,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
* we already allocated all the blocks, so we don't need to get
* the block addresses when there is no need to fill the page.
*/
- if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
- len == PAGE_SIZE)
+ if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE)
return 0;
if (f2fs_has_inline_data(inode) ||
@@ -1616,7 +1632,7 @@ repeat:
if (err)
goto fail;
- if (need_balance && has_not_enough_free_secs(sbi, 0)) {
+ if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
unlock_page(page);
f2fs_balance_fs(sbi, true);
lock_page(page);
@@ -1633,22 +1649,12 @@ repeat:
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
- if (len == PAGE_SIZE)
- goto out_update;
- if (PageUptodate(page))
- goto out_clear;
-
- if ((pos & PAGE_MASK) >= i_size_read(inode)) {
- unsigned start = pos & (PAGE_SIZE - 1);
- unsigned end = start + len;
-
- /* Reading beyond i_size is simple: memset to zero */
- zero_user_segments(page, 0, start, end, PAGE_SIZE);
- goto out_update;
- }
+ if (len == PAGE_SIZE || PageUptodate(page))
+ return 0;
if (blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_SIZE);
+ SetPageUptodate(page);
} else {
struct bio *bio;
@@ -1676,11 +1682,6 @@ repeat:
goto fail;
}
}
-out_update:
- if (!PageUptodate(page))
- SetPageUptodate(page);
-out_clear:
- clear_cold_data(page);
return 0;
fail:
@@ -1698,11 +1699,26 @@ static int f2fs_write_end(struct file *file,
trace_f2fs_write_end(inode, pos, len, copied);
+ /*
+ * This should be come from len == PAGE_SIZE, and we expect copied
+ * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
+ * let generic_perform_write() try to copy data again through copied=0.
+ */
+ if (!PageUptodate(page)) {
+ if (unlikely(copied != PAGE_SIZE))
+ copied = 0;
+ else
+ SetPageUptodate(page);
+ }
+ if (!copied)
+ goto unlock_out;
+
set_page_dirty(page);
+ clear_cold_data(page);
if (pos + copied > i_size_read(inode))
f2fs_i_size_write(inode, pos + copied);
-
+unlock_out:
f2fs_put_page(page, 1);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
@@ -1873,6 +1889,58 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping, block, get_data_block_bmap);
}
+#ifdef CONFIG_MIGRATION
+#include <linux/migrate.h>
+
+int f2fs_migrate_page(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ int rc, extra_count;
+ struct f2fs_inode_info *fi = F2FS_I(mapping->host);
+ bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
+
+ BUG_ON(PageWriteback(page));
+
+ /* migrating an atomic written page is safe with the inmem_lock hold */
+ if (atomic_written && !mutex_trylock(&fi->inmem_lock))
+ return -EAGAIN;
+
+ /*
+ * A reference is expected if PagePrivate set when move mapping,
+ * however F2FS breaks this for maintaining dirty page counts when
+ * truncating pages. So here adjusting the 'extra_count' make it work.
+ */
+ extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
+ rc = migrate_page_move_mapping(mapping, newpage,
+ page, NULL, mode, extra_count);
+ if (rc != MIGRATEPAGE_SUCCESS) {
+ if (atomic_written)
+ mutex_unlock(&fi->inmem_lock);
+ return rc;
+ }
+
+ if (atomic_written) {
+ struct inmem_pages *cur;
+ list_for_each_entry(cur, &fi->inmem_pages, list)
+ if (cur->page == page) {
+ cur->page = newpage;
+ break;
+ }
+ mutex_unlock(&fi->inmem_lock);
+ put_page(page);
+ get_page(newpage);
+ }
+
+ if (PagePrivate(page))
+ SetPagePrivate(newpage);
+ set_page_private(newpage, page_private(page));
+
+ migrate_page_copy(newpage, page);
+
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
@@ -1885,4 +1953,7 @@ const struct address_space_operations f2fs_dblock_aops = {
.releasepage = f2fs_release_page,
.direct_IO = f2fs_direct_IO,
.bmap = f2fs_bmap,
+#ifdef CONFIG_MIGRATION
+ .migratepage = f2fs_migrate_page,
+#endif
};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index badd407bb622..fb245bd302e4 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -45,6 +45,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+ si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
@@ -54,6 +55,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
si->valid_count = valid_user_blocks(sbi);
+ si->discard_blks = discard_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
si->inline_xattr = atomic_read(&sbi->inline_xattr);
@@ -154,7 +156,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct sit_info);
si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
- si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+ si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+ if (f2fs_discard_en(sbi))
+ si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
if (sbi->segs_per_sec > 1)
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
@@ -228,8 +232,13 @@ static int stat_show(struct seq_file *s, void *v)
si->ssa_area_segs, si->main_area_segs);
seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
si->overp_segs, si->rsvd_segs);
- seq_printf(s, "Utilization: %d%% (%d valid blocks)\n",
- si->utilization, si->valid_count);
+ if (test_opt(si->sbi, DISCARD))
+ seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n",
+ si->utilization, si->valid_count, si->discard_blks);
+ else
+ seq_printf(s, "Utilization: %u%% (%u valid blocks)\n",
+ si->utilization, si->valid_count);
+
seq_printf(s, " - Node: %u (Inode: %u, ",
si->valid_node_count, si->valid_inode_count);
seq_printf(s, "Other: %u)\n - Data: %u\n",
@@ -311,6 +320,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_data, si->ndirty_files);
seq_printf(s, " - meta: %4lld in %4d\n",
si->ndirty_meta, si->meta_pages);
+ seq_printf(s, " - imeta: %4lld\n",
+ si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d\n",
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 9054aeac8015..12b5836a1033 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
-unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_UNKNOWN] = DT_UNKNOWN,
[F2FS_FT_REG_FILE] = DT_REG,
[F2FS_FT_DIR] = DT_DIR,
@@ -172,7 +172,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
int max_slots;
f2fs_hash_t namehash;
- namehash = f2fs_dentry_hash(&name);
+ if(fname->hash)
+ namehash = cpu_to_le32(fname->hash);
+ else
+ namehash = f2fs_dentry_hash(&name);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@@ -212,31 +215,17 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
return de;
}
-/*
- * Find an entry in the specified directory with the wanted name.
- * It returns the page where the entry was found (as a parameter - res_page),
- * and the entry itself. Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
- */
-struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
- const struct qstr *child, struct page **res_page)
+struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
+ struct fscrypt_name *fname, struct page **res_page)
{
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
- struct fscrypt_name fname;
- int err;
-
- err = fscrypt_setup_filename(dir, child, 1, &fname);
- if (err) {
- *res_page = ERR_PTR(err);
- return NULL;
- }
if (f2fs_has_inline_dentry(dir)) {
*res_page = NULL;
- de = find_in_inline_dir(dir, &fname, res_page);
+ de = find_in_inline_dir(dir, fname, res_page);
goto out;
}
@@ -256,11 +245,35 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
for (level = 0; level < max_depth; level++) {
*res_page = NULL;
- de = find_in_level(dir, level, &fname, res_page);
+ de = find_in_level(dir, level, fname, res_page);
if (de || IS_ERR(*res_page))
break;
}
out:
+ return de;
+}
+
+/*
+ * Find an entry in the specified directory with the wanted name.
+ * It returns the page where the entry was found (as a parameter - res_page),
+ * and the entry itself. Page is returned mapped and unlocked.
+ * Entry is guaranteed to be valid.
+ */
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
+ const struct qstr *child, struct page **res_page)
+{
+ struct f2fs_dir_entry *de = NULL;
+ struct fscrypt_name fname;
+ int err;
+
+ err = fscrypt_setup_filename(dir, child, 1, &fname);
+ if (err) {
+ *res_page = ERR_PTR(err);
+ return NULL;
+ }
+
+ de = __f2fs_find_entry(dir, &fname, res_page);
+
fscrypt_free_filename(&fname);
return de;
}
@@ -375,7 +388,8 @@ static int make_empty_dir(struct inode *inode,
}
struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
- const struct qstr *name, struct page *dpage)
+ const struct qstr *new_name, const struct qstr *orig_name,
+ struct page *dpage)
{
struct page *page;
int err;
@@ -400,7 +414,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
if (err)
goto put_error;
- err = f2fs_init_security(inode, dir, name, page);
+ err = f2fs_init_security(inode, dir, orig_name, page);
if (err)
goto put_error;
@@ -417,8 +431,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
set_cold_node(inode, page);
}
- if (name)
- init_dent_inode(name, page);
+ if (new_name)
+ init_dent_inode(new_name, page);
/*
* This file should be checkpointed during fsync.
@@ -496,7 +510,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
de->ino = cpu_to_le32(ino);
set_de_type(de, mode);
for (i = 0; i < slots; i++) {
- test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
+ __set_bit_le(bit_pos + i, (void *)d->bitmap);
/* avoid wrong garbage data for readdir */
if (i)
(de + i)->name_len = 0;
@@ -504,6 +518,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
}
int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
+ const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode)
{
unsigned int bit_pos;
@@ -530,7 +545,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(FAULT_DIR_DEPTH))
+ if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
return -ENOSPC;
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
@@ -569,7 +584,8 @@ add_dentry:
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, new_name, NULL);
+ page = init_inode_metadata(inode, dir, new_name,
+ orig_name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -599,6 +615,26 @@ fail:
return err;
}
+int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
+{
+ struct qstr new_name;
+ int err = -EAGAIN;
+
+ new_name.name = fname_name(fname);
+ new_name.len = fname_len(fname);
+
+ if (f2fs_has_inline_dentry(dir))
+ err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname,
+ inode, ino, mode);
+ if (err == -EAGAIN)
+ err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname,
+ inode, ino, mode);
+
+ f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
+ return err;
+}
+
/*
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
@@ -607,24 +643,15 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
- struct qstr new_name;
int err;
err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
- new_name.name = fname_name(&fname);
- new_name.len = fname_len(&fname);
-
- err = -EAGAIN;
- if (f2fs_has_inline_dentry(dir))
- err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
- if (err == -EAGAIN)
- err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode);
+ err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
fscrypt_free_filename(&fname);
- f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
}
@@ -634,7 +661,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, NULL, NULL);
+ page = init_inode_metadata(inode, dir, NULL, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -786,19 +813,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
- int ret;
+ int err;
- de_name.name = f2fs_kmalloc(de_name.len, GFP_NOFS);
- if (!de_name.name)
- return false;
-
- memcpy(de_name.name, d->filename[bit_pos], de_name.len);
-
- ret = fscrypt_fname_disk_to_usr(d->inode,
+ err = fscrypt_fname_disk_to_usr(d->inode,
(u32)de->hash_code, 0,
&de_name, fstr);
- kfree(de_name.name);
- if (ret < 0)
+ if (err)
return true;
de_name = *fstr;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 14f5fe2b841e..9e8de18a168a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -46,6 +46,8 @@ enum {
FAULT_BLOCK,
FAULT_DIR_DEPTH,
FAULT_EVICT_INODE,
+ FAULT_IO,
+ FAULT_CHECKPOINT,
FAULT_MAX,
};
@@ -55,40 +57,8 @@ struct f2fs_fault_info {
unsigned int inject_type;
};
-extern struct f2fs_fault_info f2fs_fault;
extern char *fault_name[FAULT_MAX];
-#define IS_FAULT_SET(type) (f2fs_fault.inject_type & (1 << (type)))
-
-static inline bool time_to_inject(int type)
-{
- if (!f2fs_fault.inject_rate)
- return false;
- if (type == FAULT_KMALLOC && !IS_FAULT_SET(type))
- return false;
- else if (type == FAULT_PAGE_ALLOC && !IS_FAULT_SET(type))
- return false;
- else if (type == FAULT_ALLOC_NID && !IS_FAULT_SET(type))
- return false;
- else if (type == FAULT_ORPHAN && !IS_FAULT_SET(type))
- return false;
- else if (type == FAULT_BLOCK && !IS_FAULT_SET(type))
- return false;
- else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type))
- return false;
- else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type))
- return false;
-
- atomic_inc(&f2fs_fault.inject_ops);
- if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) {
- atomic_set(&f2fs_fault.inject_ops, 0);
- printk("%sF2FS-fs : inject %s in %pF\n",
- KERN_INFO,
- fault_name[type],
- __builtin_return_address(0));
- return true;
- }
- return false;
-}
+#define IS_FAULT_SET(fi, type) (fi->inject_type & (1 << (type)))
#endif
/*
@@ -158,7 +128,7 @@ enum {
CP_DISCARD,
};
-#define DEF_BATCHED_TRIM_SECTIONS 32
+#define DEF_BATCHED_TRIM_SECTIONS 2
#define BATCHED_TRIM_SEGMENTS(sbi) \
(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
#define BATCHED_TRIM_BLOCKS(sbi) \
@@ -211,6 +181,13 @@ struct discard_entry {
int len; /* # of consecutive blocks of the discard */
};
+struct bio_entry {
+ struct list_head list;
+ struct bio *bio;
+ struct completion event;
+ int error;
+};
+
/* for the list of fsync inodes, used only during recovery */
struct fsync_inode_entry {
struct list_head list; /* list head */
@@ -645,6 +622,7 @@ struct f2fs_sm_info {
/* for small discard management */
struct list_head discard_list; /* 4KB discard list */
+ struct list_head wait_list; /* linked with issued discard bio */
int nr_discards; /* # of discards in the list */
int max_discards; /* max. discards to be issued */
@@ -748,6 +726,7 @@ enum {
SBI_NEED_FSCK, /* need fsck.f2fs to fix */
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
+ SBI_NEED_CP, /* need to checkpoint */
};
enum {
@@ -765,7 +744,7 @@ struct f2fs_sb_info {
struct proc_dir_entry *s_proc; /* proc entry */
struct f2fs_super_block *raw_super; /* raw super block pointer */
int valid_super_block; /* valid super block no */
- int s_flag; /* flags for sbi */
+ unsigned long s_flag; /* flags for sbi */
#ifdef CONFIG_F2FS_FS_ENCRYPTION
u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
@@ -785,6 +764,7 @@ struct f2fs_sb_info {
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
+ spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
@@ -892,8 +872,37 @@ struct f2fs_sb_info {
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
+
+ /* For fault injection */
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ struct f2fs_fault_info fault_info;
+#endif
};
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+{
+ struct f2fs_fault_info *ffi = &sbi->fault_info;
+
+ if (!ffi->inject_rate)
+ return false;
+
+ if (!IS_FAULT_SET(ffi, type))
+ return false;
+
+ atomic_inc(&ffi->inject_ops);
+ if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
+ atomic_set(&ffi->inject_ops, 0);
+ printk("%sF2FS-fs : inject %s in %pF\n",
+ KERN_INFO,
+ fault_name[type],
+ __builtin_return_address(0));
+ return true;
+ }
+ return false;
+}
+#endif
+
/* For write statistics. Suppose sector size is 512 bytes,
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
@@ -1034,17 +1043,17 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type)
{
- return sbi->s_flag & (0x01 << type);
+ return test_bit(type, &sbi->s_flag);
}
static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
{
- sbi->s_flag |= (0x01 << type);
+ set_bit(type, &sbi->s_flag);
}
static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
{
- sbi->s_flag &= ~(0x01 << type);
+ clear_bit(type, &sbi->s_flag);
}
static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
@@ -1052,26 +1061,57 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
}
-static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
{
unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+
return ckpt_flags & f;
}
-static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+static inline bool is_set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
{
- unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+ return __is_set_ckpt_flags(F2FS_CKPT(sbi), f);
+}
+
+static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+ unsigned int ckpt_flags;
+
+ ckpt_flags = le32_to_cpu(cp->ckpt_flags);
ckpt_flags |= f;
cp->ckpt_flags = cpu_to_le32(ckpt_flags);
}
-static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
{
- unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+ spin_lock(&sbi->cp_lock);
+ __set_ckpt_flags(F2FS_CKPT(sbi), f);
+ spin_unlock(&sbi->cp_lock);
+}
+
+static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+ unsigned int ckpt_flags;
+
+ ckpt_flags = le32_to_cpu(cp->ckpt_flags);
ckpt_flags &= (~f);
cp->ckpt_flags = cpu_to_le32(ckpt_flags);
}
+static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
+{
+ spin_lock(&sbi->cp_lock);
+ __clear_ckpt_flags(F2FS_CKPT(sbi), f);
+ spin_unlock(&sbi->cp_lock);
+}
+
+static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
+{
+ struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
+
+ return blk_queue_discard(q);
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
@@ -1110,8 +1150,8 @@ static inline bool __remain_node_summaries(int reason)
static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
{
- return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) ||
- is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG));
+ return (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG) ||
+ is_set_ckpt_flags(sbi, CP_FASTBOOT_FLAG));
}
/*
@@ -1151,7 +1191,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
blkcnt_t diff;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(FAULT_BLOCK))
+ if (time_to_inject(sbi, FAULT_BLOCK))
return false;
#endif
/*
@@ -1193,6 +1233,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
{
percpu_counter_inc(&sbi->nr_pages[count_type]);
+
+ if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES)
+ return;
+
set_sbi_flag(sbi, SBI_IS_DIRTY);
}
@@ -1243,6 +1287,11 @@ static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
return sbi->total_valid_block_count;
}
+static inline block_t discard_blocks(struct f2fs_sb_info *sbi)
+{
+ return sbi->discard_blks;
+}
+
static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1376,7 +1425,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
if (page)
return page;
- if (time_to_inject(FAULT_PAGE_ALLOC))
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC))
return NULL;
#endif
if (!for_write)
@@ -1804,7 +1853,7 @@ static inline int f2fs_readonly(struct super_block *sb)
static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
{
- return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ return is_set_ckpt_flags(sbi, CP_ERROR_FLAG);
}
static inline bool is_dot_dotdot(const struct qstr *str)
@@ -1827,10 +1876,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
return S_ISREG(inode->i_mode);
}
-static inline void *f2fs_kmalloc(size_t size, gfp_t flags)
+static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
+ size_t size, gfp_t flags)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(FAULT_KMALLOC))
+ if (time_to_inject(sbi, FAULT_KMALLOC))
return NULL;
#endif
return kmalloc(size, flags);
@@ -1885,6 +1935,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
*/
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
+struct inode *f2fs_iget_retry(struct super_block *, unsigned long);
int try_to_free_nats(struct f2fs_sb_info *, int);
int update_inode(struct inode *, struct page *);
int update_inode_page(struct inode *);
@@ -1900,7 +1951,6 @@ struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
-extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
void set_de_type(struct f2fs_dir_entry *, umode_t);
unsigned char get_de_type(struct f2fs_dir_entry *);
struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
@@ -1910,10 +1960,12 @@ bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
void do_make_empty_dir(struct inode *, struct inode *,
struct f2fs_dentry_ptr *);
struct page *init_inode_metadata(struct inode *, struct inode *,
- const struct qstr *, struct page *);
+ const struct qstr *, const struct qstr *, struct page *);
void update_parent_metadata(struct inode *, struct inode *, unsigned int);
int room_for_filename(const void *, int, int);
void f2fs_drop_nlink(struct inode *, struct inode *);
+struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *,
+ struct page **);
struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *,
struct page **);
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
@@ -1924,7 +1976,9 @@ int update_dent_inode(struct inode *, struct inode *, const struct qstr *);
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
const struct qstr *, f2fs_hash_t , unsigned int);
int f2fs_add_regular_entry(struct inode *, const struct qstr *,
- struct inode *, nid_t, umode_t);
+ const struct qstr *, struct inode *, nid_t, umode_t);
+int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *,
+ nid_t, umode_t);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
umode_t);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
@@ -2010,9 +2064,9 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
+void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *);
-bool discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
@@ -2095,6 +2149,10 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void f2fs_set_page_dirty_nobuffers(struct page *);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
+#ifdef CONFIG_MIGRATION
+int f2fs_migrate_page(struct address_space *, struct page *, struct page *,
+ enum migrate_mode);
+#endif
/*
* gc.c
@@ -2123,13 +2181,14 @@ struct f2fs_stat_info {
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, zombie_tree, ext_node;
- s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages;
+ s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
+ s64 inmem_pages;
unsigned int ndirty_dirs, ndirty_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
int bg_gc, wb_bios;
int inline_xattr, inline_inode, inline_dir, orphans;
- unsigned int valid_count, valid_node_count, valid_inode_count;
+ unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
@@ -2294,8 +2353,8 @@ bool recover_inline_data(struct inode *, struct page *);
struct f2fs_dir_entry *find_in_inline_dir(struct inode *,
struct fscrypt_name *, struct page **);
int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
-int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
- nid_t, umode_t);
+int f2fs_add_inline_entry(struct inode *, const struct qstr *,
+ const struct qstr *, struct inode *, nid_t, umode_t);
void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
struct inode *, struct inode *);
bool f2fs_empty_inline_dir(struct inode *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 2ebc4c79562c..acdf4b929f97 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -135,7 +135,7 @@ static inline bool need_do_checkpoint(struct inode *inode)
if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
need_cp = true;
- else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino))
+ else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
need_cp = true;
else if (file_wrong_pino(inode))
need_cp = true;
@@ -523,7 +523,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
return 0;
if (cache_only) {
- page = f2fs_grab_cache_page(mapping, index, false);
+ page = find_lock_page(mapping, index);
if (page && PageUptodate(page))
goto truncate_out;
f2fs_put_page(page, 1);
@@ -680,7 +680,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = d_inode(dentry);
int err;
- err = inode_change_ok(inode, attr);
+ err = setattr_prepare(dentry, attr);
if (err)
return err;
@@ -1451,7 +1451,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
+ unsigned int flags;
unsigned int oldflags;
int ret;
@@ -1951,7 +1951,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* avoid defragment running in SSR mode when free section are allocated
* intensively
*/
- if (has_not_enough_free_secs(sbi, sec_num)) {
+ if (has_not_enough_free_secs(sbi, 0, sec_num)) {
err = -EAGAIN;
goto out;
}
@@ -2082,6 +2082,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
return -EOPNOTSUPP;
+ if (src == dst) {
+ if (pos_in == pos_out)
+ return 0;
+ if (pos_out > pos_in && pos_out < pos_in + len)
+ return -EINVAL;
+ }
+
inode_lock(src);
if (src != dst) {
if (!inode_trylock(dst)) {
@@ -2133,8 +2140,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- ret = __exchange_data_block(src, dst, pos_in,
- pos_out, len >> F2FS_BLKSIZE_BITS, false);
+ ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
+ pos_out >> F2FS_BLKSIZE_BITS,
+ len >> F2FS_BLKSIZE_BITS, false);
if (!ret) {
if (dst_max_i_size)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8f7fa326ce95..93985c64d8a8 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -47,6 +47,11 @@ static int gc_thread_func(void *data)
continue;
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_CHECKPOINT))
+ f2fs_stop_checkpoint(sbi, false);
+#endif
+
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
@@ -96,7 +101,7 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
dev_t dev = sbi->sb->s_bdev->bd_dev;
int err = 0;
- gc_th = f2fs_kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
+ gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
if (!gc_th) {
err = -ENOMEM;
goto out;
@@ -270,7 +275,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct victim_sel_policy p;
- unsigned int secno, max_cost, last_victim;
+ unsigned int secno, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
unsigned int nsearched = 0;
@@ -280,7 +285,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
- p.min_cost = max_cost = get_max_cost(sbi, &p);
+ p.min_cost = get_max_cost(sbi, &p);
if (p.max_search == 0)
goto out;
@@ -423,10 +428,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
static void gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
- bool initial = true;
struct f2fs_summary *entry;
block_t start_addr;
int off;
+ int phase = 0;
start_addr = START_BLOCK(sbi, segno);
@@ -439,16 +444,24 @@ next_step:
struct node_info ni;
/* stop BG_GC if there is not enough free sections. */
- if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
return;
if (check_valid_map(sbi, segno, off) == 0)
continue;
- if (initial) {
+ if (phase == 0) {
+ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
+ META_NAT, true);
+ continue;
+ }
+
+ if (phase == 1) {
ra_node_page(sbi, nid);
continue;
}
+
+ /* phase == 2 */
node_page = get_node_page(sbi, nid);
if (IS_ERR(node_page))
continue;
@@ -469,10 +482,8 @@ next_step:
stat_inc_node_blk_count(sbi, 1, gc_type);
}
- if (initial) {
- initial = false;
+ if (++phase < 3)
goto next_step;
- }
}
/*
@@ -706,16 +717,23 @@ next_step:
struct node_info dni; /* dnode info for the data */
unsigned int ofs_in_node, nofs;
block_t start_bidx;
+ nid_t nid = le32_to_cpu(entry->nid);
/* stop BG_GC if there is not enough free sections. */
- if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
return;
if (check_valid_map(sbi, segno, off) == 0)
continue;
if (phase == 0) {
- ra_node_page(sbi, le32_to_cpu(entry->nid));
+ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
+ META_NAT, true);
+ continue;
+ }
+
+ if (phase == 1) {
+ ra_node_page(sbi, nid);
continue;
}
@@ -723,14 +741,14 @@ next_step:
if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
continue;
- if (phase == 1) {
+ if (phase == 2) {
ra_node_page(sbi, dni.ino);
continue;
}
ofs_in_node = le16_to_cpu(entry->ofs_in_node);
- if (phase == 2) {
+ if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
@@ -756,7 +774,7 @@ next_step:
continue;
}
- /* phase 3 */
+ /* phase 4 */
inode = find_gc_inode(gc_list, dni.ino);
if (inode) {
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -789,7 +807,7 @@ next_step:
}
}
- if (++phase < 4)
+ if (++phase < 5)
goto next_step;
}
@@ -815,7 +833,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
- int seg_freed = 0;
+ int sec_freed = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
@@ -834,8 +852,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
for (segno = start_segno; segno < end_segno; segno++) {
- if (get_valid_blocks(sbi, segno, 1) == 0)
- continue;
+ if (get_valid_blocks(sbi, segno, 1) == 0 ||
+ unlikely(f2fs_cp_error(sbi)))
+ goto next;
/* find segment summary of victim */
sum_page = find_get_page(META_MAPPING(sbi),
@@ -861,7 +880,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
gc_type);
stat_inc_seg_count(sbi, type, gc_type);
-
+next:
f2fs_put_page(sum_page, 0);
}
@@ -871,22 +890,20 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
blk_finish_plug(&plug);
- if (gc_type == FG_GC) {
- while (start_segno < end_segno)
- if (get_valid_blocks(sbi, start_segno++, 1) == 0)
- seg_freed++;
- }
+ if (gc_type == FG_GC &&
+ get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0)
+ sec_freed = 1;
stat_inc_call_count(sbi->stat_info);
- return seg_freed;
+ return sec_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
- int sec_freed = 0, seg_freed;
+ int sec_freed = 0;
int ret = -EINVAL;
struct cp_control cpc;
struct gc_inode_list gc_list = {
@@ -905,7 +922,7 @@ gc_more:
goto stop;
}
- if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
gc_type = FG_GC;
/*
* If there is no victim and no prefree segment but still not
@@ -914,10 +931,14 @@ gc_more:
*/
if (__get_victim(sbi, &segno, gc_type) ||
prefree_segments(sbi)) {
- write_checkpoint(sbi, &cpc);
+ ret = write_checkpoint(sbi, &cpc);
+ if (ret)
+ goto stop;
segno = NULL_SEGNO;
- } else if (has_not_enough_free_secs(sbi, 0)) {
- write_checkpoint(sbi, &cpc);
+ } else if (has_not_enough_free_secs(sbi, 0, 0)) {
+ ret = write_checkpoint(sbi, &cpc);
+ if (ret)
+ goto stop;
}
}
@@ -925,20 +946,19 @@ gc_more:
goto stop;
ret = 0;
- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
-
- if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
+ if (do_garbage_collect(sbi, segno, &gc_list, gc_type) &&
+ gc_type == FG_GC)
sec_freed++;
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
- if (has_not_enough_free_secs(sbi, sec_freed))
+ if (has_not_enough_free_secs(sbi, sec_freed, 0))
goto gc_more;
if (gc_type == FG_GC)
- write_checkpoint(sbi, &cpc);
+ ret = write_checkpoint(sbi, &cpc);
}
stop:
mutex_unlock(&sbi->gc_mutex);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ccea8735de59..34234d84a38b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -424,7 +424,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
ino = le32_to_cpu(de->ino);
fake_mode = get_de_type(de) << S_SHIFT;
- err = f2fs_add_regular_entry(dir, &new_name, NULL,
+ err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
ino, fake_mode);
if (err)
goto punch_dentry_pages;
@@ -445,8 +445,8 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *backup_dentry;
int err;
- backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry),
- GFP_F2FS_ZERO);
+ backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir),
+ sizeof(struct f2fs_inline_dentry), GFP_F2FS_ZERO);
if (!backup_dentry) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
@@ -488,17 +488,17 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
}
-int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
+ const struct qstr *orig_name,
+ struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
- size_t namelen = name->len;
struct f2fs_inline_dentry *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
- int slots = GET_DENTRY_SLOTS(namelen);
+ int slots = GET_DENTRY_SLOTS(new_name->len);
struct page *page = NULL;
int err = 0;
@@ -519,18 +519,21 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, name, ipage);
+ page = init_inode_metadata(inode, dir, new_name,
+ orig_name, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
+ if (f2fs_encrypted_inode(dir))
+ file_set_enc_name(inode);
}
f2fs_wait_on_page_writeback(ipage, NODE, true);
- name_hash = f2fs_dentry_hash(name);
+ name_hash = f2fs_dentry_hash(new_name);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
- f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
+ f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -563,7 +566,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
inline_dentry = inline_data_addr(page);
bit_pos = dentry - inline_dentry->dentry;
for (i = 0; i < slots; i++)
- test_and_clear_bit_le(bit_pos + i,
+ __clear_bit_le(bit_pos + i,
&inline_dentry->dentry_bitmap);
set_page_dirty(page);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9ac5efc15347..d7369895a78a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -11,6 +11,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include "f2fs.h"
@@ -234,6 +235,20 @@ bad_inode:
return ERR_PTR(ret);
}
+struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino)
+{
+ struct inode *inode;
+retry:
+ inode = f2fs_iget(sb, ino);
+ if (IS_ERR(inode)) {
+ if (PTR_ERR(inode) == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
+ }
+ return inode;
+}
+
int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
@@ -354,7 +369,7 @@ void f2fs_evict_inode(struct inode *inode)
goto no_delete;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(FAULT_EVICT_INODE))
+ if (time_to_inject(sbi, FAULT_EVICT_INODE))
goto no_delete;
#endif
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1c481c9dc088..e80ed0302c22 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -91,18 +91,23 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
+ int i;
/*
* filename format of multimedia file should be defined as:
- * "filename + '.' + extension".
+ * "filename + '.' + extension + (optional: '.' + temp extension)".
*/
if (slen < sublen + 2)
return 0;
- if (s[slen - sublen - 1] != '.')
- return 0;
+ for (i = 1; i < slen - sublen; i++) {
+ if (s[i] != '.')
+ continue;
+ if (!strncasecmp(s + i + 1, sub, sublen))
+ return 1;
+ }
- return !strncasecmp(s + slen - sublen, sub, sublen);
+ return 0;
}
/*
@@ -449,7 +454,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
ostr.name = sd->encrypted_path;
ostr.len = disk_link.len;
err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
- if (err < 0)
+ if (err)
goto err_out;
sd->len = cpu_to_le16(ostr.len);
@@ -1010,7 +1015,6 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
struct fscrypt_str cstr = FSTR_INIT(NULL, 0);
struct fscrypt_str pstr = FSTR_INIT(NULL, 0);
struct fscrypt_symlink_data *sd;
- loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
u32 max_size = inode->i_sb->s_blocksize;
int res;
@@ -1025,7 +1029,6 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
if (IS_ERR(cpage))
return ERR_CAST(cpage);
caddr = page_address(cpage);
- caddr[size] = 0;
/* Symlink is encrypted */
sd = (struct fscrypt_symlink_data *)caddr;
@@ -1048,7 +1051,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
goto errout;
res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
- if (res < 0)
+ if (res)
goto errout;
/* this is broken symlink case */
@@ -1060,7 +1063,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
paddr = pstr.name;
/* Null-terminate the name */
- paddr[res] = '\0';
+ paddr[pstr.len] = '\0';
put_page(cpage);
set_delayed_call(done, kfree_link, paddr);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f75d197d5beb..01177ecdeab8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -54,8 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
if (excess_cached_nats(sbi))
res = false;
- if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD)
- res = false;
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->wb.dirty_exceeded)
return false;
@@ -1314,6 +1312,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
+ int nwritten = 0;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1387,7 +1386,10 @@ continue_unlock:
unlock_page(page);
f2fs_put_page(last_page, 0);
break;
+ } else {
+ nwritten++;
}
+
if (page == last_page) {
f2fs_put_page(page, 0);
marked = true;
@@ -1409,6 +1411,9 @@ continue_unlock:
unlock_page(last_page);
goto retry;
}
+
+ if (nwritten)
+ f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
return ret ? -EIO: 0;
}
@@ -1418,6 +1423,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
struct pagevec pvec;
int step = 0;
int nwritten = 0;
+ int ret = 0;
pagevec_init(&pvec, 0);
@@ -1438,7 +1444,8 @@ next_step:
if (unlikely(f2fs_cp_error(sbi))) {
pagevec_release(&pvec);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
/*
@@ -1489,6 +1496,8 @@ continue_unlock:
if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
unlock_page(page);
+ else
+ nwritten++;
if (--wbc->nr_to_write == 0)
break;
@@ -1506,14 +1515,17 @@ continue_unlock:
step++;
goto next_step;
}
- return nwritten;
+out:
+ if (nwritten)
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ return ret;
}
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
pgoff_t index = 0, end = ULONG_MAX;
struct pagevec pvec;
- int ret2 = 0, ret = 0;
+ int ret2, ret = 0;
pagevec_init(&pvec, 0);
@@ -1542,10 +1554,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
cond_resched();
}
- if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
- ret2 = -ENOSPC;
- if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
- ret2 = -EIO;
+ ret2 = filemap_check_errors(NODE_MAPPING(sbi));
if (!ret)
ret = ret2;
return ret;
@@ -1672,6 +1681,9 @@ const struct address_space_operations f2fs_node_aops = {
.set_page_dirty = f2fs_set_node_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
+#ifdef CONFIG_MIGRATION
+ .migratepage = f2fs_migrate_page,
+#endif
};
static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
@@ -1838,7 +1850,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *i = NULL;
retry:
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (time_to_inject(FAULT_ALLOC_NID))
+ if (time_to_inject(sbi, FAULT_ALLOC_NID))
return false;
#endif
if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
@@ -2015,10 +2027,12 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
-
+retry:
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
- if (!ipage)
- return -ENOMEM;
+ if (!ipage) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index fc7684554b1a..868bec65e51c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -229,6 +229,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
f2fs_change_bit(block_off, nm_i->nat_bitmap);
}
+static inline nid_t ino_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.ino);
+}
+
+static inline nid_t nid_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.nid);
+}
+
+static inline unsigned int ofs_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ unsigned flag = le32_to_cpu(rn->footer.flag);
+ return flag >> OFFSET_BIT_SHIFT;
+}
+
+static inline __u64 cpver_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le64_to_cpu(rn->footer.cp_ver);
+}
+
+static inline block_t next_blkaddr_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.next_blkaddr);
+}
+
static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
@@ -259,40 +290,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
+ size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+ __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
- rn->footer.cp_ver = ckpt->checkpoint_ver;
+ if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
+ __u64 crc = le32_to_cpu(*((__le32 *)
+ ((unsigned char *)ckpt + crc_offset)));
+ cp_ver |= (crc << 32);
+ }
+ rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
-static inline nid_t ino_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le32_to_cpu(rn->footer.ino);
-}
-
-static inline nid_t nid_of_node(struct page *node_page)
+static inline bool is_recoverable_dnode(struct page *page)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le32_to_cpu(rn->footer.nid);
-}
-
-static inline unsigned int ofs_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- unsigned flag = le32_to_cpu(rn->footer.flag);
- return flag >> OFFSET_BIT_SHIFT;
-}
-
-static inline unsigned long long cpver_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le64_to_cpu(rn->footer.cp_ver);
-}
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
+ size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+ __u64 cp_ver = cur_cp_version(ckpt);
-static inline block_t next_blkaddr_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le32_to_cpu(rn->footer.next_blkaddr);
+ if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
+ __u64 crc = le32_to_cpu(*((__le32 *)
+ ((unsigned char *)ckpt + crc_offset)));
+ cp_ver |= (crc << 32);
+ }
+ return cpu_to_le64(cp_ver) == cpver_of_node(page);
}
/*
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9e652d5a659b..2fc84a991325 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -68,15 +68,17 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
return NULL;
}
-static struct fsync_inode_entry *add_fsync_inode(struct list_head *head,
- struct inode *inode)
+static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
+ struct list_head *head, nid_t ino)
{
+ struct inode *inode;
struct fsync_inode_entry *entry;
- entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
- if (!entry)
- return NULL;
+ inode = f2fs_iget_retry(sbi->sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
entry->inode = inode;
list_add_tail(&entry->list, head);
@@ -96,48 +98,41 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
- struct qstr name;
+ struct fscrypt_name fname;
struct page *page;
struct inode *dir, *einode;
struct fsync_inode_entry *entry;
int err = 0;
+ char *name;
entry = get_fsync_inode(dir_list, pino);
if (!entry) {
- dir = f2fs_iget(inode->i_sb, pino);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto out;
- }
-
- entry = add_fsync_inode(dir_list, dir);
- if (!entry) {
- err = -ENOMEM;
- iput(dir);
+ entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
+ if (IS_ERR(entry)) {
+ dir = ERR_CAST(entry);
+ err = PTR_ERR(entry);
goto out;
}
}
dir = entry->inode;
- if (file_enc_name(inode))
- return 0;
+ memset(&fname, 0, sizeof(struct fscrypt_name));
+ fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
+ fname.disk_name.name = raw_inode->i_name;
- name.len = le32_to_cpu(raw_inode->i_namelen);
- name.name = raw_inode->i_name;
-
- if (unlikely(name.len > F2FS_NAME_LEN)) {
+ if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
WARN_ON(1);
err = -ENAMETOOLONG;
goto out;
}
retry:
- de = f2fs_find_entry(dir, &name, &page);
+ de = __f2fs_find_entry(dir, &fname, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
goto out_unmap_put;
if (de) {
- einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
+ einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
WARN_ON(1);
err = PTR_ERR(einode);
@@ -156,18 +151,24 @@ retry:
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
- err = __f2fs_add_link(dir, &name, inode,
+ err = __f2fs_do_add_link(dir, &fname, inode,
inode->i_ino, inode->i_mode);
}
+ if (err == -ENOMEM)
+ goto retry;
goto out;
out_unmap_put:
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
out:
+ if (file_enc_name(inode))
+ name = "<encrypted>";
+ else
+ name = raw_inode->i_name;
f2fs_msg(inode->i_sb, KERN_NOTICE,
"%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), raw_inode->i_name,
+ __func__, ino_of_node(ipage), name,
IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
@@ -223,9 +224,7 @@ static bool is_same_inode(struct inode *inode, struct page *ipage)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
- unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
- struct inode *inode;
struct page *page = NULL;
block_t blkaddr;
int err = 0;
@@ -242,7 +241,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
page = get_tmp_page(sbi, blkaddr);
- if (cp_ver != cpver_of_node(page))
+ if (!is_recoverable_dnode(page))
break;
if (!is_fsync_dnode(page))
@@ -263,23 +262,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
- inode = f2fs_iget(sbi->sb, ino_of_node(page));
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ entry = add_fsync_inode(sbi, head, ino_of_node(page));
+ if (IS_ERR(entry)) {
+ err = PTR_ERR(entry);
if (err == -ENOENT) {
err = 0;
goto next;
}
break;
}
-
- /* add this fsync inode to the list */
- entry = add_fsync_inode(head, inode);
- if (!entry) {
- err = -ENOMEM;
- iput(inode);
- break;
- }
}
entry->blkaddr = blkaddr;
@@ -363,7 +354,7 @@ got_it:
if (ino != dn->inode->i_ino) {
/* Deallocate previous index in the node page */
- inode = f2fs_iget(sbi->sb, ino);
+ inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return PTR_ERR(inode);
} else {
@@ -431,10 +422,15 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
end = start + ADDRS_PER_PAGE(page, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
-
+retry_dn:
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
- if (err)
+ if (err) {
+ if (err == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry_dn;
+ }
goto out;
+ }
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
@@ -485,11 +481,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (err)
goto err;
}
-
+retry_prev:
/* Check the previous node page having this index */
err = check_index_in_prev_nodes(sbi, dest, &dn);
- if (err)
+ if (err) {
+ if (err == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry_prev;
+ }
goto err;
+ }
/* write dummy data page */
f2fs_replace_block(sbi, &dn, src, dest,
@@ -514,7 +515,6 @@ out:
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
struct list_head *dir_list)
{
- unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page = NULL;
int err = 0;
@@ -534,7 +534,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
page = get_tmp_page(sbi, blkaddr);
- if (cp_ver != cpver_of_node(page)) {
+ if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
break;
}
@@ -626,38 +626,20 @@ out:
}
clear_sbi_flag(sbi, SBI_POR_DOING);
- if (err) {
- bool invalidate = false;
-
- if (test_opt(sbi, LFS)) {
- update_meta_page(sbi, NULL, blkaddr);
- invalidate = true;
- } else if (discard_next_dnode(sbi, blkaddr)) {
- invalidate = true;
- }
-
- /* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META))
- sync_meta_pages(sbi, META, LONG_MAX);
+ if (err)
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
+ mutex_unlock(&sbi->cp_mutex);
- /* invalidate temporary meta page */
- if (invalidate)
- invalidate_mapping_pages(META_MAPPING(sbi),
- blkaddr, blkaddr);
+ /* let's drop all the directory inodes for clean checkpoint */
+ destroy_fsync_dnodes(&dir_list);
- set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
- mutex_unlock(&sbi->cp_mutex);
- } else if (need_writecp) {
+ if (!err && need_writecp) {
struct cp_control cpc = {
.reason = CP_RECOVERY,
};
- mutex_unlock(&sbi->cp_mutex);
err = write_checkpoint(sbi, &cpc);
- } else {
- mutex_unlock(&sbi->cp_mutex);
}
- destroy_fsync_dnodes(&dir_list);
kmem_cache_destroy(fsync_entry_slab);
return ret ? ret: err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a46296f57b02..fc886f008449 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,6 +26,7 @@
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
+static struct kmem_cache *bio_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;
@@ -344,6 +345,11 @@ int commit_inmem_pages(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_CHECKPOINT))
+ f2fs_stop_checkpoint(sbi, false);
+#endif
+
if (!need)
return;
@@ -355,7 +361,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
* We should do GC or end up with checkpoint, if there are so many dirty
* dir/node pages without enough free segments.
*/
- if (has_not_enough_free_secs(sbi, 0)) {
+ if (has_not_enough_free_secs(sbi, 0, 0)) {
mutex_lock(&sbi->gc_mutex);
f2fs_gc(sbi, false);
}
@@ -580,6 +586,74 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
+static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
+ struct bio *bio)
+{
+ struct list_head *wait_list = &(SM_I(sbi)->wait_list);
+ struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+
+ INIT_LIST_HEAD(&be->list);
+ be->bio = bio;
+ init_completion(&be->event);
+ list_add_tail(&be->list, wait_list);
+
+ return be;
+}
+
+void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
+{
+ struct list_head *wait_list = &(SM_I(sbi)->wait_list);
+ struct bio_entry *be, *tmp;
+
+ list_for_each_entry_safe(be, tmp, wait_list, list) {
+ struct bio *bio = be->bio;
+ int err;
+
+ wait_for_completion_io(&be->event);
+ err = be->error;
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ if (err)
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Issue discard failed, ret: %d", err);
+
+ bio_put(bio);
+ list_del(&be->list);
+ kmem_cache_free(bio_entry_slab, be);
+ }
+}
+
+static void f2fs_submit_bio_wait_endio(struct bio *bio)
+{
+ struct bio_entry *be = (struct bio_entry *)bio->bi_private;
+
+ be->error = bio->bi_error;
+ complete(&be->event);
+}
+
+/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
+int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct bio *bio = NULL;
+ int err;
+
+ err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
+ &bio);
+ if (!err && bio) {
+ struct bio_entry *be = __add_bio_entry(sbi, bio);
+
+ bio->bi_private = be;
+ bio->bi_end_io = f2fs_submit_bio_wait_endio;
+ bio->bi_opf |= REQ_SYNC;
+ submit_bio(bio);
+ }
+
+ return err;
+}
+
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
block_t blkstart, block_t blklen)
{
@@ -597,29 +671,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
sbi->discard_blks--;
}
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
- return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
-}
-
-bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
-{
- int err = -EOPNOTSUPP;
-
- if (test_opt(sbi, DISCARD)) {
- struct seg_entry *se = get_seg_entry(sbi,
- GET_SEGNO(sbi, blkaddr));
- unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
-
- if (f2fs_test_bit(offset, se->discard_map))
- return false;
-
- err = f2fs_issue_discard(sbi, blkaddr, 1);
- }
-
- if (err) {
- update_meta_page(sbi, NULL, blkaddr);
- return true;
- }
- return false;
+ return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0);
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
@@ -660,7 +712,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
bool force = (cpc->reason == CP_DISCARD);
int i;
- if (se->valid_blocks == max_blocks)
+ if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
return;
if (!force) {
@@ -719,11 +771,14 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct list_head *head = &(SM_I(sbi)->discard_list);
struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ struct blk_plug plug;
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason == CP_DISCARD);
+ blk_start_plug(&plug);
+
mutex_lock(&dirty_i->seglist_lock);
while (1) {
@@ -772,6 +827,8 @@ skip:
SM_I(sbi)->nr_discards -= entry->len;
kmem_cache_free(discard_entry_slab, entry);
}
+
+ blk_finish_plug(&plug);
}
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -818,12 +875,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
if (del > 0) {
if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
- if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ if (f2fs_discard_en(sbi) &&
+ !f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
} else {
if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
- if (f2fs_test_and_clear_bit(offset, se->discard_map))
+ if (f2fs_discard_en(sbi) &&
+ f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -1202,7 +1261,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
- if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
+ if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0))
return v_ops->get_victim(sbi,
&(curseg)->next_segno, BG_GC, type, SSR);
@@ -1277,6 +1336,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
if (end <= MAIN_BLKADDR(sbi))
goto out;
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Found FS corruption, run fsck to fix.");
+ goto out;
+ }
+
/* start/end segment number in main_area */
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
@@ -1301,6 +1366,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
mutex_lock(&sbi->gc_mutex);
err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
+ if (err)
+ break;
+
+ schedule();
}
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
@@ -1391,7 +1460,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
/* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff &&
- !has_not_enough_free_secs(sbi, 0))
+ !has_not_enough_free_secs(sbi, 0, 0))
__allocate_new_segments(sbi, type);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
@@ -1589,11 +1658,9 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
{
struct page *cpage;
- if (blkaddr == NEW_ADDR)
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return;
- f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
-
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
f2fs_wait_on_page_writeback(cpage, DATA, true);
@@ -1739,7 +1806,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
int type = CURSEG_HOT_DATA;
int err;
- if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
+ if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
int npages = npages_for_summary_flush(sbi, true);
if (npages >= 2)
@@ -1836,7 +1903,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
- if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
+ if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
write_compacted_summaries(sbi, start_blk);
else
write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
@@ -2127,12 +2194,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
sit_i->sentries[start].ckpt_valid_map
= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- sit_i->sentries[start].discard_map
- = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].cur_valid_map ||
- !sit_i->sentries[start].ckpt_valid_map ||
- !sit_i->sentries[start].discard_map)
+ !sit_i->sentries[start].ckpt_valid_map)
return -ENOMEM;
+
+ if (f2fs_discard_en(sbi)) {
+ sit_i->sentries[start].discard_map
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ if (!sit_i->sentries[start].discard_map)
+ return -ENOMEM;
+ }
}
sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -2239,6 +2310,8 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
struct f2fs_journal *journal = curseg->journal;
+ struct seg_entry *se;
+ struct f2fs_sit_entry sit;
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
@@ -2251,41 +2324,58 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
end = (start_blk + readed) * sit_i->sents_per_block;
for (; start < end && start < MAIN_SEGS(sbi); start++) {
- struct seg_entry *se = &sit_i->sentries[start];
struct f2fs_sit_block *sit_blk;
- struct f2fs_sit_entry sit;
struct page *page;
- down_read(&curseg->journal_rwsem);
- for (i = 0; i < sits_in_cursum(journal); i++) {
- if (le32_to_cpu(segno_in_journal(journal, i))
- == start) {
- sit = sit_in_journal(journal, i);
- up_read(&curseg->journal_rwsem);
- goto got_it;
- }
- }
- up_read(&curseg->journal_rwsem);
-
+ se = &sit_i->sentries[start];
page = get_current_sit_page(sbi, start);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
-got_it:
+
check_block_count(sbi, start, &sit);
seg_info_from_raw_sit(se, &sit);
/* build discard map only one time */
- memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;
-
- if (sbi->segs_per_sec > 1) {
- struct sec_entry *e = get_sec_entry(sbi, start);
- e->valid_blocks += se->valid_blocks;
+ if (f2fs_discard_en(sbi)) {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += sbi->blocks_per_seg -
+ se->valid_blocks;
}
+
+ if (sbi->segs_per_sec > 1)
+ get_sec_entry(sbi, start)->valid_blocks +=
+ se->valid_blocks;
}
start_blk += readed;
} while (start_blk < sit_blk_cnt);
+
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < sits_in_cursum(journal); i++) {
+ unsigned int old_valid_blocks;
+
+ start = le32_to_cpu(segno_in_journal(journal, i));
+ se = &sit_i->sentries[start];
+ sit = sit_in_journal(journal, i);
+
+ old_valid_blocks = se->valid_blocks;
+
+ check_block_count(sbi, start, &sit);
+ seg_info_from_raw_sit(se, &sit);
+
+ if (f2fs_discard_en(sbi)) {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks -
+ se->valid_blocks;
+ }
+
+ if (sbi->segs_per_sec > 1)
+ get_sec_entry(sbi, start)->valid_blocks +=
+ se->valid_blocks - old_valid_blocks;
+ }
+ up_read(&curseg->journal_rwsem);
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -2427,6 +2517,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
INIT_LIST_HEAD(&sm_info->discard_list);
+ INIT_LIST_HEAD(&sm_info->wait_list);
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
@@ -2570,10 +2661,15 @@ int __init create_segment_manager_caches(void)
if (!discard_entry_slab)
goto fail;
+ bio_entry_slab = f2fs_kmem_cache_create("bio_entry",
+ sizeof(struct bio_entry));
+ if (!bio_entry_slab)
+ goto destroy_discard_entry;
+
sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
- goto destory_discard_entry;
+ goto destroy_bio_entry;
inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
sizeof(struct inmem_pages));
@@ -2583,7 +2679,9 @@ int __init create_segment_manager_caches(void)
destroy_sit_entry_set:
kmem_cache_destroy(sit_entry_set_slab);
-destory_discard_entry:
+destroy_bio_entry:
+ kmem_cache_destroy(bio_entry_slab);
+destroy_discard_entry:
kmem_cache_destroy(discard_entry_slab);
fail:
return -ENOMEM;
@@ -2592,6 +2690,7 @@ fail:
void destroy_segment_manager_caches(void)
{
kmem_cache_destroy(sit_entry_set_slab);
+ kmem_cache_destroy(bio_entry_slab);
kmem_cache_destroy(discard_entry_slab);
kmem_cache_destroy(inmem_entry_slab);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b33f73ec60a4..fecb856ad874 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -479,7 +479,8 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
reserved_sections(sbi) + 1);
}
-static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
+static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
+ int freed, int needed)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
@@ -489,8 +490,8 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
- return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
- reserved_sections(sbi));
+ return (free_sections(sbi) + freed) <=
+ (node_secs + 2 * dent_secs + reserved_sections(sbi) + needed);
}
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
@@ -587,8 +588,8 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
- f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi)
- || blk_addr >= MAX_BLKADDR(sbi));
+ BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
+ || blk_addr >= MAX_BLKADDR(sbi));
}
/*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7f863a645ab1..6132b4ce4e4c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -40,7 +40,6 @@ static struct kmem_cache *f2fs_inode_cachep;
static struct kset *f2fs_kset;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-struct f2fs_fault_info f2fs_fault;
char *fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
@@ -50,16 +49,21 @@ char *fault_name[FAULT_MAX] = {
[FAULT_BLOCK] = "no more block",
[FAULT_DIR_DEPTH] = "too big dir depth",
[FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_IO] = "IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
};
-static void f2fs_build_fault_attr(unsigned int rate)
+static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
+ unsigned int rate)
{
+ struct f2fs_fault_info *ffi = &sbi->fault_info;
+
if (rate) {
- atomic_set(&f2fs_fault.inject_ops, 0);
- f2fs_fault.inject_rate = rate;
- f2fs_fault.inject_type = (1 << FAULT_MAX) - 1;
+ atomic_set(&ffi->inject_ops, 0);
+ ffi->inject_rate = rate;
+ ffi->inject_type = (1 << FAULT_MAX) - 1;
} else {
- memset(&f2fs_fault, 0, sizeof(struct f2fs_fault_info));
+ memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
}
#endif
@@ -87,6 +91,7 @@ enum {
Opt_inline_xattr,
Opt_inline_data,
Opt_inline_dentry,
+ Opt_noinline_dentry,
Opt_flush_merge,
Opt_noflush_merge,
Opt_nobarrier,
@@ -118,6 +123,7 @@ static match_table_t f2fs_tokens = {
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_inline_dentry, "inline_dentry"},
+ {Opt_noinline_dentry, "noinline_dentry"},
{Opt_flush_merge, "flush_merge"},
{Opt_noflush_merge, "noflush_merge"},
{Opt_nobarrier, "nobarrier"},
@@ -167,7 +173,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
#ifdef CONFIG_F2FS_FAULT_INJECTION
else if (struct_type == FAULT_INFO_RATE ||
struct_type == FAULT_INFO_TYPE)
- return (unsigned char *)&f2fs_fault;
+ return (unsigned char *)&sbi->fault_info;
#endif
return NULL;
}
@@ -312,6 +318,10 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(dirty_nats_ratio),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ ATTR_LIST(inject_rate),
+ ATTR_LIST(inject_type),
+#endif
ATTR_LIST(lifetime_write_kbytes),
NULL,
};
@@ -327,22 +337,6 @@ static struct kobj_type f2fs_ktype = {
.release = f2fs_sb_release,
};
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-/* sysfs for f2fs fault injection */
-static struct kobject f2fs_fault_inject;
-
-static struct attribute *f2fs_fault_attrs[] = {
- ATTR_LIST(inject_rate),
- ATTR_LIST(inject_type),
- NULL
-};
-
-static struct kobj_type f2fs_fault_ktype = {
- .default_attrs = f2fs_fault_attrs,
- .sysfs_ops = &f2fs_attr_ops,
-};
-#endif
-
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
struct va_format vaf;
@@ -370,10 +364,6 @@ static int parse_options(struct super_block *sb, char *options)
char *p, *name;
int arg = 0;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(0);
-#endif
-
if (!options)
return 0;
@@ -488,6 +478,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_dentry:
set_opt(sbi, INLINE_DENTRY);
break;
+ case Opt_noinline_dentry:
+ clear_opt(sbi, INLINE_DENTRY);
+ break;
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
@@ -533,7 +526,7 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(arg);
+ f2fs_build_fault_attr(sbi, arg);
#else
f2fs_msg(sb, KERN_INFO,
"FAULT_INJECTION was not selected");
@@ -730,7 +723,7 @@ static void f2fs_put_super(struct super_block *sb)
* clean checkpoint again.
*/
if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
- !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) {
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
@@ -878,6 +871,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",noinline_data");
if (test_opt(sbi, INLINE_DENTRY))
seq_puts(seq, ",inline_dentry");
+ else
+ seq_puts(seq, ",noinline_dentry");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
if (test_opt(sbi, NOBARRIER))
@@ -946,7 +941,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
seq_printf(seq, "%d|%-3u|", se->type,
get_valid_blocks(sbi, i, 1));
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
- seq_printf(seq, "%x ", se->cur_valid_map[j]);
+ seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
}
return 0;
@@ -975,6 +970,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_DATA);
+ set_opt(sbi, INLINE_DENTRY);
set_opt(sbi, EXTENT_CACHE);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
@@ -991,6 +987,10 @@ static void default_options(struct f2fs_sb_info *sbi)
#ifdef CONFIG_F2FS_FS_POSIX_ACL
set_opt(sbi, POSIX_ACL);
#endif
+
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ f2fs_build_fault_attr(sbi, 0);
+#endif
}
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
@@ -1001,6 +1001,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_restart_gc = false;
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ struct f2fs_fault_info ffi = sbi->fault_info;
+#endif
/*
* Save the old mount options in case we
@@ -1096,6 +1099,9 @@ restore_gc:
restore_opts:
sbi->mount_opt = org_mount_opt;
sbi->active_logs = active_logs;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ sbi->fault_info = ffi;
+#endif
return err;
}
@@ -1469,6 +1475,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
mutex_init(&sbi->wio_mutex[DATA]);
+ spin_lock_init(&sbi->cp_lock);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
@@ -1810,7 +1817,7 @@ try_onemore:
* previous checkpoint was not done by clean system shutdown.
*/
if (bdev_read_only(sb->s_bdev) &&
- !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) {
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
err = -EROFS;
goto free_kobj;
}
@@ -1818,6 +1825,9 @@ try_onemore:
if (need_fsck)
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ if (!retry)
+ goto skip_recovery;
+
err = recover_fsync_data(sbi, false);
if (err < 0) {
need_fsck = true;
@@ -1835,7 +1845,7 @@ try_onemore:
goto free_kobj;
}
}
-
+skip_recovery:
/* recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -1879,7 +1889,9 @@ free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
free_node_inode:
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
mutex_lock(&sbi->umount_mutex);
+ release_ino_entry(sbi, true);
f2fs_leave_shrinker(sbi);
iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
@@ -1978,16 +1990,6 @@ static int __init init_f2fs_fs(void)
err = -ENOMEM;
goto free_extent_cache;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_fault_inject.kset = f2fs_kset;
- f2fs_build_fault_attr(0);
- err = kobject_init_and_add(&f2fs_fault_inject, &f2fs_fault_ktype,
- NULL, "fault_injection");
- if (err) {
- f2fs_fault_inject.kset = NULL;
- goto free_kset;
- }
-#endif
err = register_shrinker(&f2fs_shrinker_info);
if (err)
goto free_kset;
@@ -2006,10 +2008,6 @@ free_filesystem:
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_kset:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (f2fs_fault_inject.kset)
- kobject_put(&f2fs_fault_inject);
-#endif
kset_unregister(f2fs_kset);
free_extent_cache:
destroy_extent_cache();
@@ -2031,9 +2029,6 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- kobject_put(&f2fs_fault_inject);
-#endif
kset_unregister(f2fs_kset);
destroy_extent_cache();
destroy_checkpoint_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c8898b5148eb..1f74876233b6 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,18 +217,20 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
-static void *read_all_xattrs(struct inode *inode, struct page *ipage)
+static int read_all_xattrs(struct inode *inode, struct page *ipage,
+ void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
void *txattr_addr;
+ int err;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
- return NULL;
+ return -ENOMEM;
/* read from inline xattr */
if (inline_size) {
@@ -239,8 +241,10 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
goto fail;
+ }
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
@@ -254,8 +258,10 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
- if (IS_ERR(xpage))
+ if (IS_ERR(xpage)) {
+ err = PTR_ERR(xpage);
goto fail;
+ }
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
@@ -269,10 +275,11 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
header->h_refcount = cpu_to_le32(1);
}
- return txattr_addr;
+ *base_addr = txattr_addr;
+ return 0;
fail:
kzfree(txattr_addr);
- return NULL;
+ return err;
}
static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
@@ -366,9 +373,9 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- base_addr = read_all_xattrs(inode, ipage);
- if (!base_addr)
- return -ENOMEM;
+ error = read_all_xattrs(inode, ipage, &base_addr);
+ if (error)
+ return error;
entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
@@ -402,9 +409,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
int error = 0;
size_t rest = buffer_size;
- base_addr = read_all_xattrs(inode, NULL);
- if (!base_addr)
- return -ENOMEM;
+ error = read_all_xattrs(inode, NULL, &base_addr);
+ if (error)
+ return error;
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
@@ -463,9 +470,9 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
- base_addr = read_all_xattrs(inode, ipage);
- if (!base_addr)
- return -ENOMEM;
+ error = read_all_xattrs(inode, ipage, &base_addr);
+ if (error)
+ return error;
/* find entry with wanted name. */
here = __find_xattr(base_addr, index, len, name);
@@ -548,6 +555,8 @@ static int __f2fs_setxattr(struct inode *inode, int index,
!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
f2fs_set_encrypted_inode(inode);
f2fs_mark_inode_dirty_sync(inode);
+ if (!error && S_ISDIR(inode->i_mode))
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
kzfree(base_addr);
return error;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f70185668832..c09ab4e108e5 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -450,7 +450,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_valid &= ~TIMES_SET_FLAGS;
}
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
attr->ia_valid = ia_valid;
if (error) {
if (sbi->options.quiet)
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 92b7363dafa9..4afdc3f36470 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -21,6 +21,17 @@
#include <linux/namei.h>
#include "fat.h"
+static inline unsigned long vfat_d_version(struct dentry *dentry)
+{
+ return (unsigned long) dentry->d_fsdata;
+}
+
+static inline void vfat_d_version_set(struct dentry *dentry,
+ unsigned long version)
+{
+ dentry->d_fsdata = (void *) version;
+}
+
/*
* If new entry was created in the parent, it could create the 8.3
* alias (the shortname of logname). So, the parent may have the
@@ -33,7 +44,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
{
int ret = 1;
spin_lock(&dentry->d_lock);
- if (dentry->d_time != d_inode(dentry->d_parent)->i_version)
+ if (vfat_d_version(dentry) != d_inode(dentry->d_parent)->i_version)
ret = 0;
spin_unlock(&dentry->d_lock);
return ret;
@@ -759,7 +770,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
if (!inode)
- dentry->d_time = dir->i_version;
+ vfat_d_version_set(dentry, dir->i_version);
return d_splice_alias(inode, dentry);
error:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -823,7 +834,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
clear_nlink(inode);
inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
fat_detach(inode);
- dentry->d_time = dir->i_version;
+ vfat_d_version_set(dentry, dir->i_version);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -849,7 +860,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry)
clear_nlink(inode);
inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
fat_detach(inode);
- dentry->d_time = dir->i_version;
+ vfat_d_version_set(dentry, dir->i_version);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
diff --git a/fs/file.c b/fs/file.c
index 6b1acdfe59da..69d6990e3021 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -23,12 +23,12 @@
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
-int sysctl_nr_open __read_mostly = 1024*1024;
-int sysctl_nr_open_min = BITS_PER_LONG;
+unsigned int sysctl_nr_open __read_mostly = 1024*1024;
+unsigned int sysctl_nr_open_min = BITS_PER_LONG;
/* our min() is unusable in constant expressions ;-/ */
#define __const_min(x, y) ((x) < (y) ? (x) : (y))
-int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) &
- -BITS_PER_LONG;
+unsigned int sysctl_nr_open_max =
+ __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
static void *alloc_fdmem(size_t size)
{
@@ -163,7 +163,7 @@ out:
* Return <0 error code on error; 1 on successful completion.
* The files->file_lock should be held on entry, and will be held on exit.
*/
-static int expand_fdtable(struct files_struct *files, int nr)
+static int expand_fdtable(struct files_struct *files, unsigned int nr)
__releases(files->file_lock)
__acquires(files->file_lock)
{
@@ -208,7 +208,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
* expanded and execution may have blocked.
* The files->file_lock should be held on entry, and will be held on exit.
*/
-static int expand_files(struct files_struct *files, int nr)
+static int expand_files(struct files_struct *files, unsigned int nr)
__releases(files->file_lock)
__acquires(files->file_lock)
{
@@ -243,12 +243,12 @@ repeat:
return expanded;
}
-static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
+static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
{
__set_bit(fd, fdt->close_on_exec);
}
-static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
+static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
{
if (test_bit(fd, fdt->close_on_exec))
__clear_bit(fd, fdt->close_on_exec);
@@ -268,10 +268,10 @@ static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
}
-static int count_open_files(struct fdtable *fdt)
+static unsigned int count_open_files(struct fdtable *fdt)
{
- int size = fdt->max_fds;
- int i;
+ unsigned int size = fdt->max_fds;
+ unsigned int i;
/* Find the last open fd */
for (i = size / BITS_PER_LONG; i > 0; ) {
@@ -291,7 +291,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
{
struct files_struct *newf;
struct file **old_fds, **new_fds;
- int open_files, i;
+ unsigned int open_files, i;
struct fdtable *old_fdt, *new_fdt;
*errorp = -ENOMEM;
@@ -391,7 +391,7 @@ static struct fdtable *close_files(struct files_struct * files)
* files structure.
*/
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
- int i, j = 0;
+ unsigned int i, j = 0;
for (;;) {
unsigned long set;
@@ -477,11 +477,11 @@ struct files_struct init_files = {
.file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
};
-static unsigned long find_next_fd(struct fdtable *fdt, unsigned long start)
+static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
{
- unsigned long maxfd = fdt->max_fds;
- unsigned long maxbit = maxfd / BITS_PER_LONG;
- unsigned long bitbit = start / BITS_PER_LONG;
+ unsigned int maxfd = fdt->max_fds;
+ unsigned int maxbit = maxfd / BITS_PER_LONG;
+ unsigned int bitbit = start / BITS_PER_LONG;
bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
if (bitbit > maxfd)
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 1b2f6c2c3aaf..76f09ce7e5b2 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -1,5 +1,6 @@
config FUSE_FS
tristate "FUSE (Filesystem in Userspace) support"
+ select FS_POSIX_ACL
help
With FUSE it is possible to implement a fully functional filesystem
in a userspace program.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 448aa27ada00..60da84a86dab 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
-fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o
+fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
new file mode 100644
index 000000000000..ec85765502f1
--- /dev/null
+++ b/fs/fuse/acl.c
@@ -0,0 +1,99 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2016 Canonical Ltd. <seth.forshee@canonical.com>
+ *
+ * This program can be distributed under the terms of the GNU GPL.
+ * See the file COPYING.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+
+/*
+ * Read the POSIX ACL of the given type for @inode from the userspace
+ * filesystem by fetching the corresponding ACL xattr.
+ *
+ * Returns NULL when the connection has no ACL support or getxattr is
+ * unavailable, or when the xattr is empty or absent (-ENODATA).
+ * Returns an ERR_PTR(): -EOPNOTSUPP for an unknown @type, -ENOMEM if
+ * the bounce buffer cannot be allocated, -E2BIG when fuse_getxattr()
+ * reports -ERANGE for the PAGE_SIZE buffer, or whatever other error
+ * fuse_getxattr() returned.
+ */
+struct posix_acl *fuse_get_acl(struct inode *inode, int type)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int size;
+ const char *name;
+ void *value = NULL;
+ struct posix_acl *acl;
+
+ if (!fc->posix_acl || fc->no_getxattr)
+ return NULL;
+
+ if (type == ACL_TYPE_ACCESS)
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ else if (type == ACL_TYPE_DEFAULT)
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ else
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /* The xattr value is read into a single page-sized buffer */
+ value = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = fuse_getxattr(inode, name, value, PAGE_SIZE);
+ if (size > 0)
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ else if ((size == 0) || (size == -ENODATA) ||
+ (size == -EOPNOTSUPP && fc->no_getxattr))
+ /* No ACL stored, or getxattr just turned out to be unsupported */
+ acl = NULL;
+ else if (size == -ERANGE)
+ acl = ERR_PTR(-E2BIG);
+ else
+ acl = ERR_PTR(size);
+
+ kfree(value);
+ return acl;
+}
+
+/*
+ * Store @acl as the POSIX ACL of the given type on @inode, or remove
+ * the ACL xattr when @acl is NULL.
+ *
+ * Returns -EOPNOTSUPP when the connection has no ACL support or
+ * setxattr is unavailable, -EINVAL for an unknown @type, -E2BIG when
+ * the encoded ACL would exceed PAGE_SIZE, -ENOMEM on allocation
+ * failure, or the result of posix_acl_to_xattr(), fuse_setxattr() or
+ * fuse_removexattr().
+ *
+ * Once the set/remove request has been issued, cached ACLs and cached
+ * attributes of @inode are dropped whether or not the request succeeded.
+ */
+int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ const char *name;
+ int ret;
+
+ if (!fc->posix_acl || fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ if (type == ACL_TYPE_ACCESS)
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ else if (type == ACL_TYPE_DEFAULT)
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ else
+ return -EINVAL;
+
+ if (acl) {
+ /*
+ * Fuse userspace is responsible for updating access
+ * permissions in the inode, if needed. fuse_setxattr
+ * invalidates the inode attributes, which will force
+ * them to be refreshed the next time they are used,
+ * and it also updates i_ctime.
+ */
+ size_t size = posix_acl_xattr_size(acl->a_count);
+ void *value;
+
+ /* Mirror the PAGE_SIZE read limit used by fuse_get_acl() */
+ if (size > PAGE_SIZE)
+ return -E2BIG;
+
+ value = kmalloc(size, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ if (ret < 0) {
+ kfree(value);
+ return ret;
+ }
+
+ ret = fuse_setxattr(inode, name, value, size, 0);
+ kfree(value);
+ } else {
+ ret = fuse_removexattr(inode, name);
+ }
+ forget_all_cached_acls(inode);
+ fuse_invalidate_attr(inode);
+
+ return ret;
+}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a94d2ed81ab4..70ea57c7b6bb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
struct pipe_buffer *buf = cs->pipebufs;
if (!cs->write) {
- err = buf->ops->confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
@@ -767,7 +767,6 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->len = err;
cs->offset = off;
cs->pg = page;
- cs->offset = off;
iov_iter_advance(cs->iter, err);
}
@@ -828,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
fuse_copy_finish(cs);
- err = buf->ops->confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
@@ -841,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (cs->len != PAGE_SIZE)
goto out_fallback;
- if (buf->ops->steal(cs->pipe, buf) != 0)
+ if (pipe_buf_steal(cs->pipe, buf) != 0)
goto out_fallback;
newpage = buf->page;
@@ -1342,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
- int ret;
+ int total, ret;
int page_nr = 0;
- int do_wakeup = 0;
struct pipe_buffer *bufs;
struct fuse_copy_state cs;
struct fuse_dev *fud = fuse_get_dev(in);
@@ -1363,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;
- ret = 0;
- pipe_lock(pipe);
-
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
- goto out_unlock;
- }
-
if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
ret = -EIO;
- goto out_unlock;
+ goto out;
}
- while (page_nr < cs.nr_segs) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
-
- buf->page = bufs[page_nr].page;
- buf->offset = bufs[page_nr].offset;
- buf->len = bufs[page_nr].len;
+ for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
/*
* Need to be careful about this. Having buf->ops in module
* code can Oops if the buffer persists after module unload.
*/
- buf->ops = &nosteal_pipe_buf_ops;
-
- pipe->nrbufs++;
- page_nr++;
- ret += buf->len;
-
- if (pipe->files)
- do_wakeup = 1;
- }
-
-out_unlock:
- pipe_unlock(pipe);
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ bufs[page_nr].ops = &nosteal_pipe_buf_ops;
+ ret = add_to_pipe(pipe, &bufs[page_nr++]);
+ if (unlikely(ret < 0))
+ break;
}
-
+ if (total)
+ ret = total;
out:
for (; page_nr < cs.nr_segs; page_nr++)
put_page(bufs[page_nr].page);
@@ -1993,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- ibuf->ops->get(pipe, ibuf);
+ pipe_buf_get(pipe, ibuf);
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2015,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
- for (idx = 0; idx < nbuf; idx++) {
- struct pipe_buffer *buf = &bufs[idx];
- buf->ops->release(pipe, buf);
- }
+ for (idx = 0; idx < nbuf; idx++)
+ pipe_buf_release(pipe, &bufs[idx]);
+
out:
kfree(bufs);
return ret;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index dbf77fe1dc2e..572d12410c7c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,7 @@
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/xattr.h>
+#include <linux/posix_acl.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
@@ -38,47 +39,39 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
-#if BITS_PER_LONG >= 64
+/*
+ * Per-dentry private data, allocated by fuse_dentry_init() and hung off
+ * dentry->d_fsdata. 'time' holds the dentry timeout value accessed via
+ * fuse_dentry_settime()/fuse_dentry_time(); 'rcu' shares the same storage
+ * and is handed to kfree_rcu() by fuse_dentry_release().
+ */
+union fuse_dentry {
+ u64 time;
+ struct rcu_head rcu;
+};
+
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
{
- entry->d_time = time;
+ ((union fuse_dentry *) entry->d_fsdata)->time = time;
}
static inline u64 fuse_dentry_time(struct dentry *entry)
{
- return entry->d_time;
-}
-#else
-/*
- * On 32 bit archs store the high 32 bits of time in d_fsdata
- */
-static void fuse_dentry_settime(struct dentry *entry, u64 time)
-{
- entry->d_time = time;
- entry->d_fsdata = (void *) (unsigned long) (time >> 32);
-}
-
-static u64 fuse_dentry_time(struct dentry *entry)
-{
- return (u64) entry->d_time +
- ((u64) (unsigned long) entry->d_fsdata << 32);
+ return ((union fuse_dentry *) entry->d_fsdata)->time;
}
-#endif
/*
* FUSE caches dentries and attributes with separate timeout. The
* time in jiffies until the dentry/attributes are valid is stored in
- * dentry->d_time and fuse_inode->i_time respectively.
+ * dentry->d_fsdata and fuse_inode->i_time respectively.
*/
/*
* Calculate the time in jiffies until a dentry/attributes are valid
*/
-static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
+static u64 time_to_jiffies(u64 sec, u32 nsec)
{
if (sec || nsec) {
- struct timespec ts = {sec, nsec};
- return get_jiffies_64() + timespec_to_jiffies(&ts);
+ /*
+ * Clamp nsec to the largest valid sub-second value. This must
+ * be min_t(): max_t() would raise every nsec to at least
+ * NSEC_PER_SEC - 1 and let out-of-range values through.
+ */
+ struct timespec64 ts = {
+ sec,
+ min_t(u32, nsec, NSEC_PER_SEC - 1)
+ };
+
+ return get_jiffies_64() + timespec64_to_jiffies(&ts);
} else
return 0;
}
@@ -244,6 +237,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
+ forget_all_cached_acls(inode);
fuse_change_attributes(inode, &outarg.attr,
entry_attr_timeout(&outarg),
attr_version);
@@ -273,8 +267,23 @@ static int invalid_nodeid(u64 nodeid)
return !nodeid || nodeid == FUSE_ROOT_ID;
}
+/*
+ * ->d_init: allocate the zeroed per-dentry union fuse_dentry and attach
+ * it to dentry->d_fsdata. Returns 0 on success, -ENOMEM on failure.
+ */
+static int fuse_dentry_init(struct dentry *dentry)
+{
+ dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
+
+ return dentry->d_fsdata ? 0 : -ENOMEM;
+}
+/*
+ * ->d_release: free the per-dentry data allocated by fuse_dentry_init().
+ * The free is deferred through kfree_rcu() using the union's rcu member.
+ * NOTE(review): presumably because lockless lookups may still be reading
+ * d_fsdata when the dentry is released - confirm.
+ */
+static void fuse_dentry_release(struct dentry *dentry)
+{
+ union fuse_dentry *fd = dentry->d_fsdata;
+
+ kfree_rcu(fd, rcu);
+}
+
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
+ .d_init = fuse_dentry_init,
+ .d_release = fuse_dentry_release,
};
int fuse_valid_type(int m)
@@ -918,6 +927,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
if (time_before64(fi->i_time, get_jiffies_64())) {
r = true;
+ forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else {
r = false;
@@ -1018,7 +1028,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
{
const struct cred *cred;
- if (fc->flags & FUSE_ALLOW_OTHER)
+ if (fc->allow_other)
return 1;
cred = current_cred();
@@ -1065,6 +1075,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
+ forget_all_cached_acls(inode);
return fuse_do_getattr(inode, NULL, NULL);
}
@@ -1093,7 +1104,7 @@ static int fuse_permission(struct inode *inode, int mask)
/*
* If attributes are needed, refresh them before proceeding
*/
- if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
+ if (fc->default_permissions ||
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -1106,7 +1117,7 @@ static int fuse_permission(struct inode *inode, int mask)
}
}
- if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+ if (fc->default_permissions) {
err = generic_permission(inode, mask);
/* If permission is denied, try to refresh file
@@ -1234,6 +1245,7 @@ retry:
fi->nlookup++;
spin_unlock(&fc->lock);
+ forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
@@ -1592,9 +1604,10 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
* vmtruncate() doesn't allow for this case, so do the rlimit checking
* and the actual truncation by hand.
*/
-int fuse_do_setattr(struct inode *inode, struct iattr *attr,
+int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
struct file *file)
{
+ struct inode *inode = d_inode(dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
FUSE_ARGS(args);
@@ -1606,10 +1619,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
int err;
bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
- if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+ if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
- err = inode_change_ok(inode, attr);
+ err = setattr_prepare(dentry, attr);
if (err)
return err;
@@ -1703,14 +1716,63 @@ error:
+/*
+ * ->setattr for fuse inodes.
+ *
+ * Returns -EACCES if the current process may not access this mount,
+ * 0 if nothing is left to change once the kill bits are processed,
+ * otherwise the result of fuse_do_getattr()/fuse_do_setattr().
+ * On success, cached ACLs are dropped when the filesystem supports ACLs,
+ * and the entry cache is invalidated for a directory mode change.
+ */
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
{
struct inode *inode = d_inode(entry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
+ int ret;
if (!fuse_allow_current_process(get_fuse_conn(inode)))
return -EACCES;
- if (attr->ia_valid & ATTR_FILE)
- return fuse_do_setattr(inode, attr, attr->ia_file);
- else
- return fuse_do_setattr(inode, attr, NULL);
+ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
+ attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
+ ATTR_MODE);
+
+ /*
+ * The only sane way to reliably kill suid/sgid is to do it in
+ * the userspace filesystem
+ *
+ * This should be done on write(), truncate() and chown().
+ */
+ if (!fc->handle_killpriv) {
+ /*
+ * ia_mode calculation may have used stale i_mode.
+ * Refresh and recalculate.
+ */
+ ret = fuse_do_getattr(inode, NULL, file);
+ if (ret)
+ return ret;
+
+ /*
+ * The caller already asked for the bits to be killed,
+ * so test the refreshed mode directly. Going through
+ * should_remove_suid() would re-apply that helper's
+ * own veto and could leave the bits set.
+ */
+ attr->ia_mode = inode->i_mode;
+ if (inode->i_mode & S_ISUID) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode &= ~S_ISUID;
+ }
+ if ((inode->i_mode & (S_ISGID | S_IXGRP)) ==
+ (S_ISGID | S_IXGRP)) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode &= ~S_ISGID;
+ }
+ }
+ }
+ if (!attr->ia_valid)
+ return 0;
+
+ ret = fuse_do_setattr(entry, attr, file);
+ if (!ret) {
+ /*
+ * If filesystem supports acls it may have updated acl xattrs in
+ * the filesystem, so forget cached acls for the inode.
+ */
+ if (fc->posix_acl)
+ forget_all_cached_acls(inode);
+
+ /* Directory mode changed, may need to revalidate access */
+ if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
+ fuse_invalidate_entry_cache(entry);
+ }
+ return ret;
}
static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
@@ -1740,6 +1802,8 @@ static const struct inode_operations fuse_dir_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
+ .get_acl = fuse_get_acl,
+ .set_acl = fuse_set_acl,
};
static const struct file_operations fuse_dir_operations = {
@@ -1758,6 +1822,8 @@ static const struct inode_operations fuse_common_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
+ .get_acl = fuse_get_acl,
+ .set_acl = fuse_set_acl,
};
static const struct inode_operations fuse_symlink_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3988b43c2f5a..abc66a6237fd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2326,49 +2326,6 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
return retval;
}
-static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
- unsigned int nr_segs, size_t bytes, bool to_user)
-{
- struct iov_iter ii;
- int page_idx = 0;
-
- if (!bytes)
- return 0;
-
- iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
-
- while (iov_iter_count(&ii)) {
- struct page *page = pages[page_idx++];
- size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
- void *kaddr;
-
- kaddr = kmap(page);
-
- while (todo) {
- char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
- size_t iov_len = ii.iov->iov_len - ii.iov_offset;
- size_t copy = min(todo, iov_len);
- size_t left;
-
- if (!to_user)
- left = copy_from_user(kaddr, uaddr, copy);
- else
- left = copy_to_user(uaddr, kaddr, copy);
-
- if (unlikely(left))
- return -EFAULT;
-
- iov_iter_advance(&ii, copy);
- todo -= copy;
- kaddr += copy;
- }
-
- kunmap(page);
- }
-
- return 0;
-}
-
/*
* CUSE servers compiled on 32bit broke on 64bit kernels because the
* ABI was defined to be 'struct iovec' which is different on 32bit
@@ -2520,8 +2477,9 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov_page = NULL;
struct iovec *in_iov = NULL, *out_iov = NULL;
unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
- size_t in_size, out_size, transferred;
- int err;
+ size_t in_size, out_size, transferred, c;
+ int err, i;
+ struct iov_iter ii;
#if BITS_PER_LONG == 32
inarg.flags |= FUSE_IOCTL_32BIT;
@@ -2603,10 +2561,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
req->in.args[1].size = in_size;
req->in.argpages = 1;
- err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
- false);
- if (err)
- goto out;
+ err = -EFAULT;
+ iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
+ c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
+ if (c != PAGE_SIZE && iov_iter_count(&ii))
+ goto out;
+ }
}
req->out.numargs = 2;
@@ -2672,7 +2633,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
if (transferred > inarg.out_size)
goto out;
- err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
+ err = -EFAULT;
+ iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
+ c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
+ if (c != PAGE_SIZE && iov_iter_count(&ii))
+ goto out;
+ }
+ err = 0;
out:
if (req)
fuse_put_request(fc, req);
@@ -2842,7 +2810,7 @@ static void fuse_do_truncate(struct file *file)
attr.ia_file = file;
attr.ia_valid |= ATTR_FILE;
- fuse_do_setattr(inode, &attr, file);
+ fuse_do_setattr(file_dentry(file), &attr, file);
}
static inline loff_t fuse_round_up(loff_t off)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6db54d0bd81b..0dfbb136e59a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -23,6 +23,7 @@
#include <linux/poll.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/xattr.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -36,15 +37,6 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
-/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
- module will check permissions based on the file mode. Otherwise no
- permission checking is done in the kernel */
-#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
-
-/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
- doing the mount will be allowed to access the filesystem */
-#define FUSE_ALLOW_OTHER (1 << 1)
-
/** Number of page pointers embedded in fuse_req */
#define FUSE_REQ_INLINE_PAGES 1
@@ -469,9 +461,6 @@ struct fuse_conn {
/** The group id for this mount */
kgid_t group_id;
- /** The fuse mount flags for this mount */
- unsigned flags;
-
/** Maximum read size */
unsigned max_read;
@@ -547,6 +536,9 @@ struct fuse_conn {
/** allow parallel lookups and readdir (default is serialized) */
unsigned parallel_dirops:1;
+ /** fs handles killing suid/sgid/cap on write/chown/trunc */
+ unsigned handle_killpriv:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -624,6 +616,15 @@ struct fuse_conn {
/** Is lseek not implemented by fs? */
unsigned no_lseek:1;
+ /** Does the filesystem support posix acls? */
+ unsigned posix_acl:1;
+
+ /** Check permissions based on the file mode or not? */
+ unsigned default_permissions:1;
+
+ /** Allow users other than the mounter to access the filesystem? */
+ unsigned allow_other:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -960,7 +961,7 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos);
int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
-int fuse_do_setattr(struct inode *inode, struct iattr *attr,
+int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
struct file *file);
void fuse_set_initialized(struct fuse_conn *fc);
@@ -968,7 +969,17 @@ void fuse_set_initialized(struct fuse_conn *fc);
void fuse_unlock_inode(struct inode *inode);
void fuse_lock_inode(struct inode *inode);
+int fuse_setxattr(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags);
+ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size);
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
+int fuse_removexattr(struct inode *inode, const char *name);
extern const struct xattr_handler *fuse_xattr_handlers[];
+extern const struct xattr_handler *fuse_acl_xattr_handlers[];
+
+struct posix_acl;
+struct posix_acl *fuse_get_acl(struct inode *inode, int type);
+int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1e535f31fed0..17141099f2e7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
+#include <linux/posix_acl.h>
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -66,7 +67,8 @@ struct fuse_mount_data {
unsigned rootmode_present:1;
unsigned user_id_present:1;
unsigned group_id_present:1;
- unsigned flags;
+ unsigned default_permissions:1;
+ unsigned allow_other:1;
unsigned max_read;
unsigned blksize;
};
@@ -192,7 +194,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
* check in may_delete().
*/
fi->orig_i_mode = inode->i_mode;
- if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+ if (!fc->default_permissions)
inode->i_mode &= ~S_ISVTX;
fi->orig_ino = attr->ino;
@@ -340,6 +342,7 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
return -ENOENT;
fuse_invalidate_attr(inode);
+ forget_all_cached_acls(inode);
if (offset >= 0) {
pg_start = offset >> PAGE_SHIFT;
if (len <= 0)
@@ -532,11 +535,11 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_DEFAULT_PERMISSIONS:
- d->flags |= FUSE_DEFAULT_PERMISSIONS;
+ d->default_permissions = 1;
break;
case OPT_ALLOW_OTHER:
- d->flags |= FUSE_ALLOW_OTHER;
+ d->allow_other = 1;
break;
case OPT_MAX_READ:
@@ -570,9 +573,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
- if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
+ if (fc->default_permissions)
seq_puts(m, ",default_permissions");
- if (fc->flags & FUSE_ALLOW_OTHER)
+ if (fc->allow_other)
seq_puts(m, ",allow_other");
if (fc->max_read != ~0)
seq_printf(m, ",max_read=%u", fc->max_read);
@@ -910,8 +913,15 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->writeback_cache = 1;
if (arg->flags & FUSE_PARALLEL_DIROPS)
fc->parallel_dirops = 1;
+ if (arg->flags & FUSE_HANDLE_KILLPRIV)
+ fc->handle_killpriv = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
+ if ((arg->flags & FUSE_POSIX_ACL)) {
+ fc->default_permissions = 1;
+ fc->posix_acl = 1;
+ fc->sb->s_xattr = fuse_acl_xattr_handlers;
+ }
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -941,7 +951,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
- FUSE_PARALLEL_DIROPS;
+ FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1110,7 +1120,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->dont_mask = 1;
sb->s_flags |= MS_POSIXACL;
- fc->flags = d.flags;
+ fc->default_permissions = d.default_permissions;
+ fc->allow_other = d.allow_other;
fc->user_id = d.user_id;
fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read);
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index e22980f0a9e2..3caac46b08b0 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -9,9 +9,10 @@
#include "fuse_i.h"
#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
-static int fuse_setxattr(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+int fuse_setxattr(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags)
{
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
@@ -45,8 +46,8 @@ static int fuse_setxattr(struct inode *inode, const char *name,
return err;
}
-static ssize_t fuse_getxattr(struct inode *inode, const char *name,
- void *value, size_t size)
+ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size)
{
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
@@ -78,7 +79,7 @@ static ssize_t fuse_getxattr(struct inode *inode, const char *name,
}
ret = fuse_simple_request(fc, &args);
if (!ret && !size)
- ret = outarg.size;
+ ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
if (ret == -ENOSYS) {
fc->no_getxattr = 1;
ret = -EOPNOTSUPP;
@@ -137,7 +138,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
}
ret = fuse_simple_request(fc, &args);
if (!ret && !size)
- ret = outarg.size;
+ ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
if (ret > 0 && size)
ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) {
@@ -147,7 +148,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
return ret;
}
-static int fuse_removexattr(struct inode *inode, const char *name)
+int fuse_removexattr(struct inode *inode, const char *name)
{
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
@@ -201,3 +202,10 @@ const struct xattr_handler *fuse_xattr_handlers[] = {
&fuse_xattr_handler,
NULL
};
+
+/*
+ * Handler table installed as sb->s_xattr when the filesystem negotiates
+ * FUSE_POSIX_ACL: the generic POSIX ACL access/default handlers are
+ * listed ahead of the plain fuse xattr handler.
+ */
+const struct xattr_handler *fuse_acl_xattr_handlers[] = {
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+ &fuse_xattr_handler,
+ NULL
+};
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 363ba9e9d8d0..2524807ee070 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -92,17 +92,11 @@ int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
-
- if (error == 0)
- acl = NULL;
-
- if (mode != inode->i_mode) {
- inode->i_mode = mode;
+ if (mode != inode->i_mode)
mark_inode_dirty(inode);
- }
}
if (acl) {
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 82df36886938..5a6f52ea2722 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -187,7 +187,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
ClearPageChecked(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
+ BIT(BH_Dirty)|BIT(BH_Uptodate));
}
gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
}
@@ -1147,6 +1147,16 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
if (!page_has_buffers(page))
return 0;
+ /*
+ * From xfs_vm_releasepage: mm accommodates an old ext3 case where
+ * clean pages might not have had the dirty bit cleared. Thus, it can
+ * send actual dirty pages to ->releasepage() via shrink_active_list().
+ *
+ * As a workaround, we skip pages that contain dirty buffers below.
+ * Once ->releasepage isn't called on dirty pages anymore, we can warn
+ * on dirty buffers like we used to here again.
+ */
+
gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
@@ -1156,8 +1166,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bd = bh->b_private;
if (bd && bd->bd_tr)
goto cannot_release;
- if (buffer_pinned(bh) || buffer_dirty(bh))
- goto not_possible;
+ if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh)))
+ goto cannot_release;
bh = bh->b_this_page;
} while(bh != head);
spin_unlock(&sdp->sd_ail_lock);
@@ -1180,9 +1190,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
return try_to_free_buffers(page);
-not_possible: /* Should never happen */
- WARN_ON(buffer_dirty(bh));
- WARN_ON(buffer_pinned(bh));
cannot_release:
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6e2bec1cd289..645721f3ff00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -82,8 +82,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
}
if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits,
- (1 << BH_Uptodate));
+ create_empty_buffers(page, BIT(inode->i_blkbits),
+ BIT(BH_Uptodate));
bh = page_buffers(page);
@@ -690,7 +690,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
BUG_ON(!dblock);
BUG_ON(!new);
- bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5));
+ bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
ret = gfs2_block_map(inode, lblock, &bh, create);
*extlen = bh.b_size >> inode->i_blkbits;
*dblock = bh.b_blocknr;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index fcb59b23f1e3..db8fbeb62483 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -351,7 +351,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
if (hc)
return hc;
- hsize = 1 << ip->i_depth;
+ hsize = BIT(ip->i_depth);
hsize *= sizeof(__be64);
if (hsize != i_size_read(&ip->i_inode)) {
gfs2_consist_inode(ip);
@@ -819,8 +819,8 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
if (ip->i_diskflags & GFS2_DIF_EXHASH) {
struct gfs2_leaf *leaf;
- unsigned hsize = 1 << ip->i_depth;
- unsigned index;
+ unsigned int hsize = BIT(ip->i_depth);
+ unsigned int index;
u64 ln;
if (hsize * sizeof(u64) != i_size_read(inode)) {
gfs2_consist_inode(ip);
@@ -932,7 +932,7 @@ static int dir_make_exhash(struct inode *inode)
return -ENOSPC;
bn = bh->b_blocknr;
- gfs2_assert(sdp, dip->i_entries < (1 << 16));
+ gfs2_assert(sdp, dip->i_entries < BIT(16));
leaf->lf_entries = cpu_to_be16(dip->i_entries);
/* Copy dirents */
@@ -1041,7 +1041,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
bn = nbh->b_blocknr;
/* Compute the start and len of leaf pointers in the hash table. */
- len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
+ len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth));
half_len = len >> 1;
if (!half_len) {
pr_warn("i_depth %u lf_depth %u index %u\n",
@@ -1163,7 +1163,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
int x;
int error = 0;
- hsize = 1 << dip->i_depth;
+ hsize = BIT(dip->i_depth);
hsize_bytes = hsize * sizeof(__be64);
hc = gfs2_dir_get_hash_table(dip);
@@ -1539,7 +1539,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
int error = 0;
unsigned depth = 0;
- hsize = 1 << dip->i_depth;
+ hsize = BIT(dip->i_depth);
hash = gfs2_dir_offset2hash(ctx->pos);
index = hash >> (32 - dip->i_depth);
@@ -1558,7 +1558,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
if (error)
break;
- len = 1 << (dip->i_depth - depth);
+ len = BIT(dip->i_depth - depth);
index = (index & ~(len - 1)) + len;
}
@@ -2113,7 +2113,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
u64 leaf_no;
int error = 0, last;
- hsize = 1 << dip->i_depth;
+ hsize = BIT(dip->i_depth);
lp = gfs2_dir_get_hash_table(dip);
if (IS_ERR(lp))
@@ -2126,7 +2126,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
if (error)
goto out;
leaf = (struct gfs2_leaf *)bh->b_data;
- len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
+ len = BIT(dip->i_depth - be16_to_cpu(leaf->lf_depth));
next_index = (index & ~(len - 1)) + len;
last = ((next_index >= hsize) ? 1 : 0);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 320e65e61938..e23ff70b3435 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -395,9 +395,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
- /* Update file times before taking page lock */
- file_update_time(vma->vm_file);
-
ret = gfs2_rsqa_alloc(ip);
if (ret)
goto out;
@@ -409,6 +406,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_uninit;
+ /* Update file times before taking page lock */
+ file_update_time(vma->vm_file);
+
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
@@ -954,30 +954,6 @@ out_uninit:
return ret;
}
-static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct inode *inode = in->f_mapping->host;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- inode_lock(inode);
-
- ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- if (ret) {
- inode_unlock(inode);
- return ret;
- }
-
- gfs2_glock_dq_uninit(&gh);
- inode_unlock(inode);
-
- return generic_file_splice_read(in, ppos, pipe, len, flags);
-}
-
-
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
- .splice_read = gfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
- .splice_read = gfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3a90b2b5b9bb..14cbf60167a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -69,7 +69,7 @@ static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15
-#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
+#define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT)
static struct rhashtable_params ht_parms = {
.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
@@ -1781,7 +1781,13 @@ int __init gfs2_glock_init(void)
return -ENOMEM;
}
- register_shrinker(&glock_shrinker);
+ ret = register_shrinker(&glock_shrinker);
+ if (ret) {
+ destroy_workqueue(gfs2_delete_workqueue);
+ destroy_workqueue(glock_workqueue);
+ rhashtable_destroy(&gl_hash_table);
+ return ret;
+ }
return 0;
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9cbd4b6ebff1..f6c4f0058899 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -187,6 +187,10 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
}
gfs2_set_iop(inode);
+
+ inode->i_atime.tv_sec = 0;
+ inode->i_atime.tv_nsec = 0;
+
unlock_new_inode(inode);
}
@@ -1932,7 +1936,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
goto out;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 7710dfd3af35..aace8ce34a18 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -85,7 +85,7 @@ static inline int gfs2_check_internal_file_size(struct inode *inode,
u64 size = i_size_read(inode);
if (size < minsize || size > maxsize)
goto err;
- if (size & ((1 << inode->i_blkbits) - 1))
+ if (size & (BIT(inode->i_blkbits) - 1))
goto err;
return 0;
err:
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 74fd0139e6c2..67d1fc4668f7 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -145,7 +145,9 @@ static int __init init_gfs2_fs(void)
if (!gfs2_qadata_cachep)
goto fail;
- register_shrinker(&gfs2_qd_shrinker);
+ error = register_shrinker(&gfs2_qd_shrinker);
+ if (error)
+ goto fail;
error = register_filesystem(&gfs2_fs_type);
if (error)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 950b8be68e41..373639a59782 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -216,23 +216,26 @@ static void gfs2_meta_read_endio(struct bio *bio)
static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
int num)
{
- struct buffer_head *bh = bhs[0];
- struct bio *bio;
- int i;
-
- if (!num)
- return;
-
- bio = bio_alloc(GFP_NOIO, num);
- bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
- for (i = 0; i < num; i++) {
- bh = bhs[i];
- bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+ while (num > 0) {
+ struct buffer_head *bh = *bhs;
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_NOIO, num);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ while (num > 0) {
+ bh = *bhs;
+ if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
+ BUG_ON(bio->bi_iter.bi_size == 0);
+ break;
+ }
+ bhs++;
+ num--;
+ }
+ bio->bi_end_io = gfs2_meta_read_endio;
+ bio_set_op_attrs(bio, op, op_flags);
+ submit_bio(bio);
}
- bio->bi_end_io = gfs2_meta_read_endio;
- bio_set_op_attrs(bio, op, op_flags);
- submit_bio(bio);
}
/**
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ef1e1822977f..ff72ac6439c8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -58,7 +58,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_quota_scale_num = 1;
gt->gt_quota_scale_den = 1;
gt->gt_new_files_jdata = 0;
- gt->gt_max_readahead = 1 << 18;
+ gt->gt_max_readahead = BIT(18);
gt->gt_complain_secs = 10;
}
@@ -284,7 +284,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
- sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+ sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode)) / sizeof(u64);
sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
@@ -302,7 +302,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
/* Compute maximum reservation required to add a entry to a directory */
- hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
+ hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),
sdp->sd_jbsize);
ind_blocks = 0;
@@ -1089,7 +1089,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
- sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+ sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 77930ca25303..8af2dfa09236 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -75,7 +75,7 @@
#include "util.h"
#define GFS2_QD_HASH_SHIFT 12
-#define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT)
+#define GFS2_QD_HASH_SIZE BIT(GFS2_QD_HASH_SHIFT)
#define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1)
/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
@@ -384,7 +384,7 @@ static int bh_get(struct gfs2_quota_data *qd)
block = qd->qd_slot / sdp->sd_qc_per_block;
offset = qd->qd_slot % sdp->sd_qc_per_block;
- bh_map.b_size = 1 << ip->i_inode.i_blkbits;
+ bh_map.b_size = BIT(ip->i_inode.i_blkbits);
error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
if (error)
goto fail;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3a7e60bb39f8..e3ee387a6dfe 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -359,7 +359,7 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
u64 size = i_size_read(jd->jd_inode);
- if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30))
+ if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
return -EIO;
jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a181bd709b1e..ed373261f26d 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -606,7 +606,7 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
int error;
- error = inode_change_ok(inode, attr); /* basic permission checks */
+ error = setattr_prepare(dentry, attr); /* basic permission checks */
if (error)
return error;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 4a7c9241213d..10827c912c4d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -245,7 +245,7 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = d_inode(dentry);
int error;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
index ab7ea2506b4d..9b92058a1240 100644
--- a/fs/hfsplus/posix_acl.c
+++ b/fs/hfsplus/posix_acl.c
@@ -65,8 +65,8 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
case ACL_TYPE_ACCESS:
xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- err = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (err < 0)
+ err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (err)
return err;
}
err = 0;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 90e46cd752fe..44aa96ba1df8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -812,7 +812,7 @@ static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
int fd = HOSTFS_I(inode)->fd;
- err = inode_change_ok(inode, attr);
+ err = setattr_prepare(dentry, attr);
if (err)
return err;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index d3bcdd975700..b3be1b5a62e2 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -189,6 +189,11 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping, block, hpfs_get_block);
}
+static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len)
+{
+ return generic_block_fiemap(inode, fieinfo, start, len, hpfs_get_block);
+}
+
const struct address_space_operations hpfs_aops = {
.readpage = hpfs_readpage,
.writepage = hpfs_writepage,
@@ -214,4 +219,5 @@ const struct file_operations hpfs_file_ops =
const struct inode_operations hpfs_file_iops =
{
.setattr = hpfs_setattr,
+ .fiemap = hpfs_fiemap,
};
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1f3c6d76200b..b9c724ed1e7e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -273,7 +273,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
goto out_unlock;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
goto out_unlock;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4ea71eba40a5..2c0c3a017a6a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -416,7 +416,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
- bool rsv_on_error;
u32 hash;
/*
@@ -458,18 +457,17 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
* cache (remove_huge_page) BEFORE removing the
* region/reserve map (hugetlb_unreserve_pages). In
* rare out of memory conditions, removal of the
- * region/reserve map could fail. Before free'ing
- * the page, note PagePrivate which is used in case
- * of error.
+ * region/reserve map could fail. Correspondingly,
+ * the subpool and global reserve usage count can need
+ * to be adjusted.
*/
- rsv_on_error = !PagePrivate(page);
+ VM_BUG_ON(PagePrivate(page));
remove_huge_page(page);
freed++;
if (!truncate_op) {
if (unlikely(hugetlb_unreserve_pages(inode,
next, next + 1, 1)))
- hugetlb_fix_reserve_counts(inode,
- rsv_on_error);
+ hugetlb_fix_reserve_counts(inode);
}
unlock_page(page);
@@ -672,7 +670,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
BUG_ON(!inode);
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
diff --git a/fs/inode.c b/fs/inode.c
index 705f8609fdb8..7d037591259d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1023,13 +1023,17 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
{
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
struct inode *inode;
-
+again:
spin_lock(&inode_hash_lock);
inode = find_inode(sb, head, test, data);
spin_unlock(&inode_hash_lock);
if (inode) {
wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
return inode;
}
@@ -1066,6 +1070,10 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
return inode;
@@ -1093,12 +1101,16 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
-
+again:
spin_lock(&inode_hash_lock);
inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock);
if (inode) {
wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
return inode;
}
@@ -1133,6 +1145,10 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
return inode;
}
@@ -1268,10 +1284,16 @@ EXPORT_SYMBOL(ilookup5_nowait);
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
-
- if (inode)
+ struct inode *inode;
+again:
+ inode = ilookup5_nowait(sb, hashval, test, data);
+ if (inode) {
wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
+ }
return inode;
}
EXPORT_SYMBOL(ilookup5);
@@ -1288,13 +1310,18 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
-
+again:
spin_lock(&inode_hash_lock);
inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock);
- if (inode)
+ if (inode) {
wait_on_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
+ }
return inode;
}
EXPORT_SYMBOL(ilookup);
@@ -1538,16 +1565,36 @@ sector_t bmap(struct inode *inode, sector_t block)
EXPORT_SYMBOL(bmap);
/*
+ * Update times in overlayed inode from underlying real inode
+ */
+static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
+ bool rcu)
+{
+ if (!rcu) {
+ struct inode *realinode = d_real_inode(dentry);
+
+ if (unlikely(inode != realinode) &&
+ (!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
+ !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) {
+ inode->i_mtime = realinode->i_mtime;
+ inode->i_ctime = realinode->i_ctime;
+ }
+ }
+}
+
+/*
* With relative atime, only update atime if the previous atime is
* earlier than either the ctime or mtime or if at least a day has
* passed since the last atime update.
*/
-static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
- struct timespec now)
+static int relatime_need_update(const struct path *path, struct inode *inode,
+ struct timespec now, bool rcu)
{
- if (!(mnt->mnt_flags & MNT_RELATIME))
+ if (!(path->mnt->mnt_flags & MNT_RELATIME))
return 1;
+
+ update_ovl_inode_times(path->dentry, inode, rcu);
/*
* Is mtime younger than atime? If yes, update atime:
*/
@@ -1614,7 +1661,8 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
* This function automatically handles read only file systems and media,
* as well as the "noatime" flag and inode specific "noatime" markers.
*/
-bool atime_needs_update(const struct path *path, struct inode *inode)
+bool __atime_needs_update(const struct path *path, struct inode *inode,
+ bool rcu)
{
struct vfsmount *mnt = path->mnt;
struct timespec now;
@@ -1640,7 +1688,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
now = current_fs_time(inode->i_sb);
- if (!relatime_need_update(mnt, inode, now))
+ if (!relatime_need_update(path, inode, now, rcu))
return false;
if (timespec_equal(&inode->i_atime, &now))
@@ -1655,7 +1703,7 @@ void touch_atime(const struct path *path)
struct inode *inode = d_inode(path->dentry);
struct timespec now;
- if (!atime_needs_update(path, inode))
+ if (!__atime_needs_update(path, inode, false))
return;
if (!sb_start_write_trylock(inode->i_sb))
diff --git a/fs/internal.h b/fs/internal.h
index ba0737649d4a..f4da3341b4a3 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
struct super_block;
struct file_system_type;
struct iomap;
+struct iomap_ops;
struct linux_binprm;
struct path;
struct mount;
@@ -120,6 +121,15 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);
extern int dentry_needs_remove_privs(struct dentry *dentry);
+extern bool __atime_needs_update(const struct path *, struct inode *, bool);
+static inline bool atime_needs_update_rcu(const struct path *path,
+ struct inode *inode)
+{
+ return __atime_needs_update(path, inode, true);
+}
+
+extern bool atime_needs_update_rcu(const struct path *, struct inode *);
+
/*
* fs-writeback.c
*/
@@ -156,7 +166,7 @@ extern void mnt_pin_kill(struct mount *m);
/*
* fs/nsfs.c
*/
-extern struct dentry_operations ns_dentry_operations;
+extern const struct dentry_operations ns_dentry_operations;
/*
* fs/ioctl.c
@@ -164,3 +174,13 @@ extern struct dentry_operations ns_dentry_operations;
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+ void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap_ops *ops, void *data,
+ iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index 706270f21b35..013d1d36fbbf 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
#include <linux/dax.h>
#include "internal.h"
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
- void *data, struct iomap *iomap);
-
/*
* Execute a iomap write on a segment of the mapping that spans a
* contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
* resources they require in the iomap_begin call, and release them in the
* iomap_end call.
*/
-static loff_t
+loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
@@ -252,6 +249,88 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
+static struct page *
+__iomap_read_page(struct inode *inode, loff_t offset)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+
+ page = read_mapping_page(mapping, offset >> PAGE_SHIFT, NULL);
+ if (IS_ERR(page))
+ return page;
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ return page;
+}
+
+static loff_t
+iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ struct iomap *iomap)
+{
+ long status = 0;
+ ssize_t written = 0;
+
+ do {
+ struct page *page, *rpage;
+ unsigned long offset; /* Offset into pagecache page */
+ unsigned long bytes; /* Bytes to write to page */
+
+ offset = (pos & (PAGE_SIZE - 1));
+ bytes = min_t(unsigned long, PAGE_SIZE - offset, length);
+
+ rpage = __iomap_read_page(inode, pos);
+ if (IS_ERR(rpage))
+ return PTR_ERR(rpage);
+
+ status = iomap_write_begin(inode, pos, bytes,
+ AOP_FLAG_NOFS | AOP_FLAG_UNINTERRUPTIBLE,
+ &page, iomap);
+ put_page(rpage);
+ if (unlikely(status))
+ return status;
+
+ WARN_ON_ONCE(!PageUptodate(page));
+
+ status = iomap_write_end(inode, pos, bytes, bytes, page);
+ if (unlikely(status <= 0)) {
+ if (WARN_ON_ONCE(status == 0))
+ return -EIO;
+ return status;
+ }
+
+ cond_resched();
+
+ pos += status;
+ written += status;
+ length -= status;
+
+ balance_dirty_pages_ratelimited(inode->i_mapping);
+ } while (length);
+
+ return written;
+}
+
+int
+iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
+ struct iomap_ops *ops)
+{
+ loff_t ret;
+
+ while (len) {
+ ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL,
+ iomap_dirty_actor);
+ if (ret <= 0)
+ return ret;
+ pos += ret;
+ len -= ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_file_dirty);
+
static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
unsigned bytes, struct iomap *iomap)
{
@@ -430,6 +509,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
if (iomap->flags & IOMAP_F_MERGED)
flags |= FIEMAP_EXTENT_MERGED;
+ if (iomap->flags & IOMAP_F_SHARED)
+ flags |= FIEMAP_EXTENT_SHARED;
return fiemap_fill_next_extent(fi, iomap->offset,
iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 46261a6f902d..927da4956a89 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1090,11 +1090,15 @@ static void jbd2_stats_proc_exit(journal_t *journal)
* very few fields yet: that has to wait until we have created the
* journal structures from from scratch, or loaded them from disk. */
-static journal_t * journal_init_common (void)
+static journal_t *journal_init_common(struct block_device *bdev,
+ struct block_device *fs_dev,
+ unsigned long long start, int len, int blocksize)
{
static struct lock_class_key jbd2_trans_commit_key;
journal_t *journal;
int err;
+ struct buffer_head *bh;
+ int n;
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
@@ -1131,6 +1135,32 @@ static journal_t * journal_init_common (void)
lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
&jbd2_trans_commit_key, 0);
+ /* journal descriptor can store up to n blocks -bzzz */
+ journal->j_blocksize = blocksize;
+ journal->j_dev = bdev;
+ journal->j_fs_dev = fs_dev;
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
+ n = journal->j_blocksize / sizeof(journal_block_tag_t);
+ journal->j_wbufsize = n;
+ journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
+ GFP_KERNEL);
+ if (!journal->j_wbuf) {
+ kfree(journal);
+ return NULL;
+ }
+
+ bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
+ if (!bh) {
+ pr_err("%s: Cannot get buffer for journal superblock\n",
+ __func__);
+ kfree(journal->j_wbuf);
+ kfree(journal);
+ return NULL;
+ }
+ journal->j_sb_buffer = bh;
+ journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
return journal;
}
@@ -1157,51 +1187,21 @@ static journal_t * journal_init_common (void)
* range of blocks on an arbitrary block device.
*
*/
-journal_t * jbd2_journal_init_dev(struct block_device *bdev,
+journal_t *jbd2_journal_init_dev(struct block_device *bdev,
struct block_device *fs_dev,
unsigned long long start, int len, int blocksize)
{
- journal_t *journal = journal_init_common();
- struct buffer_head *bh;
- int n;
+ journal_t *journal;
+ journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
if (!journal)
return NULL;
- /* journal descriptor can store up to n blocks -bzzz */
- journal->j_blocksize = blocksize;
- journal->j_dev = bdev;
- journal->j_fs_dev = fs_dev;
- journal->j_blk_offset = start;
- journal->j_maxlen = len;
bdevname(journal->j_dev, journal->j_devname);
strreplace(journal->j_devname, '/', '!');
jbd2_stats_proc_init(journal);
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
- journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
- if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
- __func__);
- goto out_err;
- }
-
- bh = __getblk(journal->j_dev, start, journal->j_blocksize);
- if (!bh) {
- printk(KERN_ERR
- "%s: Cannot get buffer for journal superblock\n",
- __func__);
- goto out_err;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
return journal;
-out_err:
- kfree(journal->j_wbuf);
- jbd2_stats_proc_exit(journal);
- kfree(journal);
- return NULL;
}
/**
@@ -1212,67 +1212,36 @@ out_err:
* the journal. The inode must exist already, must support bmap() and
* must have all data blocks preallocated.
*/
-journal_t * jbd2_journal_init_inode (struct inode *inode)
+journal_t *jbd2_journal_init_inode(struct inode *inode)
{
- struct buffer_head *bh;
- journal_t *journal = journal_init_common();
+ journal_t *journal;
char *p;
- int err;
- int n;
unsigned long long blocknr;
+ blocknr = bmap(inode, 0);
+ if (!blocknr) {
+ pr_err("%s: Cannot locate journal superblock\n",
+ __func__);
+ return NULL;
+ }
+
+ jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
+ inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
+ inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
+
+ journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
+ blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
+ inode->i_sb->s_blocksize);
if (!journal)
return NULL;
- journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
journal->j_inode = inode;
bdevname(journal->j_dev, journal->j_devname);
p = strreplace(journal->j_devname, '/', '!');
sprintf(p, "-%lu", journal->j_inode->i_ino);
- jbd_debug(1,
- "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
- journal, inode->i_sb->s_id, inode->i_ino,
- (long long) inode->i_size,
- inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
-
- journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
- journal->j_blocksize = inode->i_sb->s_blocksize;
jbd2_stats_proc_init(journal);
- /* journal descriptor can store up to n blocks -bzzz */
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
- journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
- if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
- __func__);
- goto out_err;
- }
-
- err = jbd2_journal_bmap(journal, 0, &blocknr);
- /* If that failed, give up */
- if (err) {
- printk(KERN_ERR "%s: Cannot locate journal superblock\n",
- __func__);
- goto out_err;
- }
-
- bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
- if (!bh) {
- printk(KERN_ERR
- "%s: Cannot get buffer for journal superblock\n",
- __func__);
- goto out_err;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
return journal;
-out_err:
- kfree(journal->j_wbuf);
- jbd2_stats_proc_exit(journal);
- kfree(journal);
- return NULL;
}
/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b5bc3e249163..3d8246a9faa4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -159,6 +159,7 @@ static void wait_transaction_locked(journal_t *journal)
read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
+ jbd2_might_wait_for_commit(journal);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
@@ -182,8 +183,6 @@ static int add_transaction_credits(journal_t *journal, int blocks,
int needed;
int total = blocks + rsv_blocks;
- jbd2_might_wait_for_commit(journal);
-
/*
* If the current transaction is locked down for commit, wait
* for the lock to be released.
@@ -214,6 +213,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
if (atomic_read(&journal->j_reserved_credits) + total >
journal->j_max_transaction_buffers) {
read_unlock(&journal->j_state_lock);
+ jbd2_might_wait_for_commit(journal);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + total <=
journal->j_max_transaction_buffers);
@@ -238,6 +238,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock);
+ jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock);
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
__jbd2_log_wait_for_space(journal);
@@ -255,6 +256,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
sub_reserved_credits(journal, rsv_blocks);
atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock);
+ jbd2_might_wait_for_commit(journal);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + rsv_blocks
<= journal->j_max_transaction_buffers / 2);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index bc2693d56298..2a0f2a1044c1 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -233,9 +233,10 @@ int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
xprefix = JFFS2_XPREFIX_ACL_ACCESS;
if (acl) {
- umode_t mode = inode->i_mode;
- rc = posix_acl_equiv_mode(acl, &mode);
- if (rc < 0)
+ umode_t mode;
+
+ rc = posix_acl_update_mode(inode, &mode, &acl);
+ if (rc)
return rc;
if (inode->i_mode != mode) {
struct iattr attr;
@@ -247,8 +248,6 @@ int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (rc < 0)
return rc;
}
- if (rc == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index ae2ebb26b446..3773b24b4db0 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -193,7 +193,7 @@ int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
struct inode *inode = d_inode(dentry);
int rc;
- rc = inode_change_ok(inode, iattr);
+ rc = setattr_prepare(dentry, iattr);
if (rc)
return rc;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 21fa92ba2c19..3a1e1554a4e3 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -78,13 +78,11 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
case ACL_TYPE_ACCESS:
ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- rc = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (rc < 0)
+ rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (rc)
return rc;
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
- if (rc == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f6eb0417a909..739492c7a3fd 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -103,7 +103,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
struct inode *inode = d_inode(dentry);
int rc;
- rc = inode_change_ok(inode, iattr);
+ rc = setattr_prepare(dentry, iattr);
if (rc)
return rc;
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 2e58978d6f45..4d973524c887 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2893,8 +2893,7 @@ restart:
* on anon_list2. Let's check.
*/
if (!list_empty(&TxAnchor.anon_list2)) {
- list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
- INIT_LIST_HEAD(&TxAnchor.anon_list2);
+ list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
goto restart;
}
TXN_UNLOCK();
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 90b3bc21e9b0..bd9b641ada2c 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -379,8 +379,14 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
* cached in meta-data cache, and not written out
* by txCommit();
*/
- filemap_fdatawait(ipbmap->i_mapping);
- filemap_write_and_wait(ipbmap->i_mapping);
+ rc = filemap_fdatawait(ipbmap->i_mapping);
+ if (rc)
+ goto error_out;
+
+ rc = filemap_write_and_wait(ipbmap->i_mapping);
+ if (rc)
+ goto error_out;
+
diWriteSpecial(ipbmap, 0);
newPage = nPages; /* first new page number */
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 2b735ce0268a..102b6f0bc7af 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -119,7 +119,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
return -EINVAL;
mutex_lock(&kernfs_mutex);
- error = inode_change_ok(inode, iattr);
+ error = setattr_prepare(dentry, iattr);
if (error)
goto out;
diff --git a/fs/libfs.c b/fs/libfs.c
index b322d756b20d..a6d89f151771 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -395,7 +395,7 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)
struct inode *inode = d_inode(dentry);
int error;
- error = inode_change_ok(inode, iattr);
+ error = setattr_prepare(dentry, iattr);
if (error)
return error;
diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f6fc13..8cc218d7a039 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
-#include <linux/lglock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>
@@ -139,6 +138,11 @@
#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
+static inline bool is_remote_lock(struct file *filp)
+{
+ return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK));
+}
+
static bool lease_breaking(struct file_lock *fl)
{
return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -158,12 +162,18 @@ int lease_break_time = 45;
/*
* The global file_lock_list is only used for displaying /proc/locks, so we
- * keep a list on each CPU, with each list protected by its own spinlock via
- * the file_lock_lglock. Note that alterations to the list also require that
- * the relevant flc_lock is held.
+ * keep a list on each CPU, with each list protected by its own spinlock.
+ * Global serialization is done using file_rwsem.
+ *
+ * Note that alterations to the list also require that the relevant flc_lock is
+ * held.
*/
-DEFINE_STATIC_LGLOCK(file_lock_lglock);
-static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
+struct file_lock_list_struct {
+ spinlock_t lock;
+ struct hlist_head hlist;
+};
+static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
+DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
/*
* The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -587,15 +597,23 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
/* Must be called with the flc_lock held! */
static void locks_insert_global_locks(struct file_lock *fl)
{
- lg_local_lock(&file_lock_lglock);
+ struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
+
+ percpu_rwsem_assert_held(&file_rwsem);
+
+ spin_lock(&fll->lock);
fl->fl_link_cpu = smp_processor_id();
- hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
- lg_local_unlock(&file_lock_lglock);
+ hlist_add_head(&fl->fl_link, &fll->hlist);
+ spin_unlock(&fll->lock);
}
/* Must be called with the flc_lock held! */
static void locks_delete_global_locks(struct file_lock *fl)
{
+ struct file_lock_list_struct *fll;
+
+ percpu_rwsem_assert_held(&file_rwsem);
+
/*
* Avoid taking lock if already unhashed. This is safe since this check
* is done while holding the flc_lock, and new insertions into the list
@@ -603,9 +621,11 @@ static void locks_delete_global_locks(struct file_lock *fl)
*/
if (hlist_unhashed(&fl->fl_link))
return;
- lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+
+ fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
+ spin_lock(&fll->lock);
hlist_del_init(&fl->fl_link);
- lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+ spin_unlock(&fll->lock);
}
static unsigned long
@@ -791,7 +811,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
{
struct file_lock *cfl;
struct file_lock_context *ctx;
- struct inode *inode = file_inode(filp);
+ struct inode *inode = locks_inode(filp);
ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
@@ -915,6 +935,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
return -ENOMEM;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
if (request->fl_flags & FL_ACCESS)
goto find_conflict;
@@ -955,6 +976,7 @@ find_conflict:
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
if (new_fl)
locks_free_lock(new_fl);
locks_dispose_list(&dispose);
@@ -991,6 +1013,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
new_fl2 = locks_alloc_lock();
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
/*
* New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1162,6 +1185,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
}
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
/*
* Free any unused locks.
*/
@@ -1192,7 +1216,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return posix_lock_inode(file_inode(filp), fl, conflock);
+ return posix_lock_inode(locks_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1232,7 +1256,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int locks_mandatory_locked(struct file *file)
{
int ret;
- struct inode *inode = file_inode(file);
+ struct inode *inode = locks_inode(file);
struct file_lock_context *ctx;
struct file_lock *fl;
@@ -1436,6 +1460,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
return error;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
@@ -1487,9 +1512,13 @@ restart:
locks_insert_block(fl, new_fl);
trace_break_lease_block(inode, new_fl);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
+
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
!new_fl->fl_next, break_time);
+
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
trace_break_lease_unblock(inode, new_fl);
locks_delete_block(new_fl);
@@ -1506,6 +1535,7 @@ restart:
}
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
locks_free_lock(new_fl);
return error;
@@ -1572,7 +1602,7 @@ EXPORT_SYMBOL(lease_get_mtime);
int fcntl_getlease(struct file *filp)
{
struct file_lock *fl;
- struct inode *inode = file_inode(filp);
+ struct inode *inode = locks_inode(filp);
struct file_lock_context *ctx;
int type = F_UNLCK;
LIST_HEAD(dispose);
@@ -1580,7 +1610,7 @@ int fcntl_getlease(struct file *filp)
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
- time_out_leases(file_inode(filp), &dispose);
+ time_out_leases(inode, &dispose);
list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
if (fl->fl_file != filp)
continue;
@@ -1613,7 +1643,8 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
+ if ((arg == F_RDLCK) &&
+ (atomic_read(&d_real_inode(dentry)->i_writecount) > 0))
return -EAGAIN;
if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
@@ -1628,7 +1659,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
{
struct file_lock *fl, *my_fl = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = file_inode(filp);
+ struct inode *inode = dentry->d_inode;
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
@@ -1660,6 +1691,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
return -EINVAL;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1730,6 +1762,7 @@ out_setup:
lease->fl_lmops->lm_setup(lease, priv);
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
if (is_deleg)
inode_unlock(inode);
@@ -1742,7 +1775,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
{
int error = -EAGAIN;
struct file_lock *fl, *victim = NULL;
- struct inode *inode = file_inode(filp);
+ struct inode *inode = locks_inode(filp);
struct file_lock_context *ctx;
LIST_HEAD(dispose);
@@ -1752,6 +1785,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
return error;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
if (fl->fl_file == filp &&
@@ -1764,6 +1798,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
if (victim)
error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
return error;
}
@@ -1782,7 +1817,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
void **priv)
{
- struct inode *inode = file_inode(filp);
+ struct inode *inode = locks_inode(filp);
int error;
if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
@@ -1830,7 +1865,7 @@ EXPORT_SYMBOL(generic_setlease);
int
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
{
- if (filp->f_op->setlease)
+ if (filp->f_op->setlease && is_remote_lock(filp))
return filp->f_op->setlease(filp, arg, lease, priv);
else
return generic_setlease(filp, arg, lease, priv);
@@ -1979,7 +2014,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
if (error)
goto out_free;
- if (f.file->f_op->flock)
+ if (f.file->f_op->flock && is_remote_lock(f.file))
error = f.file->f_op->flock(f.file,
(can_sleep) ? F_SETLKW : F_SETLK,
lock);
@@ -2005,7 +2040,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock)
+ if (filp->f_op->lock && is_remote_lock(filp))
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
return 0;
@@ -2129,7 +2164,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
- if (filp->f_op->lock)
+ if (filp->f_op->lock && is_remote_lock(filp))
return filp->f_op->lock(filp, cmd, fl);
else
return posix_lock_file(filp, fl, conf);
@@ -2191,7 +2226,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
- inode = file_inode(filp);
+ inode = locks_inode(filp);
/*
* This might block, so we do it before checking the inode.
@@ -2343,7 +2378,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
+ inode = locks_inode(filp);
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
@@ -2426,6 +2461,7 @@ out:
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
int error;
+ struct inode *inode = locks_inode(filp);
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2434,7 +2470,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
* posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway.
*/
- ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+ ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty(&ctx->flc_posix))
return;
@@ -2452,7 +2488,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
- trace_locks_remove_posix(file_inode(filp), &lock, error);
+ trace_locks_remove_posix(inode, &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
@@ -2469,12 +2505,12 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
.fl_type = F_UNLCK,
.fl_end = OFFSET_MAX,
};
- struct inode *inode = file_inode(filp);
+ struct inode *inode = locks_inode(filp);
if (list_empty(&flctx->flc_flock))
return;
- if (filp->f_op->flock)
+ if (filp->f_op->flock && is_remote_lock(filp))
filp->f_op->flock(filp, F_SETLKW, &fl);
else
flock_lock_inode(inode, &fl);
@@ -2508,7 +2544,7 @@ void locks_remove_file(struct file *filp)
{
struct file_lock_context *ctx;
- ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+ ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
if (!ctx)
return;
@@ -2552,7 +2588,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock)
+ if (filp->f_op->lock && is_remote_lock(filp))
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
@@ -2574,13 +2610,24 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
struct inode *inode = NULL;
unsigned int fl_pid;
- if (fl->fl_nspid)
- fl_pid = pid_vnr(fl->fl_nspid);
- else
+ if (fl->fl_nspid) {
+ struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+
+ /* Don't let fl_pid change based on who is reading the file */
+ fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
+
+ /*
+ * If there isn't a fl_pid: when called from locks_show, don't
+ * display who is waiting on the lock; when called from
+ * __show_fd_locks, skip the lock entirely.
+ */
+ if (fl_pid == 0)
+ return;
+ } else
fl_pid = fl->fl_pid;
if (fl->fl_file != NULL)
- inode = file_inode(fl->fl_file);
+ inode = locks_inode(fl->fl_file);
seq_printf(f, "%lld:%s ", id, pfx);
if (IS_POSIX(fl)) {
@@ -2648,9 +2695,13 @@ static int locks_show(struct seq_file *f, void *v)
{
struct locks_iterator *iter = f->private;
struct file_lock *fl, *bfl;
+ struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
fl = hlist_entry(v, struct file_lock, fl_link);
+ if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
+ return 0;
+
lock_get_status(f, fl, iter->li_pos, "");
list_for_each_entry(bfl, &fl->fl_block, fl_block)
@@ -2682,7 +2733,7 @@ static void __show_fd_locks(struct seq_file *f,
void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files)
{
- struct inode *inode = file_inode(filp);
+ struct inode *inode = locks_inode(filp);
struct file_lock_context *ctx;
int id = 0;
@@ -2703,9 +2754,9 @@ static void *locks_start(struct seq_file *f, loff_t *pos)
struct locks_iterator *iter = f->private;
iter->li_pos = *pos + 1;
- lg_global_lock(&file_lock_lglock);
+ percpu_down_write(&file_rwsem);
spin_lock(&blocked_lock_lock);
- return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
+ return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
}
static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
@@ -2713,14 +2764,14 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
struct locks_iterator *iter = f->private;
++iter->li_pos;
- return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
+ return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
}
static void locks_stop(struct seq_file *f, void *v)
__releases(&blocked_lock_lock)
{
spin_unlock(&blocked_lock_lock);
- lg_global_unlock(&file_lock_lglock);
+ percpu_up_write(&file_rwsem);
}
static const struct seq_operations locks_seq_operations = {
@@ -2761,10 +2812,13 @@ static int __init filelock_init(void)
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
- lg_lock_init(&file_lock_lglock, "file_lock_lglock");
- for_each_possible_cpu(i)
- INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
+ for_each_possible_cpu(i) {
+ struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
+
+ spin_lock_init(&fll->lock);
+ INIT_HLIST_HEAD(&fll->hlist);
+ }
return 0;
}
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index f01ddfb1a03b..5d9fe466bbc9 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -244,7 +244,7 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = d_inode(dentry);
int err = 0;
- err = inode_change_ok(inode, attr);
+ err = setattr_prepare(dentry, attr);
if (err)
return err;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index eccda3a02de6..c5bd19ffa326 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -366,7 +366,11 @@ struct mb_cache *mb_cache_create(int bucket_bits)
cache->c_shrink.count_objects = mb_cache_count;
cache->c_shrink.scan_objects = mb_cache_scan;
cache->c_shrink.seeks = DEFAULT_SEEKS;
- register_shrinker(&cache->c_shrink);
+ if (register_shrinker(&cache->c_shrink)) {
+ kfree(cache->c_hash);
+ kfree(cache);
+ goto err_out;
+ }
INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 94f0eb9a6e2c..a6a4797aa0d4 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -26,7 +26,7 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = d_inode(dentry);
int error;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..d2e25d7b64b3 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,9 +10,12 @@ struct mnt_namespace {
struct mount * root;
struct list_head list;
struct user_namespace *user_ns;
+ struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
+ unsigned int mounts; /* # of mounts in the namespace */
+ unsigned int pending_mounts;
};
struct mnt_pcp {
diff --git a/fs/namei.c b/fs/namei.c
index adb04146df09..4bbcae1ba58e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd)
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
- } else if (atime_needs_update(&last->link, inode)) {
+ } else if (atime_needs_update_rcu(&last->link, inode)) {
if (unlikely(unlazy_walk(nd, NULL, 0)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..58aca9c931ac 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
#include "pnode.h"
#include "internal.h"
+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
@@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
+ n->mounts += n->pending_mounts;
+ n->pending_mounts = 0;
+
attach_shadowed(mnt, parent, shadows);
touch_mnt_namespace(n);
}
@@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
propagate_umount(&tmp_list);
while (!list_empty(&tmp_list)) {
+ struct mnt_namespace *ns;
bool disconnect;
p = list_first_entry(&tmp_list, struct mount, mnt_list);
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
- __touch_mnt_namespace(p->mnt_ns);
+ ns = p->mnt_ns;
+ if (ns) {
+ ns->mounts--;
+ __touch_mnt_namespace(ns);
+ }
p->mnt_ns = NULL;
if (how & UMOUNT_SYNC)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
return 0;
}
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+ unsigned int max = READ_ONCE(sysctl_mount_max);
+ unsigned int mounts = 0, old, pending, sum;
+ struct mount *p;
+
+ for (p = mnt; p; p = next_mnt(p, mnt))
+ mounts++;
+
+ old = ns->mounts;
+ pending = ns->pending_mounts;
+ sum = old + pending;
+ if ((old > sum) ||
+ (pending > sum) ||
+ (max < sum) ||
+ (mounts > (max - sum)))
+ return -ENOSPC;
+
+ ns->pending_mounts = pending + mounts;
+ return 0;
+}
+
/*
* @source_mnt : mount tree to be attached
* @nd : place the mount tree @source_mnt is attached
@@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
struct path *parent_path)
{
HLIST_HEAD(tree_list);
+ struct mnt_namespace *ns = dest_mnt->mnt_ns;
struct mount *child, *p;
struct hlist_node *n;
int err;
+ /* Is there space to add these mounts to the mount namespace? */
+ if (!parent_path) {
+ err = count_mounts(ns, source_mnt);
+ if (err)
+ goto out;
+ }
+
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
@@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
out_cleanup_ids:
while (!hlist_empty(&tree_list)) {
child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+ child->mnt_parent->mnt_ns->pending_mounts = 0;
umount_tree(child, UMOUNT_SYNC);
}
unlock_mount_hash();
cleanup_group_ids(source_mnt, NULL);
out:
+ ns->pending_mounts = 0;
return err;
}
@@ -2700,7 +2743,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME);
+ MS_STRICTATIME | MS_NOREMOTELOCK);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
@@ -2719,9 +2762,20 @@ dput_out:
return retval;
}
+static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
+}
+
static void free_mnt_ns(struct mnt_namespace *ns)
{
ns_free_inum(&ns->ns);
+ dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -2738,14 +2792,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
struct mnt_namespace *new_ns;
+ struct ucounts *ucounts;
int ret;
+ ucounts = inc_mnt_namespaces(user_ns);
+ if (!ucounts)
+ return ERR_PTR(-ENOSPC);
+
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
- if (!new_ns)
+ if (!new_ns) {
+ dec_mnt_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
+ }
ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
+ dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
}
new_ns->ns.ops = &mntns_operations;
@@ -2756,6 +2818,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
new_ns->user_ns = get_user_ns(user_ns);
+ new_ns->ucounts = ucounts;
+ new_ns->mounts = 0;
+ new_ns->pending_mounts = 0;
return new_ns;
}
@@ -2805,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
q = new;
while (p) {
q->mnt_ns = new_ns;
+ new_ns->mounts++;
if (new_fs) {
if (&p->mnt == new_fs->root.mnt) {
new_fs->root.mnt = mntget(&q->mnt);
@@ -2843,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
new_ns->root = mnt;
+ new_ns->mounts++;
list_add(&mnt->mnt_list, &new_ns->list);
} else {
mntput(m);
@@ -3348,10 +3415,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *mntns_owner(struct ns_common *ns)
+{
+ return to_mnt_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
+ .owner = mntns_owner,
};
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1af15fcbe57b..f6cf4c7e92b1 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -884,7 +884,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
/* ageing the dentry to force validation */
ncp_age_dentry(server, dentry);
- result = inode_change_ok(inode, attr);
+ result = setattr_prepare(dentry, attr);
if (result < 0)
goto out;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ca699ddc11c1..2efbdde36c3e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);
-ssize_t
-nfs_file_splice_read(struct file *filp, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- struct inode *inode = file_inode(filp);
- ssize_t res;
-
- dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
- filp, (unsigned long) count, (unsigned long long) *ppos);
-
- nfs_start_io_read(inode);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
- if (!res) {
- res = generic_file_splice_read(filp, ppos, pipe, count, flags);
- if (res > 0)
- nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
- }
- nfs_end_io_read(inode);
- return res;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_read);
-
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 74935a19e4bf..4b308a1487a5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
-ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
- size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
@@ -681,11 +679,11 @@ unsigned int nfs_page_length(struct page *page)
loff_t i_size = i_size_read(page_file_mapping(page)->host);
if (i_size > 0) {
- pgoff_t page_index = page_file_index(page);
+ pgoff_t index = page_index(page);
pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
- if (page_index < end_index)
+ if (index < end_index)
return PAGE_SIZE;
- if (page_index == end_index)
+ if (index == end_index)
return ((i_size - 1) & ~PAGE_MASK) + 1;
}
return 0;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index d085ad794884..89a77950e0b0 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 174dd4cf5747..965db474f4b0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -342,7 +342,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
if (page) {
- req->wb_index = page_file_index(page);
+ req->wb_index = page_index(page);
get_page(page);
}
req->wb_offset = offset;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 572e5b3b06f1..defc9233e985 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -295,7 +295,7 @@ int nfs_readpage(struct file *file, struct page *page)
int error;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
- page, PAGE_SIZE, page_file_index(page));
+ page, PAGE_SIZE, page_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
nfs_add_stats(inode, NFSIOS_READPAGES, 1);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3a6724c6eb5f..53211838f72a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -151,7 +151,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
end_index = (i_size - 1) >> PAGE_SHIFT;
- if (i_size > 0 && page_file_index(page) < end_index)
+ if (i_size > 0 && page_index(page) < end_index)
goto out;
end = page_file_offset(page) + ((loff_t)offset+count);
if (i_size >= end)
@@ -603,7 +603,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
{
int ret;
- nfs_pageio_cond_complete(pgio, page_file_index(page));
+ nfs_pageio_cond_complete(pgio, page_index(page));
ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
launder);
if (ret == -EAGAIN) {
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 9d46a0bdd9f9..62469c60be23 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -55,10 +55,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
goto oom;
for (i = 0; i < rqgi->ngroups; i++) {
- if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i)))
- GROUP_AT(gi, i) = exp->ex_anon_gid;
+ if (gid_eq(GLOBAL_ROOT_GID, rqgi->gid[i]))
+ gi->gid[i] = exp->ex_anon_gid;
else
- GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+ gi->gid[i] = rqgi->gid[i];
}
} else {
gi = get_group_info(rqgi);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a204d7e109d4..39bfaba9c99c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1903,7 +1903,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
if (g1->ngroups != g2->ngroups)
return false;
for (i=0; i<g1->ngroups; i++)
- if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
+ if (!gid_eq(g1->gid[i], g2->gid[i]))
return false;
return true;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e9214768cde9..08188743db53 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -74,10 +74,10 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
* which only requires access, and "set-[ac]time-to-X" which
* requires ownership.
* So if it looks like it might be "set both to the same time which
- * is close to now", and if inode_change_ok fails, then we
+ * is close to now", and if setattr_prepare fails, then we
* convert to "set to now" instead of "set to explicit time"
*
- * We only call inode_change_ok as the last test as technically
+ * We only call setattr_prepare as the last test as technically
* it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
@@ -92,17 +92,15 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
* request is. We require it be within 30 minutes of now.
*/
time_t delta = iap->ia_atime.tv_sec - get_seconds();
- struct inode *inode;
nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (nfserr)
goto done;
- inode = d_inode(fhp->fh_dentry);
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
- inode_change_ok(inode, iap) != 0) {
+ setattr_prepare(fhp->fh_dentry, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index af04f553d7c9..402c325e0467 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -829,7 +829,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
struct super_block *sb = inode->i_sb;
int err;
- err = inode_change_ok(inode, iattr);
+ err = setattr_prepare(dentry, iattr);
if (err)
return err;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a64313868d3a..7ebfca6a1427 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -49,12 +49,12 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
* enough to fit in "count". Return an error pointer if the count
* is not large enough.
*
- * Called with the group->notification_mutex held.
+ * Called with the group->notification_lock held.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
- BUG_ON(!mutex_is_locked(&group->notification_mutex));
+ assert_spin_locked(&group->notification_lock);
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
@@ -64,7 +64,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
if (FAN_EVENT_METADATA_LEN > count)
return ERR_PTR(-EINVAL);
- /* held the notification_mutex the whole time, so this is the
+ /* held the notification_lock the whole time, so this is the
* same event we peeked above */
return fsnotify_remove_first_event(group);
}
@@ -147,7 +147,7 @@ static struct fanotify_perm_event_info *dequeue_event(
{
struct fanotify_perm_event_info *event, *return_e = NULL;
- spin_lock(&group->fanotify_data.access_lock);
+ spin_lock(&group->notification_lock);
list_for_each_entry(event, &group->fanotify_data.access_list,
fae.fse.list) {
if (event->fd != fd)
@@ -157,7 +157,7 @@ static struct fanotify_perm_event_info *dequeue_event(
return_e = event;
break;
}
- spin_unlock(&group->fanotify_data.access_lock);
+ spin_unlock(&group->notification_lock);
pr_debug("%s: found return_re=%p\n", __func__, return_e);
@@ -244,10 +244,10 @@ static unsigned int fanotify_poll(struct file *file, poll_table *wait)
int ret = 0;
poll_wait(file, &group->notification_waitq, wait);
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
if (!fsnotify_notify_queue_is_empty(group))
ret = POLLIN | POLLRDNORM;
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
return ret;
}
@@ -268,9 +268,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
add_wait_queue(&group->notification_waitq, &wait);
while (1) {
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
kevent = get_one_event(group, count);
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
if (IS_ERR(kevent)) {
ret = PTR_ERR(kevent);
@@ -309,10 +309,10 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
wake_up(&group->fanotify_data.access_waitq);
break;
}
- spin_lock(&group->fanotify_data.access_lock);
+ spin_lock(&group->notification_lock);
list_add_tail(&kevent->list,
&group->fanotify_data.access_list);
- spin_unlock(&group->fanotify_data.access_lock);
+ spin_unlock(&group->notification_lock);
#endif
}
buf += ret;
@@ -371,7 +371,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
* Process all permission events on access_list and notification queue
* and simulate reply from userspace.
*/
- spin_lock(&group->fanotify_data.access_lock);
+ spin_lock(&group->notification_lock);
list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
fae.fse.list) {
pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -380,22 +380,22 @@ static int fanotify_release(struct inode *ignored, struct file *file)
list_del_init(&event->fae.fse.list);
event->response = FAN_ALLOW;
}
- spin_unlock(&group->fanotify_data.access_lock);
/*
* Destroy all non-permission events. For permission events just
* dequeue them and set the response. They will be freed once the
* response is consumed and fanotify_get_response() returns.
*/
- mutex_lock(&group->notification_mutex);
while (!fsnotify_notify_queue_is_empty(group)) {
fsn_event = fsnotify_remove_first_event(group);
- if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+ if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) {
+ spin_unlock(&group->notification_lock);
fsnotify_destroy_event(group, fsn_event);
- else
+ spin_lock(&group->notification_lock);
+ } else
FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
}
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
/* Response for all permission events it set, wakeup waiters */
wake_up(&group->fanotify_data.access_waitq);
@@ -421,10 +421,10 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
switch (cmd) {
case FIONREAD:
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
list_for_each_entry(fsn_event, &group->notification_list, list)
send_len += FAN_EVENT_METADATA_LEN;
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
ret = put_user(send_len, (int __user *) p);
break;
}
@@ -765,7 +765,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
event_f_flags |= O_LARGEFILE;
group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
- spin_lock_init(&group->fanotify_data.access_lock);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
#endif
diff --git a/fs/notify/group.c b/fs/notify/group.c
index b47f7cfdcaa4..fbe3cbebec16 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -45,9 +45,9 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
*/
void fsnotify_group_stop_queueing(struct fsnotify_group *group)
{
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
group->shutdown = true;
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
}
/*
@@ -125,7 +125,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
atomic_set(&group->refcnt, 1);
atomic_set(&group->num_marks, 0);
- mutex_init(&group->notification_mutex);
+ spin_lock_init(&group->notification_lock);
INIT_LIST_HEAD(&group->notification_list);
init_waitqueue_head(&group->notification_waitq);
group->max_events = UINT_MAX;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index b8d08d0d0a4d..69d1ea3d292a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -115,10 +115,10 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
int ret = 0;
poll_wait(file, &group->notification_waitq, wait);
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
if (!fsnotify_notify_queue_is_empty(group))
ret = POLLIN | POLLRDNORM;
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
return ret;
}
@@ -138,7 +138,7 @@ static int round_event_name_len(struct fsnotify_event *fsn_event)
* enough to fit in "count". Return an error pointer if
* not large enough.
*
- * Called with the group->notification_mutex held.
+ * Called with the group->notification_lock held.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
@@ -157,7 +157,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
if (event_size > count)
return ERR_PTR(-EINVAL);
- /* held the notification_mutex the whole time, so this is the
+ /* held the notification_lock the whole time, so this is the
* same event we peeked above */
fsnotify_remove_first_event(group);
@@ -234,9 +234,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
add_wait_queue(&group->notification_waitq, &wait);
while (1) {
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
kevent = get_one_event(group, count);
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
@@ -300,13 +300,13 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case FIONREAD:
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
list_for_each_entry(fsn_event, &group->notification_list,
list) {
send_len += sizeof(struct inotify_event);
send_len += round_event_name_len(fsn_event);
}
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
ret = put_user(send_len, (int __user *) p);
break;
}
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index e455e83ceeeb..66f85c651c52 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
/* return true if the notify queue is empty, false otherwise */
bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
- BUG_ON(!mutex_is_locked(&group->notification_mutex));
+ assert_spin_locked(&group->notification_lock);
return list_empty(&group->notification_list) ? true : false;
}
@@ -73,8 +73,17 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
/* Overflow events are per-group and we don't want to free them */
if (!event || event->mask == FS_Q_OVERFLOW)
return;
- /* If the event is still queued, we have a problem... */
- WARN_ON(!list_empty(&event->list));
+ /*
+ * If the event is still queued, we have a problem... Do an unreliable
+ * lockless check first to avoid locking in the common case. The
+ * locking may be necessary for permission events which got removed
+ * from the list by a different CPU than the one freeing the event.
+ */
+ if (!list_empty(&event->list)) {
+ spin_lock(&group->notification_lock);
+ WARN_ON(!list_empty(&event->list));
+ spin_unlock(&group->notification_lock);
+ }
group->ops->free_event(event);
}
@@ -95,10 +104,10 @@ int fsnotify_add_event(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
if (group->shutdown) {
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
return 2;
}
@@ -106,7 +115,7 @@ int fsnotify_add_event(struct fsnotify_group *group,
ret = 2;
/* Queue overflow event only if it isn't already queued */
if (!list_empty(&group->overflow_event->list)) {
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
return ret;
}
event = group->overflow_event;
@@ -116,7 +125,7 @@ int fsnotify_add_event(struct fsnotify_group *group,
if (!list_empty(list) && merge) {
ret = merge(list, event);
if (ret) {
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
return ret;
}
}
@@ -124,7 +133,7 @@ int fsnotify_add_event(struct fsnotify_group *group,
queue:
group->q_len++;
list_add_tail(&event->list, list);
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
wake_up(&group->notification_waitq);
kill_fasync(&group->fsn_fa, SIGIO, POLL_IN);
@@ -139,7 +148,7 @@ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
- BUG_ON(!mutex_is_locked(&group->notification_mutex));
+ assert_spin_locked(&group->notification_lock);
pr_debug("%s: group=%p\n", __func__, group);
@@ -161,7 +170,7 @@ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
*/
struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
{
- BUG_ON(!mutex_is_locked(&group->notification_mutex));
+ assert_spin_locked(&group->notification_lock);
return list_first_entry(&group->notification_list,
struct fsnotify_event, list);
@@ -175,12 +184,14 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
{
struct fsnotify_event *event;
- mutex_lock(&group->notification_mutex);
+ spin_lock(&group->notification_lock);
while (!fsnotify_notify_queue_is_empty(group)) {
event = fsnotify_remove_first_event(group);
+ spin_unlock(&group->notification_lock);
fsnotify_destroy_event(group, event);
+ spin_lock(&group->notification_lock);
}
- mutex_unlock(&group->notification_mutex);
+ spin_unlock(&group->notification_lock);
}
/*
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8f20d6016e20..30bb10034120 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -5,11 +5,16 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
+#include <linux/user_namespace.h>
+#include <linux/nsfs.h>
static struct vfsmount *nsfs_mnt;
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg);
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
+ .unlocked_ioctl = ns_ioctl,
};
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
}
-void *ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops)
+static void *__ns_get_path(struct path *path, struct ns_common *ns)
{
- struct vfsmount *mnt = mntget(nsfs_mnt);
+ struct vfsmount *mnt = nsfs_mnt;
struct qstr qname = { .name = "", };
struct dentry *dentry;
struct inode *inode;
- struct ns_common *ns;
unsigned long d;
-again:
- ns = ns_ops->get(task);
- if (!ns) {
- mntput(mnt);
- return ERR_PTR(-ENOENT);
- }
rcu_read_lock();
d = atomic_long_read(&ns->stashed);
if (!d)
@@ -68,17 +65,16 @@ again:
if (!lockref_get_not_dead(&dentry->d_lockref))
goto slow;
rcu_read_unlock();
- ns_ops->put(ns);
+ ns->ops->put(ns);
got_it:
- path->mnt = mnt;
+ path->mnt = mntget(mnt);
path->dentry = dentry;
return NULL;
slow:
rcu_read_unlock();
inode = new_inode_pseudo(mnt->mnt_sb);
if (!inode) {
- ns_ops->put(ns);
- mntput(mnt);
+ ns->ops->put(ns);
return ERR_PTR(-ENOMEM);
}
inode->i_ino = ns->inum;
@@ -91,21 +87,96 @@ slow:
dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
if (!dentry) {
iput(inode);
- mntput(mnt);
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
- dentry->d_fsdata = (void *)ns_ops;
+ dentry->d_fsdata = (void *)ns->ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
cpu_relax();
- goto again;
+ return ERR_PTR(-EAGAIN);
}
goto got_it;
}
+void *ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct ns_common *ns;
+ void *ret;
+
+again:
+ ns = ns_ops->get(task);
+ if (!ns)
+ return ERR_PTR(-ENOENT);
+
+ ret = __ns_get_path(path, ns);
+ if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
+ goto again;
+ return ret;
+}
+
+static int open_related_ns(struct ns_common *ns,
+ struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+ struct path path = {};
+ struct file *f;
+ void *err;
+ int fd;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ while (1) {
+ struct ns_common *relative;
+
+ relative = get_ns(ns);
+ if (IS_ERR(relative)) {
+ put_unused_fd(fd);
+ return PTR_ERR(relative);
+ }
+
+ err = __ns_get_path(&path, relative);
+ if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
+ continue;
+ break;
+ }
+ if (IS_ERR(err)) {
+ put_unused_fd(fd);
+ return PTR_ERR(err);
+ }
+
+ f = dentry_open(&path, O_RDONLY, current_cred());
+ path_put(&path);
+ if (IS_ERR(f)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(f);
+ } else
+ fd_install(fd, f);
+
+ return fd;
+}
+
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct ns_common *ns = get_proc_ns(file_inode(filp));
+
+ switch (ioctl) {
+ case NS_GET_USERNS:
+ return open_related_ns(ns, ns_get_owner);
+ case NS_GET_PARENT:
+ if (!ns->ops->get_parent)
+ return -EINVAL;
+ return open_related_ns(ns, ns->ops->get_parent);
+ default:
+ return -ENOTTY;
+ }
+}
+
int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f548629dfaac..bf72a2c58b75 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1850,7 +1850,7 @@ again:
* pages being swapped out between us bringing them into memory
* and doing the actual copying.
*/
- if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+ if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
break;
}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index e01287c964a8..9d7a44872df5 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2893,7 +2893,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
int err;
unsigned int ia_valid = attr->ia_valid;
- err = inode_change_ok(vi, attr);
+ err = setattr_prepare(dentry, attr);
if (err)
goto out;
/* We do not support NTFS ACLs yet. */
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 2162434728c0..164307b99405 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -241,13 +241,11 @@ int ocfs2_set_acl(handle_t *handle,
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- umode_t mode = inode->i_mode;
- ret = posix_acl_equiv_mode(acl, &mode);
- if (ret < 0)
- return ret;
+ umode_t mode;
- if (ret == 0)
- acl = NULL;
+ ret = posix_acl_update_mode(inode, &mode, &acl);
+ if (ret)
+ return ret;
ret = ocfs2_acl_set_mode(inode, di_bh,
handle, mode);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 98d36548153d..bbb4b3e5b4ff 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1842,6 +1842,16 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out:
+ /*
+ * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(),
+ * even in case of error here like ENOSPC and ENOMEM. So, we need
+ * to unlock the target page manually to prevent deadlocks when
+ * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED
+ * to VM code.
+ */
+ if (wc->w_target_locked)
+ unlock_page(mmap_page);
+
ocfs2_free_write_ctxt(inode, wc);
if (data_ac) {
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1d67fcbf7160..8abab16b4602 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -2104,7 +2104,7 @@ int o2net_start_listening(struct o2nm_node *node)
BUG_ON(o2net_listen_sock != NULL);
mlog(ML_KTHREAD, "starting o2net thread...\n");
- o2net_wq = create_singlethread_workqueue("o2net");
+ o2net_wq = alloc_ordered_workqueue("o2net", WQ_MEM_RECLAIM);
if (o2net_wq == NULL) {
mlog(ML_ERROR, "unable to launch o2net thread\n");
return -ENOMEM; /* ? */
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 533bd524e41e..733e4e79c8e2 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1904,7 +1904,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
}
snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
- dlm->dlm_worker = create_singlethread_workqueue(wq_name);
+ dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
if (!dlm->dlm_worker) {
status = -ENOMEM;
mlog_errno(status);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index ef474cdd6404..a0c3e03f9cd3 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -211,7 +211,7 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = d_inode(dentry);
attr->ia_valid &= ~ATTR_SIZE;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -646,7 +646,7 @@ static int __init init_dlmfs_fs(void)
}
cleanup_inode = 1;
- user_dlm_worker = create_singlethread_workqueue("user_dlm");
+ user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
if (!user_dlm_worker) {
status = -ENOMEM;
goto bail;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 63316db763da..ba5c177d0ed6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1155,7 +1155,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
return 0;
- status = inode_change_ok(inode, attr);
+ status = setattr_prepare(dentry, attr);
if (status)
return status;
@@ -2321,36 +2321,6 @@ out_mutex:
return ret;
}
-static ssize_t ocfs2_file_splice_read(struct file *in,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len,
- unsigned int flags)
-{
- int ret = 0, lock_level = 0;
- struct inode *inode = file_inode(in);
-
- trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- in->f_path.dentry->d_name.len,
- in->f_path.dentry->d_name.name, len);
-
- /*
- * See the comment in ocfs2_file_read_iter()
- */
- ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
- if (ret < 0) {
- mlog_errno(ret);
- goto bail;
- }
- ocfs2_inode_unlock(inode, lock_level);
-
- ret = generic_file_splice_read(in, ppos, pipe, len, flags);
-
-bail:
- return ret;
-}
-
static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
struct iov_iter *to)
{
@@ -2506,7 +2476,7 @@ const struct file_operations ocfs2_fops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
- .splice_read = ocfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
@@ -2551,7 +2521,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
- .splice_read = ocfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 50cc55047443..5af68fcdf9d3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -123,8 +123,6 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
#define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL)
#define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL)
-extern struct kmem_cache *ocfs2_inode_cache;
-
extern const struct address_space_operations ocfs2_aops;
extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index f8f5fc5e6c05..0b58abcf1c6d 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
-
DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 603b28d6f008..f56fe39fab04 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2329,7 +2329,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
cleancache_init_shared_fs(sb);
- osb->ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
+ osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
if (!osb->ocfs2_wq) {
status = -ENOMEM;
mlog_errno(status);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d9e26cfbb793..bf83e6644333 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -349,7 +349,7 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = d_inode(dentry);
int error;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
diff --git a/fs/open.c b/fs/open.c
index 4fd6e256f4f4..8aeb08bb278b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -68,6 +68,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
long vfs_truncate(const struct path *path, loff_t length)
{
struct inode *inode;
+ struct dentry *upperdentry;
long error;
inode = path->dentry->d_inode;
@@ -90,7 +91,17 @@ long vfs_truncate(const struct path *path, loff_t length)
if (IS_APPEND(inode))
goto mnt_drop_write_and_out;
- error = get_write_access(inode);
+ /*
+ * If this is an overlayfs then do as if opening the file so we get
+ * write access on the upper inode, not on the overlay inode. For
+ * non-overlay filesystems d_real() is an identity function.
+ */
+ upperdentry = d_real(path->dentry, NULL, O_WRONLY);
+ error = PTR_ERR(upperdentry);
+ if (IS_ERR(upperdentry))
+ goto mnt_drop_write_and_out;
+
+ error = get_write_access(upperdentry->d_inode);
if (error)
goto mnt_drop_write_and_out;
@@ -109,7 +120,7 @@ long vfs_truncate(const struct path *path, loff_t length)
error = do_truncate(path->dentry, length, 0, NULL);
put_write_and_out:
- put_write_access(inode);
+ put_write_access(upperdentry->d_inode);
mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
out:
@@ -726,7 +737,7 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
- error = break_lease(inode, f->f_flags);
+ error = break_lease(locks_inode(f), f->f_flags);
if (error)
goto cleanup_all;
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
index 28f2195cd798..7a3754488312 100644
--- a/fs/orangefs/acl.c
+++ b/fs/orangefs/acl.c
@@ -73,14 +73,11 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- umode_t mode = inode->i_mode;
- /*
- * can we represent this with the traditional file
- * mode permission bits?
- */
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0) {
- gossip_err("%s: posix_acl_equiv_mode err: %d\n",
+ umode_t mode;
+
+ error = posix_acl_update_mode(inode, &mode, &acl);
+ if (error) {
+ gossip_err("%s: posix_acl_update_mode err: %d\n",
__func__,
error);
return error;
@@ -90,8 +87,6 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
SetModeFlag(orangefs_inode);
inode->i_mode = mode;
mark_inode_dirty_sync(inode);
- if (error == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index 00235bf644dc..1e8fe844e69f 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -73,7 +73,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
}
}
- dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
+ dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
ret = 1;
out_release_op:
op_release(new_op);
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index a287a66d94e3..516ffb4dc9a0 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -11,14 +11,19 @@
#include "orangefs-kernel.h"
#include "orangefs-dev-proto.h"
#include "orangefs-bufmap.h"
+#include "orangefs-debugfs.h"
#include <linux/debugfs.h>
#include <linux/slab.h>
/* this file implements the /dev/pvfs2-req device node */
+uint32_t orangefs_userspace_version;
+
static int open_access_count;
+static DEFINE_MUTEX(devreq_mutex);
+
#define DUMP_DEVICE_ERROR() \
do { \
gossip_err("*****************************************************\n");\
@@ -43,7 +48,7 @@ static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
{
int index = hash_func(op->tag, hash_table_size);
- list_add_tail(&op->list, &htable_ops_in_progress[index]);
+ list_add_tail(&op->list, &orangefs_htable_ops_in_progress[index]);
}
/*
@@ -57,20 +62,20 @@ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
index = hash_func(tag, hash_table_size);
- spin_lock(&htable_ops_in_progress_lock);
+ spin_lock(&orangefs_htable_ops_in_progress_lock);
list_for_each_entry_safe(op,
next,
- &htable_ops_in_progress[index],
+ &orangefs_htable_ops_in_progress[index],
list) {
if (op->tag == tag && !op_state_purged(op) &&
!op_state_given_up(op)) {
list_del_init(&op->list);
- spin_unlock(&htable_ops_in_progress_lock);
+ spin_unlock(&orangefs_htable_ops_in_progress_lock);
return op;
}
}
- spin_unlock(&htable_ops_in_progress_lock);
+ spin_unlock(&orangefs_htable_ops_in_progress_lock);
return NULL;
}
@@ -276,11 +281,11 @@ restart:
if (ret != 0)
goto error;
- spin_lock(&htable_ops_in_progress_lock);
+ spin_lock(&orangefs_htable_ops_in_progress_lock);
spin_lock(&cur_op->lock);
if (unlikely(op_state_given_up(cur_op))) {
spin_unlock(&cur_op->lock);
- spin_unlock(&htable_ops_in_progress_lock);
+ spin_unlock(&orangefs_htable_ops_in_progress_lock);
complete(&cur_op->waitq);
goto restart;
}
@@ -298,7 +303,7 @@ restart:
current->comm);
orangefs_devreq_add_op(cur_op);
spin_unlock(&cur_op->lock);
- spin_unlock(&htable_ops_in_progress_lock);
+ spin_unlock(&orangefs_htable_ops_in_progress_lock);
/* The client only asks to read one size buffer. */
return MAX_DEV_REQ_UPSIZE;
@@ -387,6 +392,13 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
return -EPROTO;
}
+ if (!orangefs_userspace_version) {
+ orangefs_userspace_version = head.version;
+ } else if (orangefs_userspace_version != head.version) {
+ gossip_err("Error: userspace version changes\n");
+ return -EPROTO;
+ }
+
/* remove the op from the in progress hash table */
op = orangefs_devreq_remove_op(head.tag);
if (!op) {
@@ -527,6 +539,7 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file)
gossip_debug(GOSSIP_DEV_DEBUG,
"pvfs2-client-core: device close complete\n");
open_access_count = 0;
+ orangefs_userspace_version = 0;
mutex_unlock(&devreq_mutex);
return 0;
}
@@ -576,8 +589,6 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE;
struct ORANGEFS_dev_map_desc user_desc;
int ret = 0;
- struct dev_mask_info_s mask_info = { 0 };
- struct dev_mask2_info_s mask2_info = { 0, 0 };
int upstream_kmod = 1;
struct orangefs_sb_info_s *orangefs_sb;
@@ -619,7 +630,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
* all of the remounts are serviced (to avoid ops between
* mounts to fail)
*/
- ret = mutex_lock_interruptible(&request_mutex);
+ ret = mutex_lock_interruptible(&orangefs_request_mutex);
if (ret < 0)
return ret;
gossip_debug(GOSSIP_DEV_DEBUG,
@@ -654,7 +665,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
gossip_debug(GOSSIP_DEV_DEBUG,
"%s: priority remount complete\n",
__func__);
- mutex_unlock(&request_mutex);
+ mutex_unlock(&orangefs_request_mutex);
return ret;
case ORANGEFS_DEV_UPSTREAM:
@@ -668,134 +679,11 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
return ret;
case ORANGEFS_DEV_CLIENT_MASK:
- ret = copy_from_user(&mask2_info,
- (void __user *)arg,
- sizeof(struct dev_mask2_info_s));
-
- if (ret != 0)
- return -EIO;
-
- client_debug_mask.mask1 = mask2_info.mask1_value;
- client_debug_mask.mask2 = mask2_info.mask2_value;
-
- pr_info("%s: client debug mask has been been received "
- ":%llx: :%llx:\n",
- __func__,
- (unsigned long long)client_debug_mask.mask1,
- (unsigned long long)client_debug_mask.mask2);
-
- return ret;
-
+ return orangefs_debugfs_new_client_mask((void __user *)arg);
case ORANGEFS_DEV_CLIENT_STRING:
- ret = copy_from_user(&client_debug_array_string,
- (void __user *)arg,
- ORANGEFS_MAX_DEBUG_STRING_LEN);
- /*
- * The real client-core makes an effort to ensure
- * that actual strings that aren't too long to fit in
- * this buffer is what we get here. We're going to use
- * string functions on the stuff we got, so we'll make
- * this extra effort to try and keep from
- * flowing out of this buffer when we use the string
- * functions, even if somehow the stuff we end up
- * with here is garbage.
- */
- client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
- '\0';
-
- if (ret != 0) {
- pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
- __func__);
- return -EIO;
- }
-
- pr_info("%s: client debug array string has been received.\n",
- __func__);
-
- if (!help_string_initialized) {
-
- /* Free the "we don't know yet" default string... */
- kfree(debug_help_string);
-
- /* build a proper debug help string */
- if (orangefs_prepare_debugfs_help_string(0)) {
- gossip_err("%s: no debug help string \n",
- __func__);
- return -EIO;
- }
-
- /* Replace the boilerplate boot-time debug-help file. */
- debugfs_remove(help_file_dentry);
-
- help_file_dentry =
- debugfs_create_file(
- ORANGEFS_KMOD_DEBUG_HELP_FILE,
- 0444,
- debug_dir,
- debug_help_string,
- &debug_help_fops);
-
- if (!help_file_dentry) {
- gossip_err("%s: debugfs_create_file failed for"
- " :%s:!\n",
- __func__,
- ORANGEFS_KMOD_DEBUG_HELP_FILE);
- return -EIO;
- }
- }
-
- debug_mask_to_string(&client_debug_mask, 1);
-
- debugfs_remove(client_debug_dentry);
-
- orangefs_client_debug_init();
-
- help_string_initialized++;
-
- return ret;
-
+ return orangefs_debugfs_new_client_string((void __user *)arg);
case ORANGEFS_DEV_DEBUG:
- ret = copy_from_user(&mask_info,
- (void __user *)arg,
- sizeof(mask_info));
-
- if (ret != 0)
- return -EIO;
-
- if (mask_info.mask_type == KERNEL_MASK) {
- if ((mask_info.mask_value == 0)
- && (kernel_mask_set_mod_init)) {
- /*
- * the kernel debug mask was set when the
- * kernel module was loaded; don't override
- * it if the client-core was started without
- * a value for ORANGEFS_KMODMASK.
- */
- return 0;
- }
- debug_mask_to_string(&mask_info.mask_value,
- mask_info.mask_type);
- gossip_debug_mask = mask_info.mask_value;
- pr_info("%s: kernel debug mask has been modified to "
- ":%s: :%llx:\n",
- __func__,
- kernel_debug_string,
- (unsigned long long)gossip_debug_mask);
- } else if (mask_info.mask_type == CLIENT_MASK) {
- debug_mask_to_string(&mask_info.mask_value,
- mask_info.mask_type);
- pr_info("%s: client debug mask has been modified to"
- ":%s: :%llx:\n",
- __func__,
- client_debug_string,
- llu(mask_info.mask_value));
- } else {
- gossip_lerr("Invalid mask type....\n");
- return -EINVAL;
- }
-
- return ret;
-
+ return orangefs_debugfs_new_debug((void __user *)arg);
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
index 324f0af40d7b..284373a57a08 100644
--- a/fs/orangefs/dir.c
+++ b/fs/orangefs/dir.c
@@ -177,8 +177,8 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx)
}
gossip_debug(GOSSIP_DIR_DEBUG,
- "orangefs_readdir called on %s (pos=%llu)\n",
- dentry->d_name.name, llu(pos));
+ "orangefs_readdir called on %pd (pos=%llu)\n",
+ dentry, llu(pos));
memset(&readdir_response, 0, sizeof(readdir_response));
diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h
index 66b99210f1f9..3b8923f8bf21 100644
--- a/fs/orangefs/downcall.h
+++ b/fs/orangefs/downcall.h
@@ -83,7 +83,10 @@ struct orangefs_listxattr_response {
};
struct orangefs_param_response {
- __s64 value;
+ union {
+ __s64 value64;
+ __s32 value32[2];
+ } u;
};
#define PERF_COUNT_BUF_SIZE 4096
@@ -98,6 +101,11 @@ struct orangefs_fs_key_response {
char fs_key[FS_KEY_BUF_SIZE];
};
+/* 2.9.6 */
+struct orangefs_features_response {
+ __u64 features;
+};
+
struct orangefs_downcall_s {
__s32 type;
__s32 status;
@@ -119,6 +127,7 @@ struct orangefs_downcall_s {
struct orangefs_param_response param;
struct orangefs_perf_count_response perf_count;
struct orangefs_fs_key_response fs_key;
+ struct orangefs_features_response features;
} resp;
};
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 526040e09f78..2aa088ab713b 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -14,6 +14,32 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
+static int flush_racache(struct inode *inode)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ int ret;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "%s: %pU: Handle is %pU | fs_id %d\n", __func__,
+ get_khandle_from_ino(inode), &orangefs_inode->refn.khandle,
+ orangefs_inode->refn.fs_id);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH);
+ if (!new_op)
+ return -ENOMEM;
+ new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn;
+
+ ret = service_operation(new_op, "orangefs_flush_racache",
+ get_interruptible_flag(inode));
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n",
+ __func__, ret);
+
+ op_release(new_op);
+ return ret;
+}
+
/*
* Copy to client-core's address space from the buffers specified
* by the iovec upto total_size bytes.
@@ -386,7 +412,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
size_t bufmap_size;
ssize_t ret = -EINVAL;
- g_orangefs_stats.reads++;
+ orangefs_stats.reads++;
bufmap_size = orangefs_bufmap_size_query();
if (count > bufmap_size) {
@@ -427,7 +453,7 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter
gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n");
- g_orangefs_stats.reads++;
+ orangefs_stats.reads++;
rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
iocb->ki_pos = pos;
@@ -488,7 +514,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
}
iocb->ki_pos = pos;
- g_orangefs_stats.writes++;
+ orangefs_stats.writes++;
out:
@@ -585,21 +611,30 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
static int orangefs_file_release(struct inode *inode, struct file *file)
{
gossip_debug(GOSSIP_FILE_DEBUG,
- "orangefs_file_release: called on %s\n",
- file->f_path.dentry->d_name.name);
+ "orangefs_file_release: called on %pD\n",
+ file);
orangefs_flush_inode(inode);
/*
- * remove all associated inode pages from the page cache and mmap
+ * remove all associated inode pages from the page cache and
* readahead cache (if any); this forces an expensive refresh of
* data for the next caller of mmap (or 'get_block' accesses)
*/
if (file->f_path.dentry->d_inode &&
file->f_path.dentry->d_inode->i_mapping &&
- mapping_nrpages(&file->f_path.dentry->d_inode->i_data))
+ mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) {
+ if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "calling flush_racache on %pU\n",
+ get_khandle_from_ino(inode));
+ flush_racache(inode);
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "flush_racache finished\n");
+ }
truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
0);
+ }
return 0;
}
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index a9407eeecb21..0e3bd7e07f88 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -129,8 +129,8 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
struct iov_iter *iter)
{
gossip_debug(GOSSIP_INODE_DEBUG,
- "orangefs_direct_IO: %s\n",
- iocb->ki_filp->f_path.dentry->d_name.name);
+ "orangefs_direct_IO: %pD\n",
+ iocb->ki_filp);
return -EINVAL;
}
@@ -216,10 +216,10 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
struct inode *inode = dentry->d_inode;
gossip_debug(GOSSIP_INODE_DEBUG,
- "orangefs_setattr: called on %s\n",
- dentry->d_name.name);
+ "orangefs_setattr: called on %pd\n",
+ dentry);
- ret = inode_change_ok(inode, iattr);
+ ret = setattr_prepare(dentry, iattr);
if (ret)
goto out;
@@ -259,8 +259,8 @@ int orangefs_getattr(struct vfsmount *mnt,
struct orangefs_inode_s *orangefs_inode = NULL;
gossip_debug(GOSSIP_INODE_DEBUG,
- "orangefs_getattr: called on %s\n",
- dentry->d_name.name);
+ "orangefs_getattr: called on %pd\n",
+ dentry);
ret = orangefs_inode_getattr(inode, 0, 0);
if (ret == 0) {
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index ccca5186d5cd..4d5576a21c82 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -24,9 +24,9 @@ static int orangefs_create(struct inode *dir,
struct inode *inode;
int ret;
- gossip_debug(GOSSIP_NAME_DEBUG, "%s: %s\n",
+ gossip_debug(GOSSIP_NAME_DEBUG, "%s: %pd\n",
__func__,
- dentry->d_name.name);
+ dentry);
new_op = op_alloc(ORANGEFS_VFS_OP_CREATE);
if (!new_op)
@@ -43,9 +43,9 @@ static int orangefs_create(struct inode *dir,
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
gossip_debug(GOSSIP_NAME_DEBUG,
- "%s: %s: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
+ "%s: %pd: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
__func__,
- dentry->d_name.name,
+ dentry,
&new_op->downcall.resp.create.refn.khandle,
new_op->downcall.resp.create.refn.fs_id,
new_op,
@@ -57,28 +57,28 @@ static int orangefs_create(struct inode *dir,
inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
&new_op->downcall.resp.create.refn);
if (IS_ERR(inode)) {
- gossip_err("%s: Failed to allocate inode for file :%s:\n",
+ gossip_err("%s: Failed to allocate inode for file :%pd:\n",
__func__,
- dentry->d_name.name);
+ dentry);
ret = PTR_ERR(inode);
goto out;
}
gossip_debug(GOSSIP_NAME_DEBUG,
- "%s: Assigned inode :%pU: for file :%s:\n",
+ "%s: Assigned inode :%pU: for file :%pd:\n",
__func__,
get_khandle_from_ino(inode),
- dentry->d_name.name);
+ dentry);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
+ dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
- "%s: dentry instantiated for %s\n",
+ "%s: dentry instantiated for %pd\n",
__func__,
- dentry->d_name.name);
+ dentry);
SetMtimeFlag(parent);
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
@@ -87,9 +87,9 @@ static int orangefs_create(struct inode *dir,
out:
op_release(new_op);
gossip_debug(GOSSIP_NAME_DEBUG,
- "%s: %s: returning %d\n",
+ "%s: %pd: returning %d\n",
__func__,
- dentry->d_name.name,
+ dentry,
ret);
return ret;
}
@@ -115,8 +115,8 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
* -EEXIST on O_EXCL opens, which is broken if we skip this lookup
* in the create path)
*/
- gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
- __func__, dentry->d_name.name);
+ gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %pd\n",
+ __func__, dentry);
if (dentry->d_name.len > (ORANGEFS_NAME_MAX - 1))
return ERR_PTR(-ENAMETOOLONG);
@@ -169,9 +169,9 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
gossip_debug(GOSSIP_NAME_DEBUG,
"orangefs_lookup: Adding *negative* dentry "
- "%p for %s\n",
+ "%p for %pd\n",
dentry,
- dentry->d_name.name);
+ dentry);
d_add(dentry, NULL);
res = NULL;
@@ -183,7 +183,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
- dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
+ dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
if (IS_ERR(inode)) {
@@ -224,10 +224,10 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
int ret;
gossip_debug(GOSSIP_NAME_DEBUG,
- "%s: called on %s\n"
+ "%s: called on %pd\n"
" (inode %pU): Parent is %pU | fs_id %d\n",
__func__,
- dentry->d_name.name,
+ dentry,
get_khandle_from_ino(inode),
&parent->refn.khandle,
parent->refn.fs_id);
@@ -322,13 +322,13 @@ static int orangefs_symlink(struct inode *dir,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
+ dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
- "Inode (Symlink) %pU -> %s\n",
+ "Inode (Symlink) %pU -> %pd\n",
get_khandle_from_ino(inode),
- dentry->d_name.name);
+ dentry);
SetMtimeFlag(parent);
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
@@ -386,13 +386,13 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
+ dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
- "Inode (Directory) %pU -> %s\n",
+ "Inode (Directory) %pU -> %pd\n",
get_khandle_from_ino(inode),
- dentry->d_name.name);
+ dentry);
/*
* NOTE: we have no good way to keep nlink consistent for directories
diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c
index b6edbe9fb309..aa3830b741c7 100644
--- a/fs/orangefs/orangefs-cache.c
+++ b/fs/orangefs/orangefs-cache.c
@@ -73,8 +73,8 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op)
return "OP_STATFS";
else if (type == ORANGEFS_VFS_OP_TRUNCATE)
return "OP_TRUNCATE";
- else if (type == ORANGEFS_VFS_OP_MMAP_RA_FLUSH)
- return "OP_MMAP_RA_FLUSH";
+ else if (type == ORANGEFS_VFS_OP_RA_FLUSH)
+ return "OP_RA_FLUSH";
else if (type == ORANGEFS_VFS_OP_FS_MOUNT)
return "OP_FS_MOUNT";
else if (type == ORANGEFS_VFS_OP_FS_UMOUNT)
@@ -97,6 +97,8 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op)
return "OP_FSYNC";
else if (type == ORANGEFS_VFS_OP_FSKEY)
return "OP_FSKEY";
+ else if (type == ORANGEFS_VFS_OP_FEATURES)
+ return "OP_FEATURES";
}
return "OP_UNKNOWN?";
}
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 1714a737d556..eb09aa026723 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -43,36 +43,35 @@
#include "protocol.h"
#include "orangefs-kernel.h"
-static int orangefs_debug_disabled = 1;
-
-static int orangefs_debug_help_open(struct inode *, struct file *);
+#define DEBUG_HELP_STRING_SIZE 4096
+#define HELP_STRING_UNINITIALIZED \
+ "Client Debug Keywords are unknown until the first time\n" \
+ "the client is started after boot.\n"
+#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help"
+#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug"
+#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug"
+#define ORANGEFS_VERBOSE "verbose"
+#define ORANGEFS_ALL "all"
-const struct file_operations debug_help_fops = {
- .open = orangefs_debug_help_open,
- .read = seq_read,
- .release = seq_release,
- .llseek = seq_lseek,
+/*
+ * An array of client_debug_mask will be built to hold debug keyword/mask
+ * values fetched from userspace.
+ */
+struct client_debug_mask {
+ char *keyword;
+ __u64 mask1;
+ __u64 mask2;
};
+static int orangefs_kernel_debug_init(void);
+
+static int orangefs_debug_help_open(struct inode *, struct file *);
static void *help_start(struct seq_file *, loff_t *);
static void *help_next(struct seq_file *, void *, loff_t *);
static void help_stop(struct seq_file *, void *);
static int help_show(struct seq_file *, void *);
-static const struct seq_operations help_debug_ops = {
- .start = help_start,
- .next = help_next,
- .stop = help_stop,
- .show = help_show,
-};
-
-/*
- * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
- * ORANGEFS_KMOD_DEBUG_FILE.
- */
-static DEFINE_MUTEX(orangefs_debug_lock);
-
-int orangefs_debug_open(struct inode *, struct file *);
+static int orangefs_debug_open(struct inode *, struct file *);
static ssize_t orangefs_debug_read(struct file *,
char __user *,
@@ -84,6 +83,43 @@ static ssize_t orangefs_debug_write(struct file *,
size_t,
loff_t *);
+static int orangefs_prepare_cdm_array(char *);
+static void debug_mask_to_string(void *, int);
+static void do_k_string(void *, int);
+static void do_c_string(void *, int);
+static int keyword_is_amalgam(char *);
+static int check_amalgam_keyword(void *, int);
+static void debug_string_to_mask(char *, void *, int);
+static void do_c_mask(int, char *, struct client_debug_mask **);
+static void do_k_mask(int, char *, __u64 **);
+
+static char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN] = "none";
+static char *debug_help_string;
+static char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+static char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+
+static struct dentry *help_file_dentry;
+static struct dentry *client_debug_dentry;
+static struct dentry *debug_dir;
+
+static unsigned int kernel_mask_set_mod_init;
+static int orangefs_debug_disabled = 1;
+static int help_string_initialized;
+
+static const struct seq_operations help_debug_ops = {
+ .start = help_start,
+ .next = help_next,
+ .stop = help_stop,
+ .show = help_show,
+};
+
+const struct file_operations debug_help_fops = {
+ .open = orangefs_debug_help_open,
+ .read = seq_read,
+ .release = seq_release,
+ .llseek = seq_lseek,
+};
+
static const struct file_operations kernel_debug_fops = {
.open = orangefs_debug_open,
.read = orangefs_debug_read,
@@ -91,15 +127,55 @@ static const struct file_operations kernel_debug_fops = {
.llseek = generic_file_llseek,
};
+static int client_all_index;
+static int client_verbose_index;
+
+static struct client_debug_mask *cdm_array;
+static int cdm_element_count;
+
+static struct client_debug_mask client_debug_mask;
+
+/*
+ * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
+ * ORANGEFS_KMOD_DEBUG_FILE.
+ */
+static DEFINE_MUTEX(orangefs_debug_lock);
+
/*
* initialize kmod debug operations, create orangefs debugfs dir and
* ORANGEFS_KMOD_DEBUG_HELP_FILE.
*/
-int orangefs_debugfs_init(void)
+int orangefs_debugfs_init(int debug_mask)
{
-
int rc = -ENOMEM;
+ /* convert input debug mask to a 64-bit unsigned integer */
+ orangefs_gossip_debug_mask = (unsigned long long)debug_mask;
+
+ /*
+ * set the kernel's gossip debug string; invalid mask values will
+ * be ignored.
+ */
+ debug_mask_to_string(&orangefs_gossip_debug_mask, 0);
+
+ /* remove any invalid values from the mask */
+ debug_string_to_mask(kernel_debug_string, &orangefs_gossip_debug_mask,
+ 0);
+
+ /*
+ * if the mask has a non-zero value, then indicate that the mask
+ * was set when the kernel module was loaded. The orangefs dev ioctl
+ * command will look at this boolean to determine if the kernel's
+ * debug mask should be overwritten when the client-core is started.
+ */
+ if (orangefs_gossip_debug_mask != 0)
+ kernel_mask_set_mod_init = true;
+
+ pr_info("%s: called with debug mask: :%s: :%llx:\n",
+ __func__,
+ kernel_debug_string,
+ (unsigned long long)orangefs_gossip_debug_mask);
+
debug_dir = debugfs_create_dir("orangefs", NULL);
if (!debug_dir) {
pr_info("%s: debugfs_create_dir failed.\n", __func__);
@@ -117,13 +193,58 @@ int orangefs_debugfs_init(void)
}
orangefs_debug_disabled = 0;
+
+ rc = orangefs_kernel_debug_init();
+
+out:
+
+ return rc;
+}
+
+/*
+ * initialize the kernel-debug file.
+ */
+static int orangefs_kernel_debug_init(void)
+{
+ int rc = -ENOMEM;
+ struct dentry *ret;
+ char *k_buffer = NULL;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+ k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+ if (!k_buffer)
+ goto out;
+
+ if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+ strcpy(k_buffer, kernel_debug_string);
+ strcat(k_buffer, "\n");
+ } else {
+ strcpy(k_buffer, "none\n");
+ pr_info("%s: overflow 1!\n", __func__);
+ }
+
+ ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
+ 0444,
+ debug_dir,
+ k_buffer,
+ &kernel_debug_fops);
+ if (!ret) {
+ pr_info("%s: failed to create %s.\n",
+ __func__,
+ ORANGEFS_KMOD_DEBUG_FILE);
+ goto out;
+ }
+
rc = 0;
out:
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
return rc;
}
+
void orangefs_debugfs_cleanup(void)
{
debugfs_remove_recursive(debug_dir);
@@ -196,49 +317,6 @@ static int help_show(struct seq_file *m, void *v)
}
/*
- * initialize the kernel-debug file.
- */
-int orangefs_kernel_debug_init(void)
-{
- int rc = -ENOMEM;
- struct dentry *ret;
- char *k_buffer = NULL;
-
- gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
-
- k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
- if (!k_buffer)
- goto out;
-
- if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
- strcpy(k_buffer, kernel_debug_string);
- strcat(k_buffer, "\n");
- } else {
- strcpy(k_buffer, "none\n");
- pr_info("%s: overflow 1!\n", __func__);
- }
-
- ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
- 0444,
- debug_dir,
- k_buffer,
- &kernel_debug_fops);
- if (!ret) {
- pr_info("%s: failed to create %s.\n",
- __func__,
- ORANGEFS_KMOD_DEBUG_FILE);
- goto out;
- }
-
- rc = 0;
-
-out:
-
- gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
- return rc;
-}
-
-/*
* initialize the client-debug file.
*/
int orangefs_client_debug_init(void)
@@ -282,7 +360,7 @@ out:
}
/* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
-int orangefs_debug_open(struct inode *inode, struct file *file)
+static int orangefs_debug_open(struct inode *inode, struct file *file)
{
int rc = -ENODEV;
@@ -350,8 +428,8 @@ static ssize_t orangefs_debug_write(struct file *file,
struct client_debug_mask c_mask = { NULL, 0, 0 };
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
- "orangefs_debug_write: %s\n",
- file->f_path.dentry->d_name.name);
+ "orangefs_debug_write: %pD\n",
+ file);
/*
* Thwart users who try to jamb a ridiculous number
@@ -384,8 +462,8 @@ static ssize_t orangefs_debug_write(struct file *file,
*/
if (!strcmp(file->f_path.dentry->d_name.name,
ORANGEFS_KMOD_DEBUG_FILE)) {
- debug_string_to_mask(buf, &gossip_debug_mask, 0);
- debug_mask_to_string(&gossip_debug_mask, 0);
+ debug_string_to_mask(buf, &orangefs_gossip_debug_mask, 0);
+ debug_mask_to_string(&orangefs_gossip_debug_mask, 0);
debug_string = kernel_debug_string;
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
"New kernel debug string is %s\n",
@@ -452,3 +530,546 @@ out:
kfree(buf);
return rc;
}
+
+/*
+ * After obtaining a string representation of the client's debug
+ * keywords and their associated masks, this function is called to build an
+ * array of these values.
+ */
+static int orangefs_prepare_cdm_array(char *debug_array_string)
+{
+ int i;
+ int rc = -EINVAL;
+ char *cds_head = NULL;
+ char *cds_delimiter = NULL;
+ int keyword_len = 0;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ /*
+ * figure out how many elements the cdm_array needs.
+ */
+ for (i = 0; i < strlen(debug_array_string); i++)
+ if (debug_array_string[i] == '\n')
+ cdm_element_count++;
+
+ if (!cdm_element_count) {
+ pr_info("No elements in client debug array string!\n");
+ goto out;
+ }
+
+ cdm_array =
+ kzalloc(cdm_element_count * sizeof(struct client_debug_mask),
+ GFP_KERNEL);
+ if (!cdm_array) {
+ pr_info("malloc failed for cdm_array!\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ cds_head = debug_array_string;
+
+ for (i = 0; i < cdm_element_count; i++) {
+ cds_delimiter = strchr(cds_head, '\n');
+ *cds_delimiter = '\0';
+
+ keyword_len = strcspn(cds_head, " ");
+
+ cdm_array[i].keyword = kzalloc(keyword_len + 1, GFP_KERNEL);
+ if (!cdm_array[i].keyword) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sscanf(cds_head,
+ "%s %llx %llx",
+ cdm_array[i].keyword,
+ (unsigned long long *)&(cdm_array[i].mask1),
+ (unsigned long long *)&(cdm_array[i].mask2));
+
+ if (!strcmp(cdm_array[i].keyword, ORANGEFS_VERBOSE))
+ client_verbose_index = i;
+
+ if (!strcmp(cdm_array[i].keyword, ORANGEFS_ALL))
+ client_all_index = i;
+
+ cds_head = cds_delimiter + 1;
+ }
+
+ rc = cdm_element_count;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+
+out:
+
+ return rc;
+
+}
+
+/*
+ * /sys/kernel/debug/orangefs/debug-help can be catted to
+ * see all the available kernel and client debug keywords.
+ *
+ * When the kernel boots, we have no idea what keywords the
+ * client supports, nor their associated masks.
+ *
+ * We pass through this function once at boot and stamp a
+ * boilerplate "we don't know" message for the client in the
+ * debug-help file. We pass through here again when the client
+ * starts and then we can fill out the debug-help file fully.
+ *
+ * The client might be restarted any number of times between
+ * reboots, we only build the debug-help file the first time.
+ */
+int orangefs_prepare_debugfs_help_string(int at_boot)
+{
+ int rc = -EINVAL;
+ int i;
+ int byte_count = 0;
+ char *client_title = "Client Debug Keywords:\n";
+ char *kernel_title = "Kernel Debug Keywords:\n";
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ if (at_boot) {
+ byte_count += strlen(HELP_STRING_UNINITIALIZED);
+ client_title = HELP_STRING_UNINITIALIZED;
+ } else {
+ /*
+ * fill the client keyword/mask array and remember
+ * how many elements there were.
+ */
+ cdm_element_count =
+ orangefs_prepare_cdm_array(client_debug_array_string);
+ if (cdm_element_count <= 0)
+ goto out;
+
+ /* Count the bytes destined for debug_help_string. */
+ byte_count += strlen(client_title);
+
+ for (i = 0; i < cdm_element_count; i++) {
+ byte_count += strlen(cdm_array[i].keyword + 2);
+ if (byte_count >= DEBUG_HELP_STRING_SIZE) {
+ pr_info("%s: overflow 1!\n", __func__);
+ goto out;
+ }
+ }
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "%s: cdm_element_count:%d:\n",
+ __func__,
+ cdm_element_count);
+ }
+
+ byte_count += strlen(kernel_title);
+ for (i = 0; i < num_kmod_keyword_mask_map; i++) {
+ byte_count +=
+ strlen(s_kmod_keyword_mask_map[i].keyword + 2);
+ if (byte_count >= DEBUG_HELP_STRING_SIZE) {
+ pr_info("%s: overflow 2!\n", __func__);
+ goto out;
+ }
+ }
+
+ /* build debug_help_string. */
+ debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
+ if (!debug_help_string) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ strcat(debug_help_string, client_title);
+
+ if (!at_boot) {
+ for (i = 0; i < cdm_element_count; i++) {
+ strcat(debug_help_string, "\t");
+ strcat(debug_help_string, cdm_array[i].keyword);
+ strcat(debug_help_string, "\n");
+ }
+ }
+
+ strcat(debug_help_string, "\n");
+ strcat(debug_help_string, kernel_title);
+
+ for (i = 0; i < num_kmod_keyword_mask_map; i++) {
+ strcat(debug_help_string, "\t");
+ strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
+ strcat(debug_help_string, "\n");
+ }
+
+ rc = 0;
+
+out:
+
+ return rc;
+
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ */
+static void debug_mask_to_string(void *mask, int type)
+{
+ int i;
+ int len = 0;
+ char *debug_string;
+ int element_count = 0;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ if (type) {
+ debug_string = client_debug_string;
+ element_count = cdm_element_count;
+ } else {
+ debug_string = kernel_debug_string;
+ element_count = num_kmod_keyword_mask_map;
+ }
+
+ memset(debug_string, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
+
+ /*
+ * Some keywords, like "all" or "verbose", are amalgams of
+ * numerous other keywords. Make a special check for those
+ * before grinding through the whole mask only to find out
+ * later...
+ */
+ if (check_amalgam_keyword(mask, type))
+ goto out;
+
+ /* Build the debug string. */
+ for (i = 0; i < element_count; i++)
+ if (type)
+ do_c_string(mask, i);
+ else
+ do_k_string(mask, i);
+
+ len = strlen(debug_string);
+
+ if ((len) && (type))
+ client_debug_string[len - 1] = '\0';
+ else if (len)
+ kernel_debug_string[len - 1] = '\0';
+ else if (type)
+ strcpy(client_debug_string, "none");
+ else
+ strcpy(kernel_debug_string, "none");
+
+out:
+gossip_debug(GOSSIP_UTILS_DEBUG, "%s: string:%s:\n", __func__, debug_string);
+
+ return;
+
+}
+
+static void do_k_string(void *k_mask, int index)
+{
+ __u64 *mask = (__u64 *) k_mask;
+
+ if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword))
+ goto out;
+
+ if (*mask & s_kmod_keyword_mask_map[index].mask_val) {
+ if ((strlen(kernel_debug_string) +
+ strlen(s_kmod_keyword_mask_map[index].keyword))
+ < ORANGEFS_MAX_DEBUG_STRING_LEN - 1) {
+ strcat(kernel_debug_string,
+ s_kmod_keyword_mask_map[index].keyword);
+ strcat(kernel_debug_string, ",");
+ } else {
+ gossip_err("%s: overflow!\n", __func__);
+ strcpy(kernel_debug_string, ORANGEFS_ALL);
+ goto out;
+ }
+ }
+
+out:
+
+ return;
+}
+
+static void do_c_string(void *c_mask, int index)
+{
+ struct client_debug_mask *mask = (struct client_debug_mask *) c_mask;
+
+ if (keyword_is_amalgam(cdm_array[index].keyword))
+ goto out;
+
+ if ((mask->mask1 & cdm_array[index].mask1) ||
+ (mask->mask2 & cdm_array[index].mask2)) {
+ if ((strlen(client_debug_string) +
+ strlen(cdm_array[index].keyword) + 1)
+ < ORANGEFS_MAX_DEBUG_STRING_LEN - 2) {
+ strcat(client_debug_string,
+ cdm_array[index].keyword);
+ strcat(client_debug_string, ",");
+ } else {
+ gossip_err("%s: overflow!\n", __func__);
+ strcpy(client_debug_string, ORANGEFS_ALL);
+ goto out;
+ }
+ }
+out:
+ return;
+}
+
+static int keyword_is_amalgam(char *keyword)
+{
+ int rc = 0;
+
+ if ((!strcmp(keyword, ORANGEFS_ALL)) || (!strcmp(keyword, ORANGEFS_VERBOSE)))
+ rc = 1;
+
+ return rc;
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ *
+ * return 1 if we found an amalgam.
+ */
+static int check_amalgam_keyword(void *mask, int type)
+{
+ __u64 *k_mask;
+ struct client_debug_mask *c_mask;
+ int k_all_index = num_kmod_keyword_mask_map - 1;
+ int rc = 0;
+
+ if (type) {
+ c_mask = (struct client_debug_mask *) mask;
+
+ if ((c_mask->mask1 == cdm_array[client_all_index].mask1) &&
+ (c_mask->mask2 == cdm_array[client_all_index].mask2)) {
+ strcpy(client_debug_string, ORANGEFS_ALL);
+ rc = 1;
+ goto out;
+ }
+
+ if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) &&
+ (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) {
+ strcpy(client_debug_string, ORANGEFS_VERBOSE);
+ rc = 1;
+ goto out;
+ }
+
+ } else {
+ k_mask = (__u64 *) mask;
+
+ if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) {
+ strcpy(kernel_debug_string, ORANGEFS_ALL);
+ rc = 1;
+ goto out;
+ }
+ }
+
+out:
+
+ return rc;
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ */
+static void debug_string_to_mask(char *debug_string, void *mask, int type)
+{
+ char *unchecked_keyword;
+ int i;
+ char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL);
+ char *original_pointer;
+ int element_count = 0;
+ struct client_debug_mask *c_mask = NULL;
+ __u64 *k_mask = NULL;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ if (type) {
+ c_mask = (struct client_debug_mask *)mask;
+ element_count = cdm_element_count;
+ } else {
+ k_mask = (__u64 *)mask;
+ *k_mask = 0;
+ element_count = num_kmod_keyword_mask_map;
+ }
+
+ original_pointer = strsep_fodder;
+ while ((unchecked_keyword = strsep(&strsep_fodder, ",")))
+ if (strlen(unchecked_keyword)) {
+ for (i = 0; i < element_count; i++)
+ if (type)
+ do_c_mask(i,
+ unchecked_keyword,
+ &c_mask);
+ else
+ do_k_mask(i,
+ unchecked_keyword,
+ &k_mask);
+ }
+
+ kfree(original_pointer);
+}
+
+static void do_c_mask(int i, char *unchecked_keyword,
+ struct client_debug_mask **sane_mask)
+{
+
+ if (!strcmp(cdm_array[i].keyword, unchecked_keyword)) {
+ (**sane_mask).mask1 = (**sane_mask).mask1 | cdm_array[i].mask1;
+ (**sane_mask).mask2 = (**sane_mask).mask2 | cdm_array[i].mask2;
+ }
+}
+
+static void do_k_mask(int i, char *unchecked_keyword, __u64 **sane_mask)
+{
+
+ if (!strcmp(s_kmod_keyword_mask_map[i].keyword, unchecked_keyword))
+ **sane_mask = (**sane_mask) |
+ s_kmod_keyword_mask_map[i].mask_val;
+}
+
+int orangefs_debugfs_new_client_mask(void __user *arg)
+{
+ struct dev_mask2_info_s mask2_info = {0};
+ int ret;
+
+ ret = copy_from_user(&mask2_info,
+ (void __user *)arg,
+ sizeof(struct dev_mask2_info_s));
+
+ if (ret != 0)
+ return -EIO;
+
+ client_debug_mask.mask1 = mask2_info.mask1_value;
+ client_debug_mask.mask2 = mask2_info.mask2_value;
+
+ pr_info("%s: client debug mask has been been received "
+ ":%llx: :%llx:\n",
+ __func__,
+ (unsigned long long)client_debug_mask.mask1,
+ (unsigned long long)client_debug_mask.mask2);
+
+ return ret;
+}
+
+int orangefs_debugfs_new_client_string(void __user *arg)
+{
+ int ret;
+
+ ret = copy_from_user(&client_debug_array_string,
+ (void __user *)arg,
+ ORANGEFS_MAX_DEBUG_STRING_LEN);
+ if (ret != 0)
+ return -EIO;
+
+ /*
+ * The real client-core makes an effort to ensure
+ * that actual strings that aren't too long to fit in
+ * this buffer is what we get here. We're going to use
+ * string functions on the stuff we got, so we'll make
+ * this extra effort to try and keep from
+ * flowing out of this buffer when we use the string
+ * functions, even if somehow the stuff we end up
+ * with here is garbage.
+ */
+ client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
+ '\0';
+
+ if (ret != 0) {
+ pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
+ __func__);
+ return -EIO;
+ }
+
+ pr_info("%s: client debug array string has been received.\n",
+ __func__);
+
+ if (!help_string_initialized) {
+
+ /* Free the "we don't know yet" default string... */
+ kfree(debug_help_string);
+
+ /* build a proper debug help string */
+ if (orangefs_prepare_debugfs_help_string(0)) {
+ gossip_err("%s: no debug help string \n",
+ __func__);
+ return -EIO;
+ }
+
+ /* Replace the boilerplate boot-time debug-help file. */
+ debugfs_remove(help_file_dentry);
+
+ help_file_dentry =
+ debugfs_create_file(
+ ORANGEFS_KMOD_DEBUG_HELP_FILE,
+ 0444,
+ debug_dir,
+ debug_help_string,
+ &debug_help_fops);
+
+ if (!help_file_dentry) {
+ gossip_err("%s: debugfs_create_file failed for"
+ " :%s:!\n",
+ __func__,
+ ORANGEFS_KMOD_DEBUG_HELP_FILE);
+ return -EIO;
+ }
+ }
+
+ debug_mask_to_string(&client_debug_mask, 1);
+
+ debugfs_remove(client_debug_dentry);
+
+ orangefs_client_debug_init();
+
+ help_string_initialized++;
+
+ return ret;
+}
+
+int orangefs_debugfs_new_debug(void __user *arg)
+{
+ struct dev_mask_info_s mask_info = {0};
+ int ret;
+
+ ret = copy_from_user(&mask_info,
+ (void __user *)arg,
+ sizeof(mask_info));
+
+ if (ret != 0)
+ return -EIO;
+
+ if (mask_info.mask_type == KERNEL_MASK) {
+ if ((mask_info.mask_value == 0)
+ && (kernel_mask_set_mod_init)) {
+ /*
+ * the kernel debug mask was set when the
+ * kernel module was loaded; don't override
+ * it if the client-core was started without
+ * a value for ORANGEFS_KMODMASK.
+ */
+ return 0;
+ }
+ debug_mask_to_string(&mask_info.mask_value,
+ mask_info.mask_type);
+ orangefs_gossip_debug_mask = mask_info.mask_value;
+ pr_info("%s: kernel debug mask has been modified to "
+ ":%s: :%llx:\n",
+ __func__,
+ kernel_debug_string,
+ (unsigned long long)orangefs_gossip_debug_mask);
+ } else if (mask_info.mask_type == CLIENT_MASK) {
+ debug_mask_to_string(&mask_info.mask_value,
+ mask_info.mask_type);
+ pr_info("%s: client debug mask has been modified to"
+ ":%s: :%llx:\n",
+ __func__,
+ client_debug_string,
+ llu(mask_info.mask_value));
+ } else {
+ gossip_lerr("Invalid mask type....\n");
+ return -EINVAL;
+ }
+
+ return ret;
+}
diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h
index e4828c0e3ef9..803517269ba6 100644
--- a/fs/orangefs/orangefs-debugfs.h
+++ b/fs/orangefs/orangefs-debugfs.h
@@ -1,3 +1,7 @@
-int orangefs_debugfs_init(void);
-int orangefs_kernel_debug_init(void);
+int orangefs_debugfs_init(int);
void orangefs_debugfs_cleanup(void);
+int orangefs_client_debug_init(void);
+int orangefs_prepare_debugfs_help_string(int);
+int orangefs_debugfs_new_client_mask(void __user *);
+int orangefs_debugfs_new_client_string(void __user *);
+int orangefs_debugfs_new_debug(void __user *);
diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h
index 9eac9d9a3f3a..a3d84ffee905 100644
--- a/fs/orangefs/orangefs-dev-proto.h
+++ b/fs/orangefs/orangefs-dev-proto.h
@@ -28,7 +28,7 @@
#define ORANGEFS_VFS_OP_RENAME 0xFF00000A
#define ORANGEFS_VFS_OP_STATFS 0xFF00000B
#define ORANGEFS_VFS_OP_TRUNCATE 0xFF00000C
-#define ORANGEFS_VFS_OP_MMAP_RA_FLUSH 0xFF00000D
+#define ORANGEFS_VFS_OP_RA_FLUSH 0xFF00000D
#define ORANGEFS_VFS_OP_FS_MOUNT 0xFF00000E
#define ORANGEFS_VFS_OP_FS_UMOUNT 0xFF00000F
#define ORANGEFS_VFS_OP_GETXATTR 0xFF000010
@@ -41,6 +41,10 @@
#define ORANGEFS_VFS_OP_FSYNC 0xFF00EE01
#define ORANGEFS_VFS_OP_FSKEY 0xFF00EE02
#define ORANGEFS_VFS_OP_READDIRPLUS 0xFF00EE03
+#define ORANGEFS_VFS_OP_FEATURES 0xFF00EE05 /* 2.9.6 */
+
+/* features is a 64-bit unsigned bitmask */
+#define ORANGEFS_FEATURE_READAHEAD 1
/*
* Misc constants. Please retain them as multiples of 8!
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 633c07a6e3d8..0a82048f3aaf 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -100,16 +100,6 @@ enum orangefs_vfs_op_states {
};
/*
- * An array of client_debug_mask will be built to hold debug keyword/mask
- * values fetched from userspace.
- */
-struct client_debug_mask {
- char *keyword;
- __u64 mask1;
- __u64 mask2;
-};
-
-/*
* orangefs kernel memory related flags
*/
@@ -119,29 +109,6 @@ struct client_debug_mask {
#define ORANGEFS_CACHE_CREATE_FLAGS 0
#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */
-/* these functions are defined in orangefs-utils.c */
-int orangefs_prepare_cdm_array(char *debug_array_string);
-int orangefs_prepare_debugfs_help_string(int);
-
-/* defined in orangefs-debugfs.c */
-int orangefs_client_debug_init(void);
-
-void debug_string_to_mask(char *, void *, int);
-void do_c_mask(int, char *, struct client_debug_mask **);
-void do_k_mask(int, char *, __u64 **);
-
-void debug_mask_to_string(void *, int);
-void do_k_string(void *, int);
-void do_c_string(void *, int);
-int check_amalgam_keyword(void *, int);
-int keyword_is_amalgam(char *);
-
-/*these variables are defined in orangefs-mod.c */
-extern char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
-extern char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
-extern char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
-extern unsigned int kernel_mask_set_mod_init;
-
extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
extern const struct xattr_handler *orangefs_xattr_handlers[];
@@ -331,7 +298,7 @@ struct orangefs_stats {
unsigned long writes;
};
-extern struct orangefs_stats g_orangefs_stats;
+extern struct orangefs_stats orangefs_stats;
/*
* NOTE: See Documentation/filesystems/porting for information
@@ -447,6 +414,8 @@ void purge_waiting_ops(void);
/*
* defined in super.c
*/
+extern uint64_t orangefs_features;
+
struct dentry *orangefs_mount(struct file_system_type *fst,
int flags,
const char *devname,
@@ -506,6 +475,8 @@ ssize_t orangefs_inode_read(struct inode *inode,
/*
* defined in devorangefs-req.c
*/
+extern uint32_t orangefs_userspace_version;
+
int orangefs_dev_init(void);
void orangefs_dev_cleanup(void);
int is_daemon_in_service(void);
@@ -543,20 +514,18 @@ bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
int orangefs_normalize_to_errno(__s32 error_code);
-extern struct mutex devreq_mutex;
-extern struct mutex request_mutex;
-extern int debug;
+extern struct mutex orangefs_request_mutex;
extern int op_timeout_secs;
extern int slot_timeout_secs;
-extern int dcache_timeout_msecs;
-extern int getattr_timeout_msecs;
+extern int orangefs_dcache_timeout_msecs;
+extern int orangefs_getattr_timeout_msecs;
extern struct list_head orangefs_superblocks;
extern spinlock_t orangefs_superblocks_lock;
extern struct list_head orangefs_request_list;
extern spinlock_t orangefs_request_list_lock;
extern wait_queue_head_t orangefs_request_list_waitq;
-extern struct list_head *htable_ops_in_progress;
-extern spinlock_t htable_ops_in_progress_lock;
+extern struct list_head *orangefs_htable_ops_in_progress;
+extern spinlock_t orangefs_htable_ops_in_progress_lock;
extern int hash_table_size;
extern const struct address_space_operations orangefs_address_operations;
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index e9fd5755c05f..2e5b03065f34 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -21,34 +21,17 @@
* global variables declared here
*/
-/* array of client debug keyword/mask values */
-struct client_debug_mask *cdm_array;
-int cdm_element_count;
-
-char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN] = "none";
-char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
-char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
-
-char *debug_help_string;
-int help_string_initialized;
-struct dentry *help_file_dentry;
-struct dentry *client_debug_dentry;
-struct dentry *debug_dir;
-int client_verbose_index;
-int client_all_index;
-struct orangefs_stats g_orangefs_stats;
+struct orangefs_stats orangefs_stats;
/* the size of the hash tables for ops in progress */
int hash_table_size = 509;
static ulong module_parm_debug_mask;
-__u64 gossip_debug_mask;
-struct client_debug_mask client_debug_mask = { NULL, 0, 0 };
-unsigned int kernel_mask_set_mod_init; /* implicitly false */
+__u64 orangefs_gossip_debug_mask;
int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS;
int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS;
-int dcache_timeout_msecs = 50;
-int getattr_timeout_msecs = 50;
+int orangefs_dcache_timeout_msecs = 50;
+int orangefs_getattr_timeout_msecs = 50;
MODULE_LICENSE("GPL");
MODULE_AUTHOR("ORANGEFS Development Team");
@@ -71,20 +54,17 @@ module_param(module_parm_debug_mask, ulong, 0644);
module_param(op_timeout_secs, int, 0);
module_param(slot_timeout_secs, int, 0);
-/* synchronizes the request device file */
-DEFINE_MUTEX(devreq_mutex);
-
/*
* Blocks non-priority requests from being queued for servicing. This
* could be used for protecting the request list data structure, but
* for now it's only being used to stall the op addition to the request
* list
*/
-DEFINE_MUTEX(request_mutex);
+DEFINE_MUTEX(orangefs_request_mutex);
/* hash table for storing operations waiting for matching downcall */
-struct list_head *htable_ops_in_progress;
-DEFINE_SPINLOCK(htable_ops_in_progress_lock);
+struct list_head *orangefs_htable_ops_in_progress;
+DEFINE_SPINLOCK(orangefs_htable_ops_in_progress_lock);
/* list for queueing upcall operations */
LIST_HEAD(orangefs_request_list);
@@ -100,32 +80,6 @@ static int __init orangefs_init(void)
int ret = -1;
__u32 i = 0;
- /* convert input debug mask to a 64-bit unsigned integer */
- gossip_debug_mask = (unsigned long long) module_parm_debug_mask;
-
- /*
- * set the kernel's gossip debug string; invalid mask values will
- * be ignored.
- */
- debug_mask_to_string(&gossip_debug_mask, 0);
-
- /* remove any invalid values from the mask */
- debug_string_to_mask(kernel_debug_string, &gossip_debug_mask, 0);
-
- /*
- * if the mask has a non-zero value, then indicate that the mask
- * was set when the kernel module was loaded. The orangefs dev ioctl
- * command will look at this boolean to determine if the kernel's
- * debug mask should be overwritten when the client-core is started.
- */
- if (gossip_debug_mask != 0)
- kernel_mask_set_mod_init = true;
-
- pr_info("%s: called with debug mask: :%s: :%llx:\n",
- __func__,
- kernel_debug_string,
- (unsigned long long)gossip_debug_mask);
-
ret = bdi_init(&orangefs_backing_dev_info);
if (ret)
@@ -146,9 +100,9 @@ static int __init orangefs_init(void)
if (ret < 0)
goto cleanup_op;
- htable_ops_in_progress =
+ orangefs_htable_ops_in_progress =
kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL);
- if (!htable_ops_in_progress) {
+ if (!orangefs_htable_ops_in_progress) {
gossip_err("Failed to initialize op hashtable");
ret = -ENOMEM;
goto cleanup_inode;
@@ -156,7 +110,7 @@ static int __init orangefs_init(void)
/* initialize a doubly linked at each hash table index */
for (i = 0; i < hash_table_size; i++)
- INIT_LIST_HEAD(&htable_ops_in_progress[i]);
+ INIT_LIST_HEAD(&orangefs_htable_ops_in_progress[i]);
ret = fsid_key_table_initialize();
if (ret < 0)
@@ -179,14 +133,10 @@ static int __init orangefs_init(void)
if (ret)
goto cleanup_key_table;
- ret = orangefs_debugfs_init();
+ ret = orangefs_debugfs_init(module_parm_debug_mask);
if (ret)
goto debugfs_init_failed;
- ret = orangefs_kernel_debug_init();
- if (ret)
- goto kernel_debug_init_failed;
-
ret = orangefs_sysfs_init();
if (ret)
goto sysfs_init_failed;
@@ -214,8 +164,6 @@ cleanup_device:
sysfs_init_failed:
-kernel_debug_init_failed:
-
debugfs_init_failed:
orangefs_debugfs_cleanup();
@@ -223,7 +171,7 @@ cleanup_key_table:
fsid_key_table_finalize();
cleanup_progress_table:
- kfree(htable_ops_in_progress);
+ kfree(orangefs_htable_ops_in_progress);
cleanup_inode:
orangefs_inode_cache_finalize();
@@ -250,12 +198,12 @@ static void __exit orangefs_exit(void)
orangefs_dev_cleanup();
BUG_ON(!list_empty(&orangefs_request_list));
for (i = 0; i < hash_table_size; i++)
- BUG_ON(!list_empty(&htable_ops_in_progress[i]));
+ BUG_ON(!list_empty(&orangefs_htable_ops_in_progress[i]));
orangefs_inode_cache_finalize();
op_cache_finalize();
- kfree(htable_ops_in_progress);
+ kfree(orangefs_htable_ops_in_progress);
bdi_destroy(&orangefs_backing_dev_info);
@@ -274,10 +222,10 @@ void purge_inprogress_ops(void)
struct orangefs_kernel_op_s *op;
struct orangefs_kernel_op_s *next;
- spin_lock(&htable_ops_in_progress_lock);
+ spin_lock(&orangefs_htable_ops_in_progress_lock);
list_for_each_entry_safe(op,
next,
- &htable_ops_in_progress[i],
+ &orangefs_htable_ops_in_progress[i],
list) {
set_op_state_purged(op);
gossip_debug(GOSSIP_DEV_DEBUG,
@@ -287,7 +235,7 @@ void purge_inprogress_ops(void)
op->op_state,
current->comm);
}
- spin_unlock(&htable_ops_in_progress_lock);
+ spin_unlock(&orangefs_htable_ops_in_progress_lock);
}
}
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
index 375708c2db87..a799546a67f7 100644
--- a/fs/orangefs/orangefs-sysfs.c
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -73,6 +73,24 @@
* Description:
* Time getattr is valid in milliseconds.
*
+ * What: /sys/fs/orangefs/readahead_count
+ * Date: Aug 2016
+ * Contact: Martin Brandenburg <martin@omnibond.com>
+ * Description:
+ * Readahead cache buffer count.
+ *
+ * What: /sys/fs/orangefs/readahead_size
+ * Date: Aug 2016
+ * Contact: Martin Brandenburg <martin@omnibond.com>
+ * Description:
+ * Readahead cache buffer size.
+ *
+ * What: /sys/fs/orangefs/readahead_count_size
+ * Date: Aug 2016
+ * Contact: Martin Brandenburg <martin@omnibond.com>
+ * Description:
+ * Readahead cache buffer count and size.
+ *
* What: /sys/fs/orangefs/acache/...
* Date: Jun 2015
* Contact: Martin Brandenburg <martin@omnibond.com>
@@ -121,159 +139,34 @@
#define PC_KOBJ_ID "pc"
#define STATS_KOBJ_ID "stats"
-struct orangefs_obj {
- struct kobject kobj;
- int op_timeout_secs;
- int perf_counter_reset;
- int perf_history_size;
- int perf_time_interval_secs;
- int slot_timeout_secs;
- int dcache_timeout_msecs;
- int getattr_timeout_msecs;
-};
-
-struct acache_orangefs_obj {
- struct kobject kobj;
- int hard_limit;
- int reclaim_percentage;
- int soft_limit;
- int timeout_msecs;
-};
-
-struct capcache_orangefs_obj {
- struct kobject kobj;
- int hard_limit;
- int reclaim_percentage;
- int soft_limit;
- int timeout_secs;
-};
-
-struct ccache_orangefs_obj {
- struct kobject kobj;
- int hard_limit;
- int reclaim_percentage;
- int soft_limit;
- int timeout_secs;
-};
-
-struct ncache_orangefs_obj {
- struct kobject kobj;
- int hard_limit;
- int reclaim_percentage;
- int soft_limit;
- int timeout_msecs;
-};
-
-struct pc_orangefs_obj {
- struct kobject kobj;
- char *acache;
- char *capcache;
- char *ncache;
-};
-
-struct stats_orangefs_obj {
- struct kobject kobj;
- int reads;
- int writes;
-};
+/*
+ * Every item is read and written via orangefs_attr_show and
+ * orangefs_attr_store (see orangefs_sysfs_ops). Those two handlers use the
+ * orangefs_attribute entries further below to call one of sysfs_int_show,
+ * sysfs_int_store, sysfs_service_op_show, or sysfs_service_op_store.
+ */
struct orangefs_attribute {
struct attribute attr;
- ssize_t (*show)(struct orangefs_obj *orangefs_obj,
+ ssize_t (*show)(struct kobject *kobj,
struct orangefs_attribute *attr,
char *buf);
- ssize_t (*store)(struct orangefs_obj *orangefs_obj,
+ ssize_t (*store)(struct kobject *kobj,
struct orangefs_attribute *attr,
const char *buf,
size_t count);
};
-struct acache_orangefs_attribute {
- struct attribute attr;
- ssize_t (*show)(struct acache_orangefs_obj *acache_orangefs_obj,
- struct acache_orangefs_attribute *attr,
- char *buf);
- ssize_t (*store)(struct acache_orangefs_obj *acache_orangefs_obj,
- struct acache_orangefs_attribute *attr,
- const char *buf,
- size_t count);
-};
-
-struct capcache_orangefs_attribute {
- struct attribute attr;
- ssize_t (*show)(struct capcache_orangefs_obj *capcache_orangefs_obj,
- struct capcache_orangefs_attribute *attr,
- char *buf);
- ssize_t (*store)(struct capcache_orangefs_obj *capcache_orangefs_obj,
- struct capcache_orangefs_attribute *attr,
- const char *buf,
- size_t count);
-};
-
-struct ccache_orangefs_attribute {
- struct attribute attr;
- ssize_t (*show)(struct ccache_orangefs_obj *ccache_orangefs_obj,
- struct ccache_orangefs_attribute *attr,
- char *buf);
- ssize_t (*store)(struct ccache_orangefs_obj *ccache_orangefs_obj,
- struct ccache_orangefs_attribute *attr,
- const char *buf,
- size_t count);
-};
-
-struct ncache_orangefs_attribute {
- struct attribute attr;
- ssize_t (*show)(struct ncache_orangefs_obj *ncache_orangefs_obj,
- struct ncache_orangefs_attribute *attr,
- char *buf);
- ssize_t (*store)(struct ncache_orangefs_obj *ncache_orangefs_obj,
- struct ncache_orangefs_attribute *attr,
- const char *buf,
- size_t count);
-};
-
-struct pc_orangefs_attribute {
- struct attribute attr;
- ssize_t (*show)(struct pc_orangefs_obj *pc_orangefs_obj,
- struct pc_orangefs_attribute *attr,
- char *buf);
- ssize_t (*store)(struct pc_orangefs_obj *pc_orangefs_obj,
- struct pc_orangefs_attribute *attr,
- const char *buf,
- size_t count);
-};
-
-struct stats_orangefs_attribute {
- struct attribute attr;
- ssize_t (*show)(struct stats_orangefs_obj *stats_orangefs_obj,
- struct stats_orangefs_attribute *attr,
- char *buf);
- ssize_t (*store)(struct stats_orangefs_obj *stats_orangefs_obj,
- struct stats_orangefs_attribute *attr,
- const char *buf,
- size_t count);
-};
-
static ssize_t orangefs_attr_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
{
struct orangefs_attribute *attribute;
- struct orangefs_obj *orangefs_obj;
- int rc;
attribute = container_of(attr, struct orangefs_attribute, attr);
- orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
-
- if (!attribute->show) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->show(orangefs_obj, attribute, buf);
-
-out:
- return rc;
+ if (!attribute->show)
+ return -EIO;
+ return attribute->show(kobj, attribute, buf);
}
static ssize_t orangefs_attr_store(struct kobject *kobj,
@@ -282,24 +175,15 @@ static ssize_t orangefs_attr_store(struct kobject *kobj,
size_t len)
{
struct orangefs_attribute *attribute;
- struct orangefs_obj *orangefs_obj;
- int rc;
- gossip_debug(GOSSIP_SYSFS_DEBUG,
- "orangefs_attr_store: start\n");
+ if (!strcmp(kobj->name, PC_KOBJ_ID) ||
+ !strcmp(kobj->name, STATS_KOBJ_ID))
+ return -EPERM;
attribute = container_of(attr, struct orangefs_attribute, attr);
- orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
-
- if (!attribute->store) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->store(orangefs_obj, attribute, buf, len);
-
-out:
- return rc;
+ if (!attribute->store)
+ return -EIO;
+ return attribute->store(kobj, attribute, buf, len);
}
static const struct sysfs_ops orangefs_sysfs_ops = {
@@ -307,402 +191,58 @@ static const struct sysfs_ops orangefs_sysfs_ops = {
.store = orangefs_attr_store,
};
-static ssize_t acache_orangefs_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct acache_orangefs_attribute *attribute;
- struct acache_orangefs_obj *acache_orangefs_obj;
- int rc;
-
- attribute = container_of(attr, struct acache_orangefs_attribute, attr);
- acache_orangefs_obj =
- container_of(kobj, struct acache_orangefs_obj, kobj);
-
- if (!attribute->show) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->show(acache_orangefs_obj, attribute, buf);
-
-out:
- return rc;
-}
-
-static ssize_t acache_orangefs_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t len)
-{
- struct acache_orangefs_attribute *attribute;
- struct acache_orangefs_obj *acache_orangefs_obj;
- int rc;
-
- gossip_debug(GOSSIP_SYSFS_DEBUG,
- "acache_orangefs_attr_store: start\n");
-
- attribute = container_of(attr, struct acache_orangefs_attribute, attr);
- acache_orangefs_obj =
- container_of(kobj, struct acache_orangefs_obj, kobj);
-
- if (!attribute->store) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->store(acache_orangefs_obj, attribute, buf, len);
-
-out:
- return rc;
-}
-
-static const struct sysfs_ops acache_orangefs_sysfs_ops = {
- .show = acache_orangefs_attr_show,
- .store = acache_orangefs_attr_store,
-};
-
-static ssize_t capcache_orangefs_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct capcache_orangefs_attribute *attribute;
- struct capcache_orangefs_obj *capcache_orangefs_obj;
- int rc;
-
- attribute =
- container_of(attr, struct capcache_orangefs_attribute, attr);
- capcache_orangefs_obj =
- container_of(kobj, struct capcache_orangefs_obj, kobj);
-
- if (!attribute->show) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->show(capcache_orangefs_obj, attribute, buf);
-
-out:
- return rc;
-}
-
-static ssize_t capcache_orangefs_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t len)
-{
- struct capcache_orangefs_attribute *attribute;
- struct capcache_orangefs_obj *capcache_orangefs_obj;
- int rc;
-
- gossip_debug(GOSSIP_SYSFS_DEBUG,
- "capcache_orangefs_attr_store: start\n");
-
- attribute =
- container_of(attr, struct capcache_orangefs_attribute, attr);
- capcache_orangefs_obj =
- container_of(kobj, struct capcache_orangefs_obj, kobj);
-
- if (!attribute->store) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->store(capcache_orangefs_obj, attribute, buf, len);
-
-out:
- return rc;
-}
-
-static const struct sysfs_ops capcache_orangefs_sysfs_ops = {
- .show = capcache_orangefs_attr_show,
- .store = capcache_orangefs_attr_store,
-};
-
-static ssize_t ccache_orangefs_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ccache_orangefs_attribute *attribute;
- struct ccache_orangefs_obj *ccache_orangefs_obj;
- int rc;
-
- attribute =
- container_of(attr, struct ccache_orangefs_attribute, attr);
- ccache_orangefs_obj =
- container_of(kobj, struct ccache_orangefs_obj, kobj);
-
- if (!attribute->show) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->show(ccache_orangefs_obj, attribute, buf);
-
-out:
- return rc;
-}
-
-static ssize_t ccache_orangefs_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t len)
-{
- struct ccache_orangefs_attribute *attribute;
- struct ccache_orangefs_obj *ccache_orangefs_obj;
- int rc;
-
- gossip_debug(GOSSIP_SYSFS_DEBUG,
- "ccache_orangefs_attr_store: start\n");
-
- attribute =
- container_of(attr, struct ccache_orangefs_attribute, attr);
- ccache_orangefs_obj =
- container_of(kobj, struct ccache_orangefs_obj, kobj);
-
- if (!attribute->store) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->store(ccache_orangefs_obj, attribute, buf, len);
-
-out:
- return rc;
-}
-
-static const struct sysfs_ops ccache_orangefs_sysfs_ops = {
- .show = ccache_orangefs_attr_show,
- .store = ccache_orangefs_attr_store,
-};
-
-static ssize_t ncache_orangefs_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ncache_orangefs_attribute *attribute;
- struct ncache_orangefs_obj *ncache_orangefs_obj;
- int rc;
-
- attribute = container_of(attr, struct ncache_orangefs_attribute, attr);
- ncache_orangefs_obj =
- container_of(kobj, struct ncache_orangefs_obj, kobj);
-
- if (!attribute->show) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->show(ncache_orangefs_obj, attribute, buf);
-
-out:
- return rc;
-}
-
-static ssize_t ncache_orangefs_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t len)
-{
- struct ncache_orangefs_attribute *attribute;
- struct ncache_orangefs_obj *ncache_orangefs_obj;
- int rc;
-
- gossip_debug(GOSSIP_SYSFS_DEBUG,
- "ncache_orangefs_attr_store: start\n");
-
- attribute = container_of(attr, struct ncache_orangefs_attribute, attr);
- ncache_orangefs_obj =
- container_of(kobj, struct ncache_orangefs_obj, kobj);
-
- if (!attribute->store) {
- rc = -EIO;
- goto out;
- }
-
- rc = attribute->store(ncache_orangefs_obj, attribute, buf, len);
-
-out:
- return rc;
-}
-
-static const struct sysfs_ops ncache_orangefs_sysfs_ops = {
- .show = ncache_orangefs_attr_show,