aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/adfs/dir_f.c12
-rw-r--r--fs/affs/amigaffs.c27
-rw-r--r--fs/affs/file.c26
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/affs/super.c6
-rw-r--r--fs/afs/cmservice.c16
-rw-r--r--fs/afs/dynroot.c20
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/flock.c1
-rw-r--r--fs/afs/fs_operation.c1
-rw-r--r--fs/afs/fs_probe.c4
-rw-r--r--fs/afs/fsclient.c42
-rw-r--r--fs/afs/inode.c47
-rw-r--r--fs/afs/internal.h15
-rw-r--r--fs/afs/misc.c18
-rw-r--r--fs/afs/proc.c5
-rw-r--r--fs/afs/rotate.c2
-rw-r--r--fs/afs/rxrpc.c6
-rw-r--r--fs/afs/vl_list.c1
-rw-r--r--fs/afs/vl_probe.c82
-rw-r--r--fs/afs/vl_rotate.c7
-rw-r--r--fs/afs/vlclient.c24
-rw-r--r--fs/afs/write.c13
-rw-r--r--fs/afs/yfsclient.c50
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs/waitq.c2
-rw-r--r--fs/binfmt_flat.c20
-rw-r--r--fs/btrfs/block-group.c4
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/dev-replace.c46
-rw-r--r--fs/btrfs/disk-io.c14
-rw-r--r--fs/btrfs/extent-tree.c38
-rw-r--r--fs/btrfs/extent_io.c8
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/free-space-tree.c4
-rw-r--r--fs/btrfs/inode.c29
-rw-r--r--fs/btrfs/ioctl.c28
-rw-r--r--fs/btrfs/print-tree.c12
-rw-r--r--fs/btrfs/scrub.c122
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/sysfs.c16
-rw-r--r--fs/btrfs/transaction.c1
-rw-r--r--fs/btrfs/tree-checker.c2
-rw-r--r--fs/btrfs/tree-log.c10
-rw-r--r--fs/btrfs/volumes.c26
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/ceph/caps.c14
-rw-r--r--fs/ceph/debugfs.c4
-rw-r--r--fs/ceph/dir.c33
-rw-r--r--fs/ceph/file.c7
-rw-r--r--fs/ceph/inode.c19
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/quota.c4
-rw-r--r--fs/ceph/super.h73
-rw-r--r--fs/cifs/cifsglob.h15
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/cifs/connect.c12
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/configfs/dir.c4
-rw-r--r--fs/dax.c2
-rw-r--r--fs/debugfs/file.c4
-rw-r--r--fs/dlm/lock.c2
-rw-r--r--fs/erofs/zmap.c6
-rw-r--r--fs/eventpoll.c102
-rw-r--r--fs/exfat/cache.c11
-rw-r--r--fs/exfat/exfat_fs.h3
-rw-r--r--fs/exfat/inode.c2
-rw-r--r--fs/exfat/namei.c13
-rw-r--r--fs/exfat/super.c5
-rw-r--r--fs/ext2/file.c6
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/balloc.c16
-rw-r--r--fs/ext4/block_validity.c159
-rw-r--r--fs/ext4/ext4.h91
-rw-r--r--fs/ext4/ext4_jbd2.c25
-rw-r--r--fs/ext4/extents.c42
-rw-r--r--fs/ext4/file.c11
-rw-r--r--fs/ext4/hash.c4
-rw-r--r--fs/ext4/indirect.c20
-rw-r--r--fs/ext4/inline.c4
-rw-r--r--fs/ext4/inode.c32
-rw-r--r--fs/ext4/ioctl.c34
-rw-r--r--fs/ext4/mballoc.c289
-rw-r--r--fs/ext4/mballoc.h4
-rw-r--r--fs/ext4/move_extent.c4
-rw-r--r--fs/ext4/namei.c66
-rw-r--r--fs/ext4/readpage.c4
-rw-r--r--fs/ext4/super.c268
-rw-r--r--fs/ext4/sysfs.c13
-rw-r--r--fs/ext4/xattr.c3
-rw-r--r--fs/f2fs/data.c3
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/node.c7
-rw-r--r--fs/f2fs/segment.c8
-rw-r--r--fs/fcntl.c4
-rw-r--r--fs/fs-writeback.c105
-rw-r--r--fs/fs_context.c2
-rw-r--r--fs/fsopen.c2
-rw-r--r--fs/gfs2/bmap.c4
-rw-r--r--fs/gfs2/log.c31
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/trans.c1
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/io-wq.c21
-rw-r--r--fs/io_uring.c426
-rw-r--r--fs/iomap/seek.c4
-rw-r--r--fs/jbd2/journal.c16
-rw-r--r--fs/jbd2/recovery.c46
-rw-r--r--fs/jbd2/transaction.c33
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/libfs.c4
-rw-r--r--fs/locks.c6
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/filelayout/filelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c47
-rw-r--r--fs/nfs/fs_context.c22
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs42proc.c10
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4idmap.c4
-rw-r--r--fs/nfs/nfs4proc.c43
-rw-r--r--fs/nfs/nfs4state.c14
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs_common/nfsacl.c2
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfs4state.c14
-rw-r--r--fs/nfsd/nfsfh.c4
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/vfs.c4
-rw-r--r--fs/nilfs2/bmap.c2
-rw-r--r--fs/nilfs2/recovery.c2
-rw-r--r--fs/nilfs2/segment.c19
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/ocfs2/cluster/quorum.c2
-rw-r--r--fs/pipe.c62
-rw-r--r--fs/pstore/zone.c1
-rw-r--r--fs/quota/quota.c2
-rw-r--r--fs/read_write.c8
-rw-r--r--fs/romfs/storage.c4
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/splice.c28
-rw-r--r--fs/squashfs/block.c6
-rw-r--r--fs/ubifs/lprops.c4
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/ufs/util.h12
-rw-r--r--fs/vboxsf/utils.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c4
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h2
-rw-r--r--fs/xfs/xfs_bmap_util.c2
-rw-r--r--fs/xfs/xfs_file.c12
171 files changed, 2155 insertions, 1281 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 92cd1d80218d..3576123d8299 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -213,7 +213,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
break;
default:
WARN_ONCE(1, "unknown lock status code: %d\n", status);
- /* fall through */
+ fallthrough;
case P9_LOCK_ERROR:
case P9_LOCK_GRACE:
res = -ENOLCK;
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index 30d526fecc3f..05e963402e25 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -18,11 +18,11 @@ static inline unsigned int adfs_readval(unsigned char *p, int len)
switch (len) {
case 4: val |= p[3] << 24;
- /* fall through */
+ fallthrough;
case 3: val |= p[2] << 16;
- /* fall through */
+ fallthrough;
case 2: val |= p[1] << 8;
- /* fall through */
+ fallthrough;
default: val |= p[0];
}
return val;
@@ -32,11 +32,11 @@ static inline void adfs_writeval(unsigned char *p, int len, unsigned int val)
{
switch (len) {
case 4: p[3] = val >> 24;
- /* fall through */
+ fallthrough;
case 3: p[2] = val >> 16;
- /* fall through */
+ fallthrough;
case 2: p[1] = val >> 8;
- /* fall through */
+ fallthrough;
default: p[0] = val;
}
}
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index f708c45d5f66..29f11e10a7c7 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -420,24 +420,51 @@ affs_mode_to_prot(struct inode *inode)
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
+ /*
+ * First, clear all RWED bits for owner, group, other.
+ * Then, recalculate them afresh.
+ *
+ * We'll always clear the delete-inhibit bit for the owner, as that is
+ * the classic single-user mode AmigaOS protection bit and we need to
+ * stay compatible with all scenarios.
+ *
+ * Since multi-user AmigaOS is an extension, we'll only set the
+ * delete-allow bit if any of the other bits in the same user class
+ * (group/other) are used.
+ */
+ prot &= ~(FIBF_NOEXECUTE | FIBF_NOREAD
+ | FIBF_NOWRITE | FIBF_NODELETE
+ | FIBF_GRP_EXECUTE | FIBF_GRP_READ
+ | FIBF_GRP_WRITE | FIBF_GRP_DELETE
+ | FIBF_OTR_EXECUTE | FIBF_OTR_READ
+ | FIBF_OTR_WRITE | FIBF_OTR_DELETE);
+
+ /* Classic single-user AmigaOS flags. These are inverted. */
if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
if (!(mode & 0400))
prot |= FIBF_NOREAD;
if (!(mode & 0200))
prot |= FIBF_NOWRITE;
+
+ /* Multi-user extended flags. Not inverted. */
if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
if (mode & 0040)
prot |= FIBF_GRP_READ;
if (mode & 0020)
prot |= FIBF_GRP_WRITE;
+ if (mode & 0070)
+ prot |= FIBF_GRP_DELETE;
+
if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
if (mode & 0004)
prot |= FIBF_OTR_READ;
if (mode & 0002)
prot |= FIBF_OTR_WRITE;
+ if (mode & 0007)
+ prot |= FIBF_OTR_DELETE;
AFFS_I(inode)->i_protect = prot;
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a26a0f96c119..d91b0133d95d 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -429,6 +429,24 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
+static int affs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
+ return ret;
+}
+
static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,affs_get_block);
@@ -438,7 +456,7 @@ const struct address_space_operations affs_aops = {
.readpage = affs_readpage,
.writepage = affs_writepage,
.write_begin = affs_write_begin,
- .write_end = generic_write_end,
+ .write_end = affs_write_end,
.direct_IO = affs_direct_IO,
.bmap = _affs_bmap
};
@@ -795,6 +813,12 @@ done:
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
err_first_bh:
unlock_page(page);
put_page(page);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index a346cf7659f1..044412110b52 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -93,7 +93,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
case ST_ROOT:
inode->i_uid = sbi->s_uid;
inode->i_gid = sbi->s_gid;
- /* fall through */
+ fallthrough;
case ST_USERDIR:
if (be32_to_cpu(tail->stype) == ST_USERDIR ||
affs_test_opt(sbi->s_flags, SF_SETMODE)) {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 47107c6712a6..a100cd9950c8 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -474,7 +474,7 @@ got_root:
case MUFS_INTLFFS:
case MUFS_DCFFS:
affs_set_opt(sbi->s_flags, SF_MUFS);
- /* fall thru */
+ fallthrough;
case FS_INTLFFS:
case FS_DCFFS:
affs_set_opt(sbi->s_flags, SF_INTL);
@@ -486,7 +486,7 @@ got_root:
break;
case MUFS_OFS:
affs_set_opt(sbi->s_flags, SF_MUFS);
- /* fall through */
+ fallthrough;
case FS_OFS:
affs_set_opt(sbi->s_flags, SF_OFS);
sb->s_flags |= SB_NOEXEC;
@@ -494,7 +494,7 @@ got_root:
case MUFS_DCOFS:
case MUFS_INTLOFS:
affs_set_opt(sbi->s_flags, SF_MUFS);
- /* fall through */
+ fallthrough;
case FS_DCOFS:
case FS_INTLOFS:
affs_set_opt(sbi->s_flags, SF_INTL);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index bef413818af7..a4e9e6e07e93 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -252,7 +252,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->unmarshall++;
/* extract the FID array and its count in two steps */
- /* fall through */
+ fallthrough;
case 1:
_debug("extract FID count");
ret = afs_extract_data(call, true);
@@ -271,7 +271,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
afs_extract_to_buf(call, call->count * 3 * 4);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 2:
_debug("extract FID array");
ret = afs_extract_data(call, true);
@@ -297,7 +297,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->unmarshall++;
/* extract the callback array and its count in two steps */
- /* fall through */
+ fallthrough;
case 3:
_debug("extract CB count");
ret = afs_extract_data(call, true);
@@ -312,7 +312,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 4:
_debug("extract discard %zu/%u",
iov_iter_count(call->iter), call->count2 * 3 * 4);
@@ -391,7 +391,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
afs_extract_to_buf(call, 11 * sizeof(__be32));
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 1:
_debug("extract UUID");
ret = afs_extract_data(call, false);
@@ -503,7 +503,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
afs_extract_to_buf(call, 11 * sizeof(__be32));
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 1:
_debug("extract UUID");
ret = afs_extract_data(call, false);
@@ -618,7 +618,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
call->unmarshall++;
/* extract the FID array and its count in two steps */
- /* Fall through */
+ fallthrough;
case 1:
_debug("extract FID count");
ret = afs_extract_data(call, true);
@@ -637,7 +637,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 2:
_debug("extract FID array");
ret = afs_extract_data(call, false);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index b79879aacc02..7b784af604fd 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -382,15 +382,17 @@ void afs_dynroot_depopulate(struct super_block *sb)
net->dynroot_sb = NULL;
mutex_unlock(&net->proc_cells_lock);
- inode_lock(root->d_inode);
-
- /* Remove all the pins for dirs created for manually added cells */
- list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) {
- if (subdir->d_fsdata) {
- subdir->d_fsdata = NULL;
- dput(subdir);
+ if (root) {
+ inode_lock(root->d_inode);
+
+ /* Remove all the pins for dirs created for manually added cells */
+ list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) {
+ if (subdir->d_fsdata) {
+ subdir->d_fsdata = NULL;
+ dput(subdir);
+ }
}
- }
- inode_unlock(root->d_inode);
+ inode_unlock(root->d_inode);
+ }
}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 6f6ed1605cfe..371d1488cc54 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -311,7 +311,7 @@ int afs_page_filler(void *data, struct page *page)
case -ENOBUFS:
_debug("cache said ENOBUFS");
- /* fall through */
+ fallthrough;
default:
go_on:
req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index ffb8575345ca..cb3054c7843e 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -376,7 +376,6 @@ again:
spin_unlock(&vnode->lock);
return;
- /* Fall through */
default:
/* Looks like a lock request was withdrawn. */
spin_unlock(&vnode->lock);
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 24fd163c6323..97cab12b0a6c 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -235,6 +235,7 @@ int afs_put_operation(struct afs_operation *op)
afs_end_cursor(&op->ac);
afs_put_serverlist(op->net, op->server_list);
afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op);
+ key_put(op->key);
kfree(op);
return ret;
}
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index 5d9ef517cf81..e7e98ad63a91 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -161,8 +161,8 @@ responded:
}
}
- rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
- if (rtt_us < server->probe.rtt) {
+ if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
+ rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index acb4d0ca2649..1d95ed9dd86e 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -320,7 +320,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
call->tmp_u = htonl(0);
afs_extract_to_tmp(call);
}
- /* Fall through */
+ fallthrough;
/* extract the returned data length */
case 1:
@@ -348,7 +348,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
call->bvec[0].bv_page = req->pages[req->index];
iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size);
ASSERTCMP(size, <=, PAGE_SIZE);
- /* Fall through */
+ fallthrough;
/* extract the returned data */
case 2:
@@ -375,7 +375,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
/* Discard any excess data the server gave us */
afs_extract_discard(call, req->actual_len - req->len);
call->unmarshall = 3;
- /* Fall through */
+ fallthrough;
case 3:
_debug("extract discard %zu/%llu",
@@ -388,7 +388,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
no_more_data:
call->unmarshall = 4;
afs_extract_to_buf(call, (21 + 3 + 6) * 4);
- /* Fall through */
+ fallthrough;
/* extract the metadata */
case 4:
@@ -1343,7 +1343,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
case 0:
call->unmarshall++;
afs_extract_to_buf(call, 12 * 4);
- /* Fall through */
+ fallthrough;
/* extract the returned status record */
case 1:
@@ -1356,7 +1356,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
xdr_decode_AFSFetchVolumeStatus(&bp, &op->volstatus.vs);
call->unmarshall++;
afs_extract_to_tmp(call);
- /* Fall through */
+ fallthrough;
/* extract the volume name length */
case 2:
@@ -1371,7 +1371,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the volume name */
case 3:
@@ -1385,7 +1385,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the offline message length */
case 4:
@@ -1400,7 +1400,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the offline message */
case 5:
@@ -1415,7 +1415,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the message of the day length */
case 6:
@@ -1430,7 +1430,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the message of the day */
case 7:
@@ -1682,7 +1682,7 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the capabilities word count */
case 1:
@@ -1696,7 +1696,7 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
call->count2 = count;
afs_extract_discard(call, count * sizeof(__be32));
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract capabilities words */
case 2:
@@ -1776,7 +1776,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the file status count and array in two steps */
case 1:
@@ -1794,7 +1794,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->unmarshall++;
more_counts:
afs_extract_to_buf(call, 21 * sizeof(__be32));
- /* Fall through */
+ fallthrough;
case 2:
_debug("extract status array %u", call->count);
@@ -1824,7 +1824,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->count = 0;
call->unmarshall++;
afs_extract_to_tmp(call);
- /* Fall through */
+ fallthrough;
/* Extract the callback count and array in two steps */
case 3:
@@ -1841,7 +1841,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->unmarshall++;
more_cbs:
afs_extract_to_buf(call, 3 * sizeof(__be32));
- /* Fall through */
+ fallthrough;
case 4:
_debug("extract CB array");
@@ -1870,7 +1870,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
afs_extract_to_buf(call, 6 * sizeof(__be32));
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 5:
ret = afs_extract_data(call, false);
@@ -1974,7 +1974,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the returned data length */
case 1:
@@ -1992,7 +1992,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
acl->size = call->count2;
afs_extract_begin(call, acl->data, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the returned data */
case 2:
@@ -2002,7 +2002,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
afs_extract_to_buf(call, (21 + 6) * 4);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the metadata */
case 3:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1d13d2e882ad..0fe8844b4bee 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -810,14 +810,32 @@ void afs_evict_inode(struct inode *inode)
static void afs_setattr_success(struct afs_operation *op)
{
- struct inode *inode = &op->file[0].vnode->vfs_inode;
+ struct afs_vnode_param *vp = &op->file[0];
+ struct inode *inode = &vp->vnode->vfs_inode;
+ loff_t old_i_size = i_size_read(inode);
+
+ op->setattr.old_i_size = old_i_size;
+ afs_vnode_commit_status(op, vp);
+ /* inode->i_size has now been changed. */
+
+ if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+ loff_t size = op->setattr.attr->ia_size;
+ if (size > old_i_size)
+ pagecache_isize_extended(inode, old_i_size, size);
+ }
+}
+
+static void afs_setattr_edit_file(struct afs_operation *op)
+{
+ struct afs_vnode_param *vp = &op->file[0];
+ struct inode *inode = &vp->vnode->vfs_inode;
- afs_vnode_commit_status(op, &op->file[0]);
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
- loff_t i_size = inode->i_size, size = op->setattr.attr->ia_size;
- if (size > i_size)
- pagecache_isize_extended(inode, i_size, size);
- truncate_pagecache(inode, size);
+ loff_t size = op->setattr.attr->ia_size;
+ loff_t i_size = op->setattr.old_i_size;
+
+ if (size < i_size)
+ truncate_pagecache(inode, size);
}
}
@@ -825,6 +843,7 @@ static const struct afs_operation_ops afs_setattr_operation = {
.issue_afs_rpc = afs_fs_setattr,
.issue_yfs_rpc = yfs_fs_setattr,
.success = afs_setattr_success,
+ .edit_dir = afs_setattr_edit_file,
};
/*
@@ -863,11 +882,16 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
+ /* Prevent any new writebacks from starting whilst we do this. */
+ down_write(&vnode->validate_lock);
+
op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ?
afs_file_key(attr->ia_file) : NULL),
vnode->volume);
- if (IS_ERR(op))
- return PTR_ERR(op);
+ if (IS_ERR(op)) {
+ ret = PTR_ERR(op);
+ goto out_unlock;
+ }
afs_op_set_vnode(op, 0, vnode);
op->setattr.attr = attr;
@@ -880,5 +904,10 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
op->file[0].update_ctime = 1;
op->ops = &afs_setattr_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+
+out_unlock:
+ up_write(&vnode->validate_lock);
+ _leave(" = %d", ret);
+ return ret;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 792ac711985e..e5f0446f27e5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -401,22 +401,24 @@ struct afs_vlserver {
#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
+#define AFS_VLSERVER_FL_RESPONDING 3 /* VL server is responding */
rwlock_t lock; /* Lock on addresses */
atomic_t usage;
+ unsigned int rtt; /* Server's current RTT in uS */
/* Probe state */
wait_queue_head_t probe_wq;
atomic_t probe_outstanding;
spinlock_t probe_lock;
struct {
- unsigned int rtt; /* RTT as ktime/64 */
+ unsigned int rtt; /* RTT in uS */
u32 abort_code;
short error;
- bool have_result;
- bool responded:1;
- bool is_yfs:1;
- bool not_yfs:1;
- bool local_failure:1;
+ unsigned short flags;
+#define AFS_VLSERVER_PROBE_RESPONDED 0x01 /* At least once response (may be abort) */
+#define AFS_VLSERVER_PROBE_IS_YFS 0x02 /* The peer appears to be YFS */
+#define AFS_VLSERVER_PROBE_NOT_YFS 0x04 /* The peer appears not to be YFS */
+#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */
} probe;
u16 port;
@@ -810,6 +812,7 @@ struct afs_operation {
} store;
struct {
struct iattr *attr;
+ loff_t old_i_size;
} setattr;
struct afs_acl *acl;
struct yfs_acl *yacl;
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 5334f1bd2bca..1d1a8debe472 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -120,42 +120,42 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
if (e->error == -ETIMEDOUT ||
e->error == -ETIME)
return;
- /* Fall through */
+ fallthrough;
case -ETIMEDOUT:
case -ETIME:
if (e->error == -ENOMEM ||
e->error == -ENONET)
return;
- /* Fall through */
+ fallthrough;
case -ENOMEM:
case -ENONET:
if (e->error == -ERFKILL)
return;
- /* Fall through */
+ fallthrough;
case -ERFKILL:
if (e->error == -EADDRNOTAVAIL)
return;
- /* Fall through */
+ fallthrough;
case -EADDRNOTAVAIL:
if (e->error == -ENETUNREACH)
return;
- /* Fall through */
+ fallthrough;
case -ENETUNREACH:
if (e->error == -EHOSTUNREACH)
return;
- /* Fall through */
+ fallthrough;
case -EHOSTUNREACH:
if (e->error == -EHOSTDOWN)
return;
- /* Fall through */
+ fallthrough;
case -EHOSTDOWN:
if (e->error == -ECONNREFUSED)
return;
- /* Fall through */
+ fallthrough;
case -ECONNREFUSED:
if (e->error == -ECONNRESET)
return;
- /* Fall through */
+ fallthrough;
case -ECONNRESET: /* Responded, but call expired. */
if (e->responded)
return;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index e817fc740ba0..e8babb62ed44 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -310,6 +310,11 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
alist->preferred == i ? '>' : '-',
&alist->addrs[i].transport);
}
+ seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt);
+ seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n",
+ vlserver->probe.flags, vlserver->probe.error,
+ vlserver->probe.abort_code,
+ atomic_read(&vlserver->probe_outstanding));
return 0;
}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 6a0935cb822f..d83f13c44b92 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -281,7 +281,7 @@ bool afs_select_fileserver(struct afs_operation *op)
case -ETIME:
if (op->error != -EDESTADDRREQ)
goto iterate_address;
- /* Fall through */
+ fallthrough;
case -ERFKILL:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 8fc8fb406a5a..8be709cb8542 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -568,7 +568,7 @@ static void afs_deliver_to_call(struct afs_call *call)
case -EIO:
pr_err("kAFS: Call %u in bad state %u\n",
call->debug_id, state);
- /* Fall through */
+ fallthrough;
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
@@ -669,7 +669,7 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
ret = call->ret0;
call->ret0 = 0;
- /* Fall through */
+ fallthrough;
case -ECONNABORTED:
ac->responded = true;
break;
@@ -872,7 +872,7 @@ void afs_send_empty_reply(struct afs_call *call)
_debug("oom");
rxrpc_kernel_abort_call(net->socket, call->rxcall,
RX_USER_ABORT, -ENOMEM, "KOO");
- /* Fall through */
+ fallthrough;
default:
_leave(" [error]");
return;
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index 8fea54eba0c2..38b2ba1d9ec0 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -21,6 +21,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
rwlock_init(&vlserver->lock);
init_waitqueue_head(&vlserver->probe_wq);
spin_lock_init(&vlserver->probe_lock);
+ vlserver->rtt = UINT_MAX;
vlserver->name_len = name_len;
vlserver->port = port;
memcpy(vlserver->name, name, name_len);
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index e3aa013c2177..d1c7068b4346 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -11,15 +11,33 @@
#include "internal.h"
#include "protocol_yfs.h"
-static bool afs_vl_probe_done(struct afs_vlserver *server)
+
+/*
+ * Handle the completion of a set of probes.
+ */
+static void afs_finished_vl_probe(struct afs_vlserver *server)
{
- if (!atomic_dec_and_test(&server->probe_outstanding))
- return false;
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) {
+ server->rtt = UINT_MAX;
+ clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+ }
- wake_up_var(&server->probe_outstanding);
clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
- return true;
+}
+
+/*
+ * Handle the completion of a probe RPC call.
+ */
+static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up)
+{
+ if (atomic_dec_and_test(&server->probe_outstanding)) {
+ afs_finished_vl_probe(server);
+ wake_up = true;
+ }
+
+ if (wake_up)
+ wake_up_all(&server->probe_wq);
}
/*
@@ -45,15 +63,20 @@ void afs_vlserver_probe_result(struct afs_call *call)
server->probe.error = 0;
goto responded;
case -ECONNABORTED:
- if (!server->probe.responded) {
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) {
server->probe.abort_code = call->abort_code;
server->probe.error = ret;
}
goto responded;
case -ENOMEM:
case -ENONET:
- server->probe.local_failure = true;
- afs_io_error(call, afs_io_error_vl_probe_fail);
+ case -EKEYEXPIRED:
+ case -EKEYREVOKED:
+ case -EKEYREJECTED:
+ server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE;
+ if (server->probe.error == 0)
+ server->probe.error = ret;
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
case -ERFKILL:
@@ -67,12 +90,12 @@ void afs_vlserver_probe_result(struct afs_call *call)
default:
clear_bit(index, &alist->responded);
set_bit(index, &alist->failed);
- if (!server->probe.responded &&
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) &&
(server->probe.error == 0 ||
server->probe.error == -ETIMEDOUT ||
server->probe.error == -ETIME))
server->probe.error = ret;
- afs_io_error(call, afs_io_error_vl_probe_fail);
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
goto out;
}
@@ -81,39 +104,36 @@ responded:
clear_bit(index, &alist->failed);
if (call->service_id == YFS_VL_SERVICE) {
- server->probe.is_yfs = true;
+ server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS;
set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
alist->addrs[index].srx_service = call->service_id;
} else {
- server->probe.not_yfs = true;
- if (!server->probe.is_yfs) {
+ server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS;
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) {
clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
alist->addrs[index].srx_service = call->service_id;
}
}
- rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
- if (rtt_us < server->probe.rtt) {
+ if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
+ rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
+ server->rtt = rtt_us;
alist->preferred = index;
- have_result = true;
}
smp_wmb(); /* Set rtt before responded. */
- server->probe.responded = true;
+ server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED;
set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+ set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+ have_result = true;
out:
spin_unlock(&server->probe_lock);
_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
server_index, index, &alist->addrs[index].transport, rtt_us, ret);
- have_result |= afs_vl_probe_done(server);
- if (have_result) {
- server->probe.have_result = true;
- wake_up_var(&server->probe.have_result);
- wake_up_all(&server->probe_wq);
- }
+ afs_done_one_vl_probe(server, have_result);
}
/*
@@ -151,11 +171,10 @@ static bool afs_do_probe_vlserver(struct afs_net *net,
in_progress = true;
} else {
afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
+ afs_done_one_vl_probe(server, false);
}
}
- if (!in_progress)
- afs_vl_probe_done(server);
return in_progress;
}
@@ -193,7 +212,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
{
struct wait_queue_entry *waits;
struct afs_vlserver *server;
- unsigned int rtt = UINT_MAX;
+ unsigned int rtt = UINT_MAX, rtt_s;
bool have_responders = false;
int pref = -1, i;
@@ -205,7 +224,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
server = vllist->servers[i].server;
if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
__clear_bit(i, &untried);
- if (server->probe.responded)
+ if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)
have_responders = true;
}
}
@@ -231,7 +250,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
for (i = 0; i < vllist->nr_servers; i++) {
if (test_bit(i, &untried)) {
server = vllist->servers[i].server;
- if (server->probe.responded)
+ if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)
goto stop;
if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
still_probing = true;
@@ -249,10 +268,11 @@ stop:
for (i = 0; i < vllist->nr_servers; i++) {
if (test_bit(i, &untried)) {
server = vllist->servers[i].server;
- if (server->probe.responded &&
- server->probe.rtt < rtt) {
+ rtt_s = READ_ONCE(server->rtt);
+ if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) &&
+ rtt_s < rtt) {
pref = i;
- rtt = server->probe.rtt;
+ rtt = rtt_s;
}
remove_wait_queue(&server->probe_wq, &waits[i]);
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index f405ca8b240a..c0458c903b31 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -192,7 +192,8 @@ pick_server:
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
- if (!test_bit(i, &vc->untried) || !s->probe.responded)
+ if (!test_bit(i, &vc->untried) ||
+ !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
continue;
if (s->probe.rtt < rtt) {
vc->index = i;
@@ -262,10 +263,14 @@ no_more_servers:
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
+ if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
+ e.responded = true;
afs_prioritise_error(&e, READ_ONCE(s->probe.error),
s->probe.abort_code);
}
+ error = e.error;
+
failed_set_error:
vc->error = error;
failed:
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index fd82850cd424..dc9327332f06 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -196,7 +196,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
/* Extract the returned uuid, uniquifier, nentries and
* blkaddrs size */
- /* Fall through */
+ fallthrough;
case 1:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -221,7 +221,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
count = min(call->count, 4U);
afs_extract_to_buf(call, count * sizeof(__be32));
- /* Fall through - and extract entries */
+ fallthrough; /* and extract entries */
case 2:
ret = afs_extract_data(call, call->count > 4);
if (ret < 0)
@@ -324,7 +324,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through - and extract the capabilities word count */
+ fallthrough; /* and extract the capabilities word count */
case 1:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -337,7 +337,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
call->unmarshall++;
afs_extract_discard(call, count * sizeof(__be32));
- /* Fall through - and extract capabilities words */
+ fallthrough; /* and extract capabilities words */
case 2:
ret = afs_extract_data(call, false);
if (ret < 0)
@@ -436,7 +436,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
/* Extract the returned uuid, uniquifier, fsEndpoints count and
* either the first fsEndpoint type or the volEndpoints
* count if there are no fsEndpoints. */
- /* Fall through */
+ fallthrough;
case 1:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -475,7 +475,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
afs_extract_to_buf(call, size);
call->unmarshall = 2;
- /* Fall through - and extract fsEndpoints[] entries */
+ fallthrough; /* and extract fsEndpoints[] entries */
case 2:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -526,7 +526,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
* extract the type of the next endpoint when we extract the
* data of the current one, but this is the first...
*/
- /* Fall through */
+ fallthrough;
case 3:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -552,7 +552,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
afs_extract_to_buf(call, size);
call->unmarshall = 4;
- /* Fall through - and extract volEndpoints[] entries */
+ fallthrough; /* and extract volEndpoints[] entries */
case 4:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -587,7 +587,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
afs_extract_discard(call, 0);
call->unmarshall = 5;
- /* Fall through - Done */
+ fallthrough; /* Done */
case 5:
ret = afs_extract_data(call, false);
if (ret < 0)
@@ -663,7 +663,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through - and extract the cell name length */
+ fallthrough; /* and extract the cell name length */
case 1:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -685,7 +685,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
afs_extract_begin(call, cell_name, namesz);
call->unmarshall++;
- /* Fall through - and extract cell name */
+ fallthrough; /* and extract cell name */
case 2:
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -694,7 +694,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
afs_extract_discard(call, call->count2);
call->unmarshall++;
- /* Fall through - and extract padding */
+ fallthrough; /* and extract padding */
case 3:
ret = afs_extract_data(call, false);
if (ret < 0)
diff --git a/fs/afs/write.c b/fs/afs/write.c
index a121c247d95a..da12abd6db21 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -609,7 +609,7 @@ no_more:
default:
pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
- /* Fall through */
+ fallthrough;
case -EACCES:
case -EPERM:
case -ENOKEY:
@@ -738,11 +738,21 @@ static int afs_writepages_region(struct address_space *mapping,
int afs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
pgoff_t start, end, next;
int ret;
_enter("");
+ /* We have to be careful as we can end up racing with setattr()
+ * truncating the pagecache since the caller doesn't take a lock here
+ * to prevent it.
+ */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ down_read(&vnode->validate_lock);
+ else if (!down_read_trylock(&vnode->validate_lock))
+ return 0;
+
if (wbc->range_cyclic) {
start = mapping->writeback_index;
end = -1;
@@ -762,6 +772,7 @@ int afs_writepages(struct address_space *mapping,
ret = afs_writepages_region(mapping, wbc, start, end, &next);
}
+ up_read(&vnode->validate_lock);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 8c24fdc899e3..3b1239b7e90d 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -373,7 +373,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
req->offset = req->pos & (PAGE_SIZE - 1);
afs_extract_to_tmp64(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the returned data length */
case 1:
@@ -401,7 +401,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
call->bvec[0].bv_page = req->pages[req->index];
iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size);
ASSERTCMP(size, <=, PAGE_SIZE);
- /* Fall through */
+ fallthrough;
/* extract the returned data */
case 2:
@@ -428,7 +428,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
/* Discard any excess data the server gave us */
afs_extract_discard(call, req->actual_len - req->len);
call->unmarshall = 3;
- /* Fall through */
+ fallthrough;
case 3:
_debug("extract discard %zu/%llu",
@@ -444,7 +444,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
sizeof(struct yfs_xdr_YFSFetchStatus) +
sizeof(struct yfs_xdr_YFSCallBack) +
sizeof(struct yfs_xdr_YFSVolSync));
- /* Fall through */
+ fallthrough;
/* extract the metadata */
case 4:
@@ -461,7 +461,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
req->file_size = vp->scb.status.size;
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 5:
break;
@@ -1262,7 +1262,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
case 0:
call->unmarshall++;
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
- /* Fall through */
+ fallthrough;
/* extract the returned status record */
case 1:
@@ -1275,7 +1275,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
xdr_decode_YFSFetchVolumeStatus(&bp, &op->volstatus.vs);
call->unmarshall++;
afs_extract_to_tmp(call);
- /* Fall through */
+ fallthrough;
/* extract the volume name length */
case 2:
@@ -1290,7 +1290,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the volume name */
case 3:
@@ -1304,7 +1304,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the offline message length */
case 4:
@@ -1319,7 +1319,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the offline message */
case 5:
@@ -1334,7 +1334,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the message of the day length */
case 6:
@@ -1349,7 +1349,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the message of the day */
case 7:
@@ -1363,7 +1363,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
_debug("motd '%s'", p);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 8:
break;
@@ -1622,7 +1622,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the file status count and array in two steps */
case 1:
@@ -1640,7 +1640,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->unmarshall++;
more_counts:
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
- /* Fall through */
+ fallthrough;
case 2:
_debug("extract status array %u", call->count);
@@ -1670,7 +1670,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->count = 0;
call->unmarshall++;
afs_extract_to_tmp(call);
- /* Fall through */
+ fallthrough;
/* Extract the callback count and array in two steps */
case 3:
@@ -1687,7 +1687,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->unmarshall++;
more_cbs:
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
- /* Fall through */
+ fallthrough;
case 4:
_debug("extract CB array");
@@ -1716,7 +1716,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 5:
ret = afs_extract_data(call, false);
@@ -1727,7 +1727,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
xdr_decode_YFSVolSync(&bp, &op->volsync);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 6:
break;
@@ -1804,7 +1804,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the file ACL length */
case 1:
@@ -1826,7 +1826,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
afs_extract_discard(call, size);
}
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the file ACL */
case 2:
@@ -1836,7 +1836,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the volume ACL length */
case 3:
@@ -1858,7 +1858,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
afs_extract_discard(call, size);
}
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* Extract the volume ACL */
case 4:
@@ -1871,7 +1871,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
sizeof(struct yfs_xdr_YFSFetchStatus) +
sizeof(struct yfs_xdr_YFSVolSync));
call->unmarshall++;
- /* Fall through */
+ fallthrough;
/* extract the metadata */
case 5:
@@ -1886,7 +1886,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
xdr_decode_YFSVolSync(&bp, &op->volsync);
call->unmarshall++;
- /* Fall through */
+ fallthrough;
case 6:
break;
diff --git a/fs/aio.c b/fs/aio.c
index 5736bff48e9e..d5ec30385566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1511,7 +1511,7 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
* may be already running. Just fail this IO with EINTR.
*/
ret = -EINTR;
- /*FALLTHRU*/
+ fallthrough;
default:
req->ki_complete(req, ret, 0);
}
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 74c886f7c51c..5ced859dac53 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi,
mutex_lock(&sbi->pipe_mutex);
while (bytes) {
- wr = kernel_write(file, data, bytes, &file->f_pos);
+ wr = __kernel_write(file, data, bytes, NULL);
if (wr <= 0)
break;
data += wr;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index f2f9086ebe98..b9c658e0548e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -576,7 +576,7 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- len = data_len + extra;
+ len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
@@ -590,7 +590,9 @@ static int load_flat_file(struct linux_binprm *bprm,
vm_munmap(textpos, text_len);
goto err;
}
- datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart +
+ MAX_SHARED_LIBS * sizeof(unsigned long),
+ FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
data_len + bss_len + stack_len, datapos);
@@ -620,7 +622,7 @@ static int load_flat_file(struct linux_binprm *bprm,
memp_size = len;
} else {
- len = text_len + data_len + extra;
+ len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -635,7 +637,9 @@ static int load_flat_file(struct linux_binprm *bprm,
}
realdatastart = textpos + ntohl(hdr->data_start);
- datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart +
+ MAX_SHARED_LIBS * sizeof(u32),
+ FLAT_DATA_ALIGN);
reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
@@ -652,9 +656,8 @@ static int load_flat_file(struct linux_binprm *bprm,
(text_len + full_data
- sizeof(struct flat_hdr)),
0);
- if (datapos != realdatastart)
- memmove((void *)datapos, (void *)realdatastart,
- full_data);
+ memmove((void *) datapos, (void *) realdatastart,
+ full_data);
#else
/*
* This is used on MMU systems mainly for testing.
@@ -710,7 +713,8 @@ static int load_flat_file(struct linux_binprm *bprm,
if (IS_ERR_VALUE(result)) {
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
- vm_munmap(textpos, text_len + data_len + extra);
+ vm_munmap(textpos, text_len + data_len + extra +
+ MAX_SHARED_LIBS * sizeof(u32));
goto err;
}
}
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 613920c17ac1..ea8aaf36647e 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1798,7 +1798,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
- set_free_space_tree_thresholds(cache);
cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
@@ -1912,6 +1911,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
if (ret < 0)
goto error;
+ set_free_space_tree_thresholds(cache);
+
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -2132,6 +2133,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
return -ENOMEM;
cache->length = size;
+ set_free_space_tree_thresholds(cache);
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 70e49d8d4f6c..cd392da69b81 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -68,7 +68,7 @@ const char *btrfs_super_csum_driver(u16 csum_type)
btrfs_csums[csum_type].name;
}
-size_t __const btrfs_get_num_csums(void)
+size_t __attribute_const__ btrfs_get_num_csums(void)
{
return ARRAY_SIZE(btrfs_csums);
}
@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
else
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9c7e466f27a9..9a72896bed2e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2262,7 +2262,7 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
-size_t __const btrfs_get_num_csums(void);
+size_t __attribute_const__ btrfs_get_num_csums(void);
/*
@@ -2518,7 +2518,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_root *root,
- u64 objectid, u64 offset, u64 bytenr);
+ u64 objectid, u64 offset, u64 bytenr, bool strict);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
@@ -2934,7 +2934,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
u64 start, u64 len);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
- u64 *ram_bytes);
+ u64 *ram_bytes, bool strict);
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index db93909b25e0..e4a1c6afe35d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -599,6 +599,37 @@ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
wake_up(&fs_info->dev_replace.replace_wait);
}
+/*
+ * When finishing the device replace, before swapping the source device with the
+ * target device we must update the chunk allocation state in the target device,
+ * as it is empty because replace works by directly copying the chunks and not
+ * through the normal chunk allocation path.
+ */
+static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
+ struct btrfs_device *tgtdev)
+{
+ struct extent_state *cached_state = NULL;
+ u64 start = 0;
+ u64 found_start;
+ u64 found_end;
+ int ret = 0;
+
+ lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
+
+ while (!find_first_extent_bit(&srcdev->alloc_state, start,
+ &found_start, &found_end,
+ CHUNK_ALLOCATED, &cached_state)) {
+ ret = set_extent_bits(&tgtdev->alloc_state, found_start,
+ found_end, CHUNK_ALLOCATED);
+ if (ret)
+ break;
+ start = found_end + 1;
+ }
+
+ free_extent_state(cached_state);
+ return ret;
+}
+
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret)
{
@@ -673,8 +704,14 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
- /* replace old device with new one in mapping tree */
+ /*
+ * Update allocation state in the new device and replace the old device
+ * with the new one in the mapping tree.
+ */
if (!scrub_ret) {
+ scrub_ret = btrfs_set_target_alloc_state(src_device, tgt_device);
+ if (scrub_ret)
+ goto error;
btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
src_device,
tgt_device);
@@ -685,6 +722,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_dev_name(src_device),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
+error:
up_write(&dev_replace->rwsem);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -745,7 +783,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* replace the sysfs entry */
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
btrfs_sysfs_update_devid(tgt_device);
- btrfs_rm_dev_replace_free_srcdev(src_device);
+ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
+ btrfs_scratch_superblocks(fs_info, src_device->bdev,
+ src_device->name->str);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
@@ -754,6 +794,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+ btrfs_rm_dev_replace_free_srcdev(src_device);
+
return 0;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9ae25f632157..9f72b092bc22 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -636,16 +636,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
csum_tree_block(eb, result);
if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
- u32 val;
- u32 found = 0;
-
- memcpy(&found, result, csum_size);
+ u8 val[BTRFS_CSUM_SIZE] = { 0 };
read_extent_buffer(eb, &val, 0, csum_size);
btrfs_warn_rl(fs_info,
- "%s checksum verify failed on %llu wanted %x found %x level %d",
+ "%s checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
fs_info->sb->s_id, eb->start,
- val, found, btrfs_header_level(eb));
+ CSUM_FMT_VALUE(csum_size, val),
+ CSUM_FMT_VALUE(csum_size, result),
+ btrfs_header_level(eb));
ret = -EUCLEAN;
goto err;
}
@@ -3418,6 +3417,8 @@ fail_block_groups:
btrfs_put_block_group_cache(fs_info);
fail_tree_roots:
+ if (fs_info->data_reloc_root)
+ btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
free_root_pointers(fs_info, true);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -4551,6 +4552,7 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
cache->io_ctl.inode = NULL;
iput(inode);
}
+ ASSERT(cache->io_ctl.pages == NULL);
btrfs_put_block_group(cache);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index de6fe176fdfb..780b9c9a98fe 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -400,12 +400,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
ASSERT(eb->fs_info);
/*
- * Every shared one has parent tree
- * block, which must be aligned to
- * nodesize.
+ * Every shared one has parent tree block,
+ * which must be aligned to sector size.
*/
if (offset &&
- IS_ALIGNED(offset, eb->fs_info->nodesize))
+ IS_ALIGNED(offset, eb->fs_info->sectorsize))
return type;
}
} else if (is_data == BTRFS_REF_TYPE_DATA) {
@@ -414,12 +413,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
if (type == BTRFS_SHARED_DATA_REF_KEY) {
ASSERT(eb->fs_info);
/*
- * Every shared one has parent tree
- * block, which must be aligned to
- * nodesize.
+ * Every shared one has parent tree block,
+ * which must be aligned to sector size.
*/
if (offset &&
- IS_ALIGNED(offset, eb->fs_info->nodesize))
+ IS_ALIGNED(offset, eb->fs_info->sectorsize))
return type;
}
} else {
@@ -429,8 +427,9 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
}
btrfs_print_leaf((struct extent_buffer *)eb);
- btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
- eb->start, type);
+ btrfs_err(eb->fs_info,
+ "eb %llu iref 0x%lx invalid extent inline ref type %d",
+ eb->start, (unsigned long)iref, type);
WARN_ON(1);
return BTRFS_REF_TYPE_INVALID;
@@ -2306,7 +2305,8 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
static noinline int check_committed_ref(struct btrfs_root *root,
struct btrfs_path *path,
- u64 objectid, u64 offset, u64 bytenr)
+ u64 objectid, u64 offset, u64 bytenr,
+ bool strict)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *extent_root = fs_info->extent_root;
@@ -2348,9 +2348,13 @@ static noinline int check_committed_ref(struct btrfs_root *root,
btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
goto out;
- /* If extent created before last snapshot => it's definitely shared */
- if (btrfs_extent_generation(leaf, ei) <=
- btrfs_root_last_snapshot(&root->root_item))
+ /*
+ * If extent created before last snapshot => it's shared unless the
+ * snapshot has been deleted. Use the heuristic if strict is false.
+ */
+ if (!strict &&
+ (btrfs_extent_generation(leaf, ei) <=
+ btrfs_root_last_snapshot(&root->root_item)))
goto out;
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
@@ -2375,7 +2379,7 @@ out:
}
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
- u64 bytenr)
+ u64 bytenr, bool strict)
{
struct btrfs_path *path;
int ret;
@@ -2386,7 +2390,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
do {
ret = check_committed_ref(root, path, objectid,
- offset, bytenr);
+ offset, bytenr, strict);
if (ret && ret != -ENOENT)
goto out;
@@ -4522,7 +4526,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ERR_PTR(-EUCLEAN);
}
- btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
+ btrfs_set_buffer_lockdep_class(owner, buf, level);
btrfs_tree_lock(buf);
btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6def411b2eba..a940edb1e64f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5655,9 +5655,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5677,7 +5677,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..30794ae58498 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bb824c7cb7c7..4507c3d09399 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1571,7 +1571,7 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
}
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, false);
if (ret <= 0) {
ret = 0;
if (!nowait)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ef0fd7afb0b1..dc82fd0c80cb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1186,7 +1186,6 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
ret = update_cache_item(trans, root, inode, path, offset,
io_ctl->entries, io_ctl->bitmaps);
out:
- io_ctl_free(io_ctl);
if (ret) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
@@ -1347,6 +1346,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* them out later
*/
io_ctl_drop_pages(io_ctl);
+ io_ctl_free(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 8b1f5c8897b7..6b9faf3b0e96 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -22,6 +22,10 @@ void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
size_t bitmap_size;
u64 num_bitmaps, total_bitmap_size;
+ if (WARN_ON(cache->length == 0))
+ btrfs_warn(cache->fs_info, "block group %llu length is zero",
+ cache->start);
+
/*
* We convert to bitmaps when the disk space required for using extents
* exceeds that required for using bitmaps.
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51fcd82d41c0..9570458aa847 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1610,7 +1610,7 @@ next_slot:
goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
- extent_offset, disk_bytenr);
+ extent_offset, disk_bytenr, false);
if (ret) {
/*
* ret could be -EIO if the above fails to read
@@ -2161,11 +2161,8 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
u64 bio_offset)
{
struct inode *inode = private_data;
- blk_status_t ret = 0;
- ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
- BUG_ON(ret); /* -ENOMEM */
- return 0;
+ return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
}
/*
@@ -6953,6 +6950,8 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
* @orig_start: (optional) Return the original file offset of the file extent
* @orig_len: (optional) Return the original on-disk length of the file extent
* @ram_bytes: (optional) Return the ram_bytes of the file extent
+ * @strict: if true, omit optimizations that might force us into unnecessary
+ * cow. e.g., don't trust generation number.
*
* This function will flush ordered extents in the range to ensure proper
* nocow checks for (nowait == false) case.
@@ -6967,7 +6966,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
*/
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
- u64 *ram_bytes)
+ u64 *ram_bytes, bool strict)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_path *path;
@@ -7045,8 +7044,9 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
* Do the same check as in btrfs_cross_ref_exist but without the
* unnecessary search.
*/
- if (btrfs_file_extent_generation(leaf, fi) <=
- btrfs_root_last_snapshot(&root->root_item))
+ if (!strict &&
+ (btrfs_file_extent_generation(leaf, fi) <=
+ btrfs_root_last_snapshot(&root->root_item)))
goto out;
backref_offset = btrfs_file_extent_offset(leaf, fi);
@@ -7082,7 +7082,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
*/
ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
- key.offset - backref_offset, disk_bytenr);
+ key.offset - backref_offset, disk_bytenr,
+ strict);
if (ret) {
ret = 0;
goto out;
@@ -7303,7 +7304,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
block_start = em->block_start + (start - em->start);
if (can_nocow_extent(inode, start, &len, &orig_start,
- &orig_block_len, &ram_bytes) == 1 &&
+ &orig_block_len, &ram_bytes, false) == 1 &&
btrfs_inc_nocow_writers(fs_info, block_start)) {
struct extent_map *em2;
@@ -7619,10 +7620,8 @@ static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
struct bio *bio, u64 offset)
{
struct inode *inode = private_data;
- blk_status_t ret;
- ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
- BUG_ON(ret); /* -ENOMEM */
- return 0;
+
+ return btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
}
static void btrfs_end_dio_bio(struct bio *bio)
@@ -10136,7 +10135,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
free_extent_map(em);
em = NULL;
- ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
+ ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
if (ret < 0) {
goto out;
} else if (ret) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..2d9109d9e98f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2184,6 +2193,11 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf + sk_offset,
+ *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 61f44e78e3c9..80567c11ec12 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -95,9 +95,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
* offset is supposed to be a tree block which
* must be aligned to nodesize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
- pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n",
- offset, (unsigned long long)eb->fs_info->nodesize);
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
+ pr_info(
+ "\t\t\t(parent %llu not aligned to sectorsize %u)\n",
+ offset, eb->fs_info->sectorsize);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -112,8 +113,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
* must be aligned to nodesize.
*/
if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
- pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n",
- offset, (unsigned long long)eb->fs_info->nodesize);
+ pr_info(
+ "\t\t\t(parent %llu not aligned to sectorsize %u)\n",
+ offset, eb->fs_info->sectorsize);
break;
default:
pr_cont("(extent %llu has INVALID ref type %d)\n",
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 5a6cb9db512e..354ab9985a34 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3716,50 +3716,84 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
return 0;
}
+static void scrub_workers_put(struct btrfs_fs_info *fs_info)
+{
+ if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
+ &fs_info->scrub_lock)) {
+ struct btrfs_workqueue *scrub_workers = NULL;
+ struct btrfs_workqueue *scrub_wr_comp = NULL;
+ struct btrfs_workqueue *scrub_parity = NULL;
+
+ scrub_workers = fs_info->scrub_workers;
+ scrub_wr_comp = fs_info->scrub_wr_completion_workers;
+ scrub_parity = fs_info->scrub_parity_workers;
+
+ fs_info->scrub_workers = NULL;
+ fs_info->scrub_wr_completion_workers = NULL;
+ fs_info->scrub_parity_workers = NULL;
+ mutex_unlock(&fs_info->scrub_lock);
+
+ btrfs_destroy_workqueue(scrub_workers);
+ btrfs_destroy_workqueue(scrub_wr_comp);
+ btrfs_destroy_workqueue(scrub_parity);
+ }
+}
+
/*
* get a reference count on fs_info->scrub_workers. start worker if necessary
*/
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
+ struct btrfs_workqueue *scrub_workers = NULL;
+ struct btrfs_workqueue *scrub_wr_comp = NULL;
+ struct btrfs_workqueue *scrub_parity = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
+ int ret = -ENOMEM;
- lockdep_assert_held(&fs_info->scrub_lock);
+ if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
+ return 0;
- if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
- ASSERT(fs_info->scrub_workers == NULL);
- fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
- flags, is_dev_replace ? 1 : max_active, 4);
- if (!fs_info->scrub_workers)
- goto fail_scrub_workers;
-
- ASSERT(fs_info->scrub_wr_completion_workers == NULL);
- fs_info->scrub_wr_completion_workers =
- btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
- max_active, 2);
- if (!fs_info->scrub_wr_completion_workers)
- goto fail_scrub_wr_completion_workers;
+ scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags,
+ is_dev_replace ? 1 : max_active, 4);
+ if (!scrub_workers)
+ goto fail_scrub_workers;
- ASSERT(fs_info->scrub_parity_workers == NULL);
- fs_info->scrub_parity_workers =
- btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
+ scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
max_active, 2);
- if (!fs_info->scrub_parity_workers)
- goto fail_scrub_parity_workers;
+ if (!scrub_wr_comp)
+ goto fail_scrub_wr_completion_workers;
+ scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
+ max_active, 2);
+ if (!scrub_parity)
+ goto fail_scrub_parity_workers;
+
+ mutex_lock(&fs_info->scrub_lock);
+ if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
+ ASSERT(fs_info->scrub_workers == NULL &&
+ fs_info->scrub_wr_completion_workers == NULL &&
+ fs_info->scrub_parity_workers == NULL);
+ fs_info->scrub_workers = scrub_workers;
+ fs_info->scrub_wr_completion_workers = scrub_wr_comp;
+ fs_info->scrub_parity_workers = scrub_parity;
refcount_set(&fs_info->scrub_workers_refcnt, 1);
- } else {
- refcount_inc(&fs_info->scrub_workers_refcnt);
+ mutex_unlock(&fs_info->scrub_lock);
+ return 0;
}
- return 0;
+ /* Other thread raced in and created the workers for us */
+ refcount_inc(&fs_info->scrub_workers_refcnt);
+ mutex_unlock(&fs_info->scrub_lock);
+ ret = 0;
+ btrfs_destroy_workqueue(scrub_parity);
fail_scrub_parity_workers:
- btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+ btrfs_destroy_workqueue(scrub_wr_comp);
fail_scrub_wr_completion_workers:
- btrfs_destroy_workqueue(fs_info->scrub_workers);
+ btrfs_destroy_workqueue(scrub_workers);
fail_scrub_workers:
- return -ENOMEM;
+ return ret;
}
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
@@ -3770,9 +3804,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
int ret;
struct btrfs_device *dev;
unsigned int nofs_flag;
- struct btrfs_workqueue *scrub_workers = NULL;
- struct btrfs_workqueue *scrub_wr_comp = NULL;
- struct btrfs_workqueue *scrub_parity = NULL;
if (btrfs_fs_closing(fs_info))
return -EAGAIN;
@@ -3819,13 +3850,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (IS_ERR(sctx))
return PTR_ERR(sctx);
+ ret = scrub_workers_get(fs_info, is_dev_replace);
+ if (ret)
+ goto out_free_ctx;
+
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
!is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
ret = -ENODEV;
- goto out_free_ctx;
+ goto out;
}
if (!is_dev_replace && !readonly &&
@@ -3834,7 +3869,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
rcu_str_deref(dev->name));
ret = -EROFS;
- goto out_free_ctx;
+ goto out;
}
mutex_lock(&fs_info->scrub_lock);
@@ -3843,7 +3878,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
ret = -EIO;
- goto out_free_ctx;
+ goto out;
}
down_read(&fs_info->dev_replace.rwsem);
@@ -3854,17 +3889,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
ret = -EINPROGRESS;
- goto out_free_ctx;
+ goto out;
}
up_read(&fs_info->dev_replace.rwsem);
- ret = scrub_workers_get(fs_info, is_dev_replace);
- if (ret) {
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- goto out_free_ctx;
- }
-
sctx->readonly = readonly;
dev->scrub_ctx = sctx;
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -3917,24 +3945,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->scrub_lock);
dev->scrub_ctx = NULL;
- if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
- scrub_workers = fs_info->scrub_workers;
- scrub_wr_comp = fs_info->scrub_wr_completion_workers;
- scrub_parity = fs_info->scrub_parity_workers;
-
- fs_info->scrub_workers = NULL;
- fs_info->scrub_wr_completion_workers = NULL;
- fs_info->scrub_parity_workers = NULL;
- }
mutex_unlock(&fs_info->scrub_lock);
- btrfs_destroy_workqueue(scrub_workers);
- btrfs_destroy_workqueue(scrub_wr_comp);
- btrfs_destroy_workqueue(scrub_parity);
+ scrub_workers_put(fs_info);
scrub_put_ctx(sctx);
return ret;
-
+out:
+ scrub_workers_put(fs_info);
out_free_ctx:
scrub_free_ctx(sctx);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e529ddb35b87..25967ecaaf0a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -625,6 +625,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
} else if (strncmp(args[0].from, "lzo", 3) == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
+ info->compress_level = 0;
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c8df2edafd85..5be30066563c 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1170,10 +1170,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
disk_kobj->name);
}
- kobject_del(&one_device->devid_kobj);
- kobject_put(&one_device->devid_kobj);
+ if (one_device->devid_kobj.state_initialized) {
+ kobject_del(&one_device->devid_kobj);
+ kobject_put(&one_device->devid_kobj);
- wait_for_completion(&one_device->kobj_unregister);
+ wait_for_completion(&one_device->kobj_unregister);
+ }
return 0;
}
@@ -1186,10 +1188,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
sysfs_remove_link(fs_devices->devices_kobj,
disk_kobj->name);
}
- kobject_del(&one_device->devid_kobj);
- kobject_put(&one_device->devid_kobj);
+ if (one_device->devid_kobj.state_initialized) {
+ kobject_del(&one_device->devid_kobj);
+ kobject_put(&one_device->devid_kobj);
- wait_for_completion(&one_device->kobj_unregister);
+ wait_for_completion(&one_device->kobj_unregister);
+ }
}
return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 20c6ac1a5de7..d2fc292ac61b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1636,6 +1636,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
+ pending->snap = NULL;
btrfs_abort_transaction(trans, ret);
goto fail;
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 517b44300a05..7b1fee630f97 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -984,7 +984,7 @@ static int check_inode_item(struct extent_buffer *leaf,
/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
inode_item_err(leaf, slot,
- "invalid inode generation: has %llu expect [0, %llu]",
+ "invalid inode transid: has %llu expect [0, %llu]",
btrfs_inode_transid(leaf, iitem), super_gen + 1);
return -EUCLEAN;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 696dd861cc3c..39da9db35278 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3449,11 +3449,13 @@ fail:
btrfs_free_path(path);
out_unlock:
mutex_unlock(&dir->log_mutex);
- if (ret == -ENOSPC) {
+ if (err == -ENOSPC) {
btrfs_set_log_full_commit(trans);
- ret = 0;
- } else if (ret < 0)
- btrfs_abort_transaction(trans, ret);
+ err = 0;
+ } else if (err < 0 && err != -ENOENT) {
+ /* ENOENT can be returned if the entry hasn't been fsynced yet */
+ btrfs_abort_transaction(trans, err);
+ }
btrfs_end_log_trans(root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ee96c5869f57..1997a7d67f22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4,6 +4,7 @@
*/
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
@@ -1998,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
return num_devices;
}
-static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path)
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+ struct block_device *bdev,
+ const char *device_path)
{
struct btrfs_super_block *disk_super;
int copy_num;
@@ -2223,11 +2224,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
struct btrfs_fs_info *fs_info = srcdev->fs_info;
struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
- if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
- /* zero out the old super if it is writable */
- btrfs_scratch_superblocks(fs_info, srcdev->bdev,
- srcdev->name->str);
- }
+ mutex_lock(&uuid_mutex);
btrfs_close_bdev(srcdev);
synchronize_rcu();
@@ -2257,6 +2254,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
+ mutex_unlock(&uuid_mutex);
}
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
@@ -4462,6 +4460,7 @@ int btrfs_uuid_scan_kthread(void *data)
goto skip;
}
update_tree:
+ btrfs_release_path(path);
if (!btrfs_is_empty_uuid(root_item.uuid)) {
ret = btrfs_uuid_tree_add(trans, root_item.uuid,
BTRFS_UUID_KEY_SUBVOL,
@@ -4486,6 +4485,7 @@ update_tree:
}
skip:
+ btrfs_release_path(path);
if (trans) {
ret = btrfs_end_transaction(trans);
trans = NULL;
@@ -4493,7 +4493,6 @@ skip:
break;
}
- btrfs_release_path(path);
if (key.offset < (u64)-1) {
key.offset++;
} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
@@ -6483,8 +6482,17 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
u64 devid, u8 *dev_uuid)
{
struct btrfs_device *device;
+ unsigned int nofs_flag;
+ /*
+ * We call this under the chunk_mutex, so we want to use NOFS for this
+ * allocation, however we don't want to change btrfs_alloc_device() to
+ * always do NOFS because we use it in a lot of other GFP_KERNEL safe
+ * places.
+ */
+ nofs_flag = memalloc_nofs_save();
device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+ memalloc_nofs_restore(nofs_flag);
if (IS_ERR(device))
return device;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5eea93916fbf..302c9234f7d0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+ struct block_device *bdev,
+ const char *device_path);
int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
diff --git a/fs/buffer.c b/fs/buffer.c
index 061dd202979d..50bbc99e3d96 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1958,7 +1958,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
*/
set_buffer_new(bh);
set_buffer_unwritten(bh);
- /* FALLTHRU */
+ fallthrough;
case IOMAP_MAPPED:
if ((iomap->flags & IOMAP_F_NEW) ||
offset >= i_size_read(inode))
@@ -3157,6 +3157,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
+ /*
+ * The bh should be mapped, but it might not be if the
+ * device was hot-removed. Not much we can do but fail the I/O.
+ */
+ if (!buffer_mapped(bh)) {
+ unlock_buffer(bh);
+ return -EIO;
+ }
+
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 55ccccf77cea..034b3f4fdd3a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -887,8 +887,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
int have = ci->i_snap_caps;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s"
- " (mask %s)\n", ci->vfs_inode.i_ino,
+ dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s"
+ " (mask %s)\n", ceph_ino(&ci->vfs_inode),
ceph_cap_string(have),
ceph_cap_string(mask));
return 1;
@@ -899,8 +899,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
if (!__cap_is_valid(cap))
continue;
if ((cap->issued & mask) == mask) {
- dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
- " (mask %s)\n", ci->vfs_inode.i_ino, cap,
+ dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s"
+ " (mask %s)\n", ceph_ino(&ci->vfs_inode), cap,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch)
@@ -911,8 +911,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
/* does a combination of caps satisfy mask? */
have |= cap->issued;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s"
- " (mask %s)\n", ci->vfs_inode.i_ino,
+ dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s"
+ " (mask %s)\n", ceph_ino(&ci->vfs_inode),
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch) {
@@ -2872,7 +2872,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
struct cap_wait cw;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
- cw.ino = inode->i_ino;
+ cw.ino = ceph_ino(inode);
cw.tgid = current->tgid;
cw.need = need;
cw.want = want;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 97539b497e4c..3e3fcda9b276 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -202,7 +202,7 @@ static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
{
struct seq_file *s = p;
- seq_printf(s, "0x%-17lx%-17s%-17s\n", inode->i_ino,
+ seq_printf(s, "0x%-17llx%-17s%-17s\n", ceph_ino(inode),
ceph_cap_string(cap->issued),
ceph_cap_string(cap->implemented));
return 0;
@@ -247,7 +247,7 @@ static int caps_show(struct seq_file *s, void *p)
spin_lock(&mdsc->caps_list_lock);
list_for_each_entry(cw, &mdsc->cap_wait_list, list) {
- seq_printf(s, "%-13d0x%-17lx%-17s%-17s\n", cw->tgid, cw->ino,
+ seq_printf(s, "%-13d0x%-17llx%-17s%-17s\n", cw->tgid, cw->ino,
ceph_cap_string(cw->need),
ceph_cap_string(cw->want));
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 060bdcc5ce32..d72e4a12bb69 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -259,9 +259,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
dentry, dentry, d_inode(dentry));
ctx->pos = di->offset;
if (!dir_emit(ctx, dentry->d_name.name,
- dentry->d_name.len,
- ceph_translate_ino(dentry->d_sb,
- d_inode(dentry)->i_ino),
+ dentry->d_name.len, ceph_present_inode(d_inode(dentry)),
d_inode(dentry)->i_mode >> 12)) {
dput(dentry);
err = 0;
@@ -324,18 +322,21 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* always start with . and .. */
if (ctx->pos == 0) {
dout("readdir off 0 -> '.'\n");
- if (!dir_emit(ctx, ".", 1,
- ceph_translate_ino(inode->i_sb, inode->i_ino),
+ if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode),
inode->i_mode >> 12))
return 0;
ctx->pos = 1;
}
if (ctx->pos == 1) {
- ino_t ino = parent_ino(file->f_path.dentry);
+ u64 ino;
+ struct dentry *dentry = file->f_path.dentry;
+
+ spin_lock(&dentry->d_lock);
+ ino = ceph_present_inode(dentry->d_parent->d_inode);
+ spin_unlock(&dentry->d_lock);
+
dout("readdir off 1 -> '..'\n");
- if (!dir_emit(ctx, "..", 2,
- ceph_translate_ino(inode->i_sb, ino),
- inode->i_mode >> 12))
+ if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12))
return 0;
ctx->pos = 2;
}
@@ -507,9 +508,6 @@ more:
}
for (; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
- struct ceph_vino vino;
- ino_t ino;
- u32 ftype;
BUG_ON(rde->offset < ctx->pos);
@@ -519,13 +517,10 @@ more:
rde->name_len, rde->name, &rde->inode.in);
BUG_ON(!rde->inode.in);
- ftype = le32_to_cpu(rde->inode.in->mode) >> 12;
- vino.ino = le64_to_cpu(rde->inode.in->ino);
- vino.snap = le64_to_cpu(rde->inode.in->snapid);
- ino = ceph_vino_to_ino(vino);
if (!dir_emit(ctx, rde->name, rde->name_len,
- ceph_translate_ino(inode->i_sb, ino), ftype)) {
+ ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
+ le32_to_cpu(rde->inode.in->mode) >> 12)) {
dout("filldir stopping us...\n");
return 0;
}
@@ -1161,7 +1156,7 @@ retry:
if (try_async && op == CEPH_MDS_OP_UNLINK &&
(req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) {
- dout("async unlink on %lu/%.*s caps=%s", dir->i_ino,
+ dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir),
dentry->d_name.len, dentry->d_name.name,
ceph_cap_string(req->r_dir_caps));
set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
@@ -1745,7 +1740,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
case -ENOENT:
if (d_really_is_negative(dentry))
valid = 1;
- /* Fallthrough */
+ fallthrough;
default:
break;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d51c3f2fdca0..3f4c993dfc6f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -252,7 +252,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
case S_IFREG:
ceph_fscache_register_inode_cookie(inode);
ceph_fscache_file_set_cookie(inode, file);
- /* fall through */
+ fallthrough;
case S_IFDIR:
ret = ceph_init_file_info(inode, file, fmode,
S_ISDIR(inode->i_mode));
@@ -630,8 +630,8 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
} else {
struct dentry *dn;
- dout("%s d_adding new inode 0x%llx to 0x%lx/%s\n", __func__,
- vino.ino, dir->i_ino, dentry->d_name.name);
+ dout("%s d_adding new inode 0x%llx to 0x%llx/%s\n", __func__,
+ vino.ino, ceph_ino(dir), dentry->d_name.name);
ceph_dir_clear_ordered(dir);
ceph_init_inode_acls(inode, as_ctx);
if (inode->i_state & I_NEW) {
@@ -2507,6 +2507,7 @@ const struct file_operations ceph_file_fops = {
.mmap = ceph_mmap,
.fsync = ceph_fsync,
.lock = ceph_lock,
+ .setlease = simple_nosetlease,
.flock = ceph_flock,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 357c937699d5..d163fa96cb40 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -41,8 +41,10 @@ static void ceph_inode_work(struct work_struct *work);
*/
static int ceph_set_ino_cb(struct inode *inode, void *data)
{
- ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
- inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ ci->i_vino = *(struct ceph_vino *)data;
+ inode->i_ino = ceph_vino_to_ino_t(ci->i_vino);
inode_set_iversion_raw(inode, 0);
return 0;
}
@@ -50,17 +52,14 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
{
struct inode *inode;
- ino_t t = ceph_vino_to_ino(vino);
- inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino);
+ inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
+ ceph_set_ino_cb, &vino);
if (!inode)
return ERR_PTR(-ENOMEM);
- if (inode->i_state & I_NEW)
- dout("get_inode created new inode %p %llx.%llx ino %llx\n",
- inode, ceph_vinop(inode), (u64)inode->i_ino);
- dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino,
- vino.snap, inode);
+ dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode),
+ ceph_vinop(inode), inode, !!(inode->i_state & I_NEW));
return inode;
}
@@ -2378,7 +2377,7 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
}
generic_fillattr(inode, stat);
- stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
+ stat->ino = ceph_present_inode(inode);
/*
* btime on newly-allocated inodes is 0, so if this is still set to
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index bc9e95937d7c..658800605bfb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -372,7 +372,7 @@ struct ceph_quotarealm_inode {
struct cap_wait {
struct list_head list;
- unsigned long ino;
+ u64 ino;
pid_t tgid;
int need;
int want;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 198ddde5c1e6..cc2c4d40b022 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -23,12 +23,12 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
{
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct super_block *sb = mdsc->fsc->sb;
+ struct inode *root = d_inode(sb->s_root);
if (atomic64_read(&mdsc->quotarealms_count) > 0)
return true;
/* if root is the real CephFS root, we don't have quota realms */
- if (sb->s_root->d_inode &&
- (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
+ if (root && ceph_ino(root) == CEPH_INO_ROOT)
return false;
/* otherwise, we can't know for sure */
return true;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 4c3c964b1c54..a3995ebe0623 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -457,15 +457,7 @@ ceph_vino(const struct inode *inode)
return ceph_inode(inode)->i_vino;
}
-/*
- * ino_t is <64 bits on many architectures, blech.
- *
- * i_ino (kernel inode) st_ino (userspace)
- * i386 32 32
- * x86_64+ino32 64 32
- * x86_64 64 64
- */
-static inline u32 ceph_ino_to_ino32(__u64 vino)
+static inline u32 ceph_ino_to_ino32(u64 vino)
{
u32 ino = vino & 0xffffffff;
ino ^= vino >> 32;
@@ -475,34 +467,17 @@ static inline u32 ceph_ino_to_ino32(__u64 vino)
}
/*
- * kernel i_ino value
+ * Inode numbers in cephfs are 64 bits, but inode->i_ino is 32-bits on
+ * some arches. We generally do not use this value inside the ceph driver, but
+ * we do want to set it to something, so that generic vfs code has an
+ * appropriate value for tracepoints and the like.
*/
-static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
+static inline ino_t ceph_vino_to_ino_t(struct ceph_vino vino)
{
-#if BITS_PER_LONG == 32
- return ceph_ino_to_ino32(vino.ino);
-#else
+ if (sizeof(ino_t) == sizeof(u32))
+ return ceph_ino_to_ino32(vino.ino);
return (ino_t)vino.ino;
-#endif
-}
-
-/*
- * user-visible ino (stat, filldir)
- */
-#if BITS_PER_LONG == 32
-static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
-{
- return ino;
-}
-#else
-static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
-{
- if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
- ino = ceph_ino_to_ino32(ino);
- return ino;
}
-#endif
-
/* for printf-style formatting */
#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
@@ -511,11 +486,34 @@ static inline u64 ceph_ino(struct inode *inode)
{
return ceph_inode(inode)->i_vino.ino;
}
+
static inline u64 ceph_snap(struct inode *inode)
{
return ceph_inode(inode)->i_vino.snap;
}
+/**
+ * ceph_present_ino - format an inode number for presentation to userland
+ * @sb: superblock where the inode lives
+ * @ino: inode number to (possibly) convert
+ *
+ * If the user mounted with the ino32 option, then the 64-bit value needs
+ * to be converted to something that can fit inside 32 bits. Note that
+ * internal kernel code never uses this value, so this is entirely for
+ * userland consumption.
+ */
+static inline u64 ceph_present_ino(struct super_block *sb, u64 ino)
+{
+ if (unlikely(ceph_test_mount_opt(ceph_sb_to_client(sb), INO32)))
+ return ceph_ino_to_ino32(ino);
+ return ino;
+}
+
+static inline u64 ceph_present_inode(struct inode *inode)
+{
+ return ceph_present_ino(inode->i_sb, ceph_ino(inode));
+}
+
static inline int ceph_ino_compare(struct inode *inode, void *data)
{
struct ceph_vino *pvino = (struct ceph_vino *)data;
@@ -524,11 +522,16 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
ci->i_vino.snap == pvino->snap;
}
+
static inline struct inode *ceph_find_inode(struct super_block *sb,
struct ceph_vino vino)
{
- ino_t t = ceph_vino_to_ino(vino);
- return ilookup5(sb, t, ceph_ino_compare, &vino);
+ /*
+ * NB: The hashval will be run through the fs/inode.c hash function
+ * anyway, so there is no need to squash the inode number down to
+ * 32-bits first. Just use low-order bits on arches with 32-bit long.
+ */
+ return ilookup5(sb, (unsigned long)vino.ino, ceph_ino_compare, &vino);
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b296964b8afa..b565d83ba89e 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -2031,4 +2031,19 @@ static inline bool is_smb1_server(struct TCP_Server_Info *server)
return strcmp(server->vals->version_string, SMB1_VERSION_STRING) == 0;
}
+static inline bool is_tcon_dfs(struct cifs_tcon *tcon)
+{
+ /*
+ * For SMB1, see MS-CIFS 2.4.55 SMB_COM_TREE_CONNECT_ANDX (0x75) and MS-CIFS 3.3.4.4 DFS
+ * Subsystem Notifies That a Share Is a DFS Share.
+ *
+ * For SMB2+, see MS-SMB2 2.2.10 SMB2 TREE_CONNECT Response and MS-SMB2 3.3.4.14 Server
+ * Application Updates a Share.
+ */
+ if (!tcon || !tcon->ses || !tcon->ses->server)
+ return false;
+ return is_smb1_server(tcon->ses->server) ? tcon->Flags & SMB_SHARE_IS_IN_DFS :
+ tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT);
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 0e763d2dcf16..0496934feecb 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -581,7 +581,7 @@ should_set_ext_sec_flag(enum securityEnum sectype)
if (global_secflags &
(CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP))
return true;
- /* Fallthrough */
+ fallthrough;
default:
return false;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a275ee399dce..a5731dd6e656 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1378,25 +1378,25 @@ static int cifs_parse_security_flavors(char *value,
return 1;
case Opt_sec_krb5i:
vol->sign = true;
- /* Fallthrough */
+ fallthrough;
case Opt_sec_krb5:
vol->sectype = Kerberos;
break;
case Opt_sec_ntlmsspi:
vol->sign = true;
- /* Fallthrough */
+ fallthrough;
case Opt_sec_ntlmssp:
vol->sectype = RawNTLMSSP;
break;
case Opt_sec_ntlmi:
vol->sign = true;
- /* Fallthrough */
+ fallthrough;
case Opt_ntlm:
vol->sectype = NTLM;
break;
case Opt_sec_ntlmv2i:
vol->sign = true;
- /* Fallthrough */
+ fallthrough;
case Opt_sec_ntlmv2:
vol->sectype = NTLMv2;
break;
@@ -2187,7 +2187,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->password = NULL;
break;
}
- /* Fallthrough - to Opt_pass below.*/
+ fallthrough; /* to Opt_pass below */
case Opt_pass:
/* Obtain the value string */
value = strchr(data, '=');
@@ -4909,7 +4909,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
if (!tcon)
continue;
/* Make sure that requests go through new root servers */
- if (tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT)) {
+ if (is_tcon_dfs(tcon)) {
put_root_ses(root_ses);
set_root_ses(cifs_sb, ses, &root_ses);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3989d08396ac..1f75b25e559a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1017,6 +1017,8 @@ handle_mnt_opt:
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) {
rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, true,
full_path, fid);
+ if (rc == -EREMOTE)
+ rc = 0;
if (rc) {
cifs_dbg(FYI, "%s: Get mode from SID failed. rc=%d\n",
__func__, rc);
@@ -1025,6 +1027,8 @@ handle_mnt_opt:
} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, false,
full_path, fid);
+ if (rc == -EREMOTE)
+ rc = 0;
if (rc) {
cifs_dbg(FYI, "%s: Getting ACL failed with error: %d\n",
__func__, rc);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 69cd5856621b..de564368a887 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -798,7 +798,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
if ((server->sec_kerberos || server->sec_mskerberos) &&
(global_secflags & CIFSSEC_MAY_KRB5))
return Kerberos;
- /* Fallthrough */
+ fallthrough;
default:
return Unspecified;
}
@@ -815,7 +815,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
default:
break;
}
- /* Fallthrough - to attempt LANMAN authentication next */
+ fallthrough; /* to attempt LANMAN authentication next */
case CIFS_NEGFLAVOR_LANMAN:
switch (requested) {
case LANMAN:
@@ -823,7 +823,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
case Unspecified:
if (global_secflags & CIFSSEC_MAY_LANMAN)
return LANMAN;
- /* Fallthrough */
+ fallthrough;
default:
return Unspecified;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 32f90dc82c84..d44df8f95bcd 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1208,7 +1208,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
rqst[1].rq_iov = si_iov;
rqst[1].rq_nvec = 1;
- len = sizeof(ea) + ea_name_len + ea_value_len + 1;
+ len = sizeof(*ea) + ea_name_len + ea_value_len + 1;
ea = kzalloc(len, GFP_KERNEL);
if (ea == NULL) {
rc = -ENOMEM;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 667d70aa335f..96c172d94fba 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1101,7 +1101,7 @@ smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
if ((server->sec_kerberos || server->sec_mskerberos) &&
(global_secflags & CIFSSEC_MAY_KRB5))
return Kerberos;
- /* Fallthrough */
+ fallthrough;
default:
return Unspecified;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index cb733652ecca..ca2273727225 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1688,11 +1688,11 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case 1:
offset += file->f_pos;
- /* fall through */
+ fallthrough;
case 0:
if (offset >= 0)
break;
- /* fall through */
+ fallthrough;
default:
return -EINVAL;
}
diff --git a/fs/dax.c b/fs/dax.c
index 95341af1a966..994ab66a9907 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1367,7 +1367,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
ret = dax_load_hole(&xas, mapping, &entry, vmf);
goto finish_iomap;
}
- /*FALLTHRU*/
+ fallthrough;
default:
WARN_ON_ONCE(1);
error = -EIO;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index b167d2d02148..a768a09430c3 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -177,7 +177,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
goto out;
if (!fops_get(real_fops)) {
-#ifdef MODULE
+#ifdef CONFIG_MODULES
if (real_fops->owner &&
real_fops->owner->state == MODULE_STATE_GOING)
goto out;
@@ -312,7 +312,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
goto out;
if (!fops_get(real_fops)) {
-#ifdef MODULE
+#ifdef CONFIG_MODULES
if (real_fops->owner &&
real_fops->owner->state == MODULE_STATE_GOING)
goto out;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 18d81599522f..002123efc6b0 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -5817,7 +5817,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
break;
case -EAGAIN:
error = 0;
- /* fall through */
+ fallthrough;
default:
__put_lkb(ls, lkb);
goto out;
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 7d40d78ea864..ae325541884e 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -359,7 +359,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
return z_erofs_extent_lookback(m, m->delta[0]);
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
map->m_flags &= ~EROFS_MAP_ZIPPED;
- /* fallthrough */
+ fallthrough;
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
map->m_la = (lcn << lclusterbits) | m->clusterofs;
break;
@@ -416,7 +416,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
if (endoff >= m.clusterofs)
map->m_flags &= ~EROFS_MAP_ZIPPED;
- /* fallthrough */
+ fallthrough;
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
if (endoff >= m.clusterofs) {
map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
@@ -433,7 +433,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
end = (m.lcn << lclusterbits) | m.clusterofs;
map->m_flags |= EROFS_MAP_FULL_MAPPED;
m.delta[0] = 1;
- /* fallthrough */
+ fallthrough;
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
/* get the correspoinding first chunk */
err = z_erofs_extent_lookback(&m, m.delta[0]);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..4df61129566d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,8 +218,7 @@ struct eventpoll {
struct file *file;
/* used to optimize loop detection check */
- struct list_head visited_list_link;
- int visited;
+ u64 gen;
#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
@@ -274,6 +273,8 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+static u64 loop_check_gen = 0;
+
/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;
@@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
@@ -1450,7 +1448,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
- const char *name;
+ struct name_snapshot n;
struct wakeup_source *ws;
if (!epi->ep->ws) {
@@ -1459,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
return -ENOMEM;
}
- name = epi->ffd.file->f_path.dentry->d_name.name;
- ws = wakeup_source_register(NULL, name);
+ take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+ ws = wakeup_source_register(NULL, n.name.name);
+ release_dentry_name_snapshot(&n);
if (!ws)
return -ENOMEM;
@@ -1522,6 +1521,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
RCU_INIT_POINTER(epi->ws, NULL);
}
+ /* Add the current item to the list of active epoll hook for this file */
+ spin_lock(&tfile->f_lock);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+ spin_unlock(&tfile->f_lock);
+
+ /*
+ * Add the current item to the RB tree. All RB tree operations are
+ * protected by "mtx", and ep_insert() is called with "mtx" held.
+ */
+ ep_rbtree_insert(ep, epi);
+
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (full_check && reverse_path_check())
+ goto error_remove_epi;
+
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1544,22 +1559,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
if (epi->nwait < 0)
goto error_unregister;
- /* Add the current item to the list of active epoll hook for this file */
- spin_lock(&tfile->f_lock);
- list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
- spin_unlock(&tfile->f_lock);
-
- /*
- * Add the current item to the RB tree. All RB tree operations are
- * protected by "mtx", and ep_insert() is called with "mtx" held.
- */
- ep_rbtree_insert(ep, epi);
-
- /* now check if we've created too many backpaths */
- error = -EINVAL;
- if (full_check && reverse_path_check())
- goto error_remove_epi;
-
/* We have to drop the new item inside our item list to keep track of it */
write_lock_irq(&ep->lock);
@@ -1588,6 +1587,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
return 0;
+error_unregister:
+ ep_unregister_pollwait(ep, epi);
error_remove_epi:
spin_lock(&tfile->f_lock);
list_del_rcu(&epi->fllink);
@@ -1595,9 +1596,6 @@ error_remove_epi:
rb_erase_cached(&epi->rbn, &ep->rbr);
-error_unregister:
- ep_unregister_pollwait(ep, epi);
-
/*
* We need to do this because an event could have been arrived on some
* allocated wait queue. Note that we don't care about the ep->ovflist
@@ -1972,13 +1970,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
- ep->visited = 1;
- list_add(&ep->visited_list_link, &visited_list);
+ ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->visited)
+ if (ep_tovisit->gen == loop_check_gen)
continue;
error = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, epi->ffd.file,
@@ -1994,9 +1991,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
- list_add(&epi->ffd.file->f_tfile_llink,
- &tfile_check_list);
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ if (get_file_rcu(epi->ffd.file))
+ list_add(&epi->ffd.file->f_tfile_llink,
+ &tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -2017,18 +2016,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- int ret;
- struct eventpoll *ep_cur, *ep_next;
-
- ret = ep_call_nested(&poll_loop_ncalls,
+ return ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, file, ep, current);
- /* clear visited list */
- list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
- visited_list_link) {
- ep_cur->visited = 0;
- list_del(&ep_cur->visited_list_link);
- }
- return ret;
}
static void clear_tfile_check_list(void)
@@ -2040,6 +2029,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2192,33 +2182,32 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
+ ep->gen == loop_check_gen ||
is_file_epoll(tf.file)) {
mutex_unlock(&ep->mtx);
error = epoll_mutex_lock(&epmutex, 0, nonblock);
if (error)
goto error_tgt_fput;
+ loop_check_gen++;
full_check = 1;
if (is_file_epoll(tf.file)) {
error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
+ if (ep_loop_check(ep, tf.file) != 0)
goto error_tgt_fput;
- }
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
- if (error) {
-out_del:
- list_del(&tf.file->f_tfile_llink);
+ if (error)
goto error_tgt_fput;
- }
if (is_file_epoll(tf.file)) {
tep = tf.file->private_data;
error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
if (error) {
mutex_unlock(&ep->mtx);
- goto out_del;
+ goto error_tgt_fput;
}
}
}
@@ -2239,8 +2228,6 @@ out_del:
error = ep_insert(ep, epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- if (full_check)
- clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -2263,8 +2250,11 @@ out_del:
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (full_check)
+ if (full_check) {
+ clear_tfile_check_list();
+ loop_check_gen++;
mutex_unlock(&epmutex);
+ }
fdput(tf);
error_fput:
diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c
index 03d0824fc368..5a2f119b7e8c 100644
--- a/fs/exfat/cache.c
+++ b/fs/exfat/cache.c
@@ -17,7 +17,6 @@
#include "exfat_raw.h"
#include "exfat_fs.h"
-#define EXFAT_CACHE_VALID 0
#define EXFAT_MAX_CACHE 16
struct exfat_cache {
@@ -61,16 +60,6 @@ void exfat_cache_shutdown(void)
kmem_cache_destroy(exfat_cachep);
}
-void exfat_cache_init_inode(struct inode *inode)
-{
- struct exfat_inode_info *ei = EXFAT_I(inode);
-
- spin_lock_init(&ei->cache_lru_lock);
- ei->nr_caches = 0;
- ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
- INIT_LIST_HEAD(&ei->cache_lru);
-}
-
static inline struct exfat_cache *exfat_cache_alloc(void)
{
return kmem_cache_alloc(exfat_cachep, GFP_NOFS);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 95d717f8620c..c013fe931d9c 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -248,6 +248,8 @@ struct exfat_sb_info {
struct rcu_head rcu;
};
+#define EXFAT_CACHE_VALID 0
+
/*
* EXFAT file system inode in-memory data
*/
@@ -428,7 +430,6 @@ extern const struct dentry_operations exfat_utf8_dentry_ops;
/* cache.c */
int exfat_cache_init(void);
void exfat_cache_shutdown(void);
-void exfat_cache_init_inode(struct inode *inode);
void exfat_cache_inval_inode(struct inode *inode);
int exfat_get_cluster(struct inode *inode, unsigned int cluster,
unsigned int *fclus, unsigned int *dclus,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 7f90204adef5..a6de17cac3df 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -611,8 +611,6 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
ei->i_crtime = info->crtime;
inode->i_atime = info->atime;
- exfat_cache_init_inode(inode);
-
return 0;
}
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index e73f20f66cb2..c94ac239f740 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -578,7 +578,8 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- if (IS_ERR(inode))
+ err = PTR_ERR_OR_ZERO(inode);
+ if (err)
goto unlock;
inode_inc_iversion(inode);
@@ -745,10 +746,9 @@ static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry,
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ err = PTR_ERR_OR_ZERO(inode);
+ if (err)
goto unlock;
- }
i_mode = inode->i_mode;
alias = d_find_alias(inode);
@@ -890,10 +890,9 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ err = PTR_ERR_OR_ZERO(inode);
+ if (err)
goto unlock;
- }
inode_inc_iversion(inode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 3b6a1659892f..60b941ba557b 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -376,7 +376,6 @@ static int exfat_read_root(struct inode *inode)
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
current_time(inode);
exfat_truncate_atime(&inode->i_atime);
- exfat_cache_init_inode(inode);
return 0;
}
@@ -763,6 +762,10 @@ static void exfat_inode_init_once(void *foo)
{
struct exfat_inode_info *ei = (struct exfat_inode_info *)foo;
+ spin_lock_init(&ei->cache_lru_lock);
+ ei->nr_caches = 0;
+ ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
+ INIT_LIST_HEAD(&ei->cache_lru);
INIT_HLIST_NODE(&ei->i_hash_fat);
inode_init_once(&ei->vfs_inode);
}
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 60378ddf1424..96044f5dbc0e 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -93,8 +93,10 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
struct inode *inode = file_inode(vmf->vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
vm_fault_t ret;
+ bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
+ (vmf->vma->vm_flags & VM_SHARED);
- if (vmf->flags & FAULT_FLAG_WRITE) {
+ if (write) {
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
}
@@ -103,7 +105,7 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
up_read(&ei->dax_sem);
- if (vmf->flags & FAULT_FLAG_WRITE)
+ if (write)
sb_end_pagefault(inode->i_sb);
return ret;
}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 80662e1f7889..415c21f0e750 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1241,7 +1241,7 @@ do_indirects:
mark_inode_dirty(inode);
ext2_free_branches(inode, &nr, &nr+1, 1);
}
- /* fall through */
+ fallthrough;
case EXT2_IND_BLOCK:
nr = i_data[EXT2_DIND_BLOCK];
if (nr) {
@@ -1249,7 +1249,7 @@ do_indirects:
mark_inode_dirty(inode);
ext2_free_branches(inode, &nr, &nr+1, 2);
}
- /* fall through */
+ fallthrough;
case EXT2_DIND_BLOCK:
nr = i_data[EXT2_TIND_BLOCK];
if (nr) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index dda860562ca3..7fab2b3b5b39 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -587,7 +587,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_xip:
ext2_msg(sb, KERN_INFO, "use dax instead of xip");
set_opt(opts->s_mount_opt, XIP);
- /* Fall through */
+ fallthrough;
case Opt_dax:
#ifdef CONFIG_FS_DAX
ext2_msg(sb, KERN_WARNING,
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 1afa5a4bcb5f..619dd35ddd48 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -110,7 +110,7 @@ config EXT4_KUNIT_TESTS
This builds the ext4 KUnit tests.
KUnit tests run during boot and output the results to the debug log
- in TAP format (http://testanything.org/). Only useful for kernel devs
+ in TAP format (https://testanything.org/). Only useful for kernel devs
running KUnit test harness and are not for inclusion into a production
build.
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1ba46d87cdf1..48c3df47748d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -413,7 +413,8 @@ verified:
* Return buffer_head on success or an ERR_PTR in case of failure.
*/
struct buffer_head *
-ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
+ bool ignore_locked)
{
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -441,6 +442,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
return ERR_PTR(-ENOMEM);
}
+ if (ignore_locked && buffer_locked(bh)) {
+ /* buffer under IO already, return if called for prefetching */
+ put_bh(bh);
+ return NULL;
+ }
+
if (bitmap_uptodate(bh))
goto verify;
@@ -487,10 +494,11 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
* submit the buffer_head for reading
*/
set_buffer_new(bh);
- trace_ext4_read_block_bitmap_load(sb, block_group);
+ trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
bh->b_end_io = ext4_end_bitmap_read;
get_bh(bh);
- submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+ submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO |
+ (ignore_locked ? REQ_RAHEAD : 0), bh);
return bh;
verify:
err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
@@ -534,7 +542,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh;
int err;
- bh = ext4_read_block_bitmap_nowait(sb, block_group);
+ bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
if (IS_ERR(bh))
return bh;
err = ext4_wait_block_bitmap(sb, block_group, bh);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 16e9b2fda03a..c54ba52f2dd4 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -24,6 +24,7 @@ struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
+ u32 ino;
};
static struct kmem_cache *ext4_system_zone_cachep;
@@ -45,7 +46,8 @@ void ext4_exit_system_zone(void)
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
- if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+ if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
+ entry1->ino == entry2->ino)
return 1;
return 0;
}
@@ -66,9 +68,9 @@ static void release_system_zone(struct ext4_system_blocks *system_blks)
*/
static int add_system_zone(struct ext4_system_blocks *system_blks,
ext4_fsblk_t start_blk,
- unsigned int count)
+ unsigned int count, u32 ino)
{
- struct ext4_system_zone *new_entry = NULL, *entry;
+ struct ext4_system_zone *new_entry, *entry;
struct rb_node **n = &system_blks->root.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
@@ -79,30 +81,21 @@ static int add_system_zone(struct ext4_system_blocks *system_blks,
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
- else {
- if (start_blk + count > (entry->start_blk +
- entry->count))
- entry->count = (start_blk + count -
- entry->start_blk);
- new_node = *n;
- new_entry = rb_entry(new_node, struct ext4_system_zone,
- node);
- break;
- }
+ else /* Unexpected overlap of system zones. */
+ return -EFSCORRUPTED;
}
- if (!new_entry) {
- new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
- GFP_KERNEL);
- if (!new_entry)
- return -ENOMEM;
- new_entry->start_blk = start_blk;
- new_entry->count = count;
- new_node = &new_entry->node;
-
- rb_link_node(new_node, parent, n);
- rb_insert_color(new_node, &system_blks->root);
- }
+ new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+ GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+ new_entry->start_blk = start_blk;
+ new_entry->count = count;
+ new_entry->ino = ino;
+ new_node = &new_entry->node;
+
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, &system_blks->root);
/* Can we merge to the left? */
node = rb_prev(new_node);
@@ -151,40 +144,6 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
printk(KERN_CONT "\n");
}
-/*
- * Returns 1 if the passed-in block region (start_blk,
- * start_blk+count) is valid; 0 if some part of the block region
- * overlaps with filesystem metadata blocks.
- */
-static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
- struct ext4_system_blocks *system_blks,
- ext4_fsblk_t start_blk,
- unsigned int count)
-{
- struct ext4_system_zone *entry;
- struct rb_node *n;
-
- if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
- (start_blk + count < start_blk) ||
- (start_blk + count > ext4_blocks_count(sbi->s_es)))
- return 0;
-
- if (system_blks == NULL)
- return 1;
-
- n = system_blks->root.rb_node;
- while (n) {
- entry = rb_entry(n, struct ext4_system_zone, node);
- if (start_blk + count - 1 < entry->start_blk)
- n = n->rb_left;
- else if (start_blk >= (entry->start_blk + entry->count))
- n = n->rb_right;
- else
- return 0;
- }
- return 1;
-}
-
static int ext4_protect_reserved_inode(struct super_block *sb,
struct ext4_system_blocks *system_blks,
u32 ino)
@@ -214,19 +173,18 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
if (n == 0) {
i++;
} else {
- if (!ext4_data_block_valid_rcu(sbi, system_blks,
- map.m_pblk, n)) {
- err = -EFSCORRUPTED;
- __ext4_error(sb, __func__, __LINE__, -err,
- map.m_pblk, "blocks %llu-%llu "
- "from inode %u overlap system zone",
- map.m_pblk,
- map.m_pblk + map.m_len - 1, ino);
+ err = add_system_zone(system_blks, map.m_pblk, n, ino);
+ if (err < 0) {
+ if (err == -EFSCORRUPTED) {
+ __ext4_error(sb, __func__, __LINE__,
+ -err, map.m_pblk,
+ "blocks %llu-%llu from inode %u overlap system zone",
+ map.m_pblk,
+ map.m_pblk + map.m_len - 1,
+ ino);
+ }
break;
}
- err = add_system_zone(system_blks, map.m_pblk, n);
- if (err < 0)
- break;
i += n;
}
}
@@ -262,14 +220,6 @@ int ext4_setup_system_zone(struct super_block *sb)
int flex_size = ext4_flex_bg_size(sbi);
int ret;
- if (!test_opt(sb, BLOCK_VALIDITY)) {
- if (sbi->system_blks)
- ext4_release_system_zone(sb);
- return 0;
- }
- if (sbi->system_blks)
- return 0;
-
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
if (!system_blks)
return -ENOMEM;
@@ -277,22 +227,25 @@ int ext4_setup_system_zone(struct super_block *sb)
for (i=0; i < ngroups; i++) {
cond_resched();
if (ext4_bg_has_super(sb, i) &&
- ((i < 5) || ((i % flex_size) == 0)))
- add_system_zone(system_blks,
+ ((i < 5) || ((i % flex_size) == 0))) {
+ ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1);
+ ext4_bg_num_gdb(sb, i) + 1, 0);
+ if (ret)
+ goto err;
+ }
gdp = ext4_get_group_desc(sb, i, NULL);
ret = add_system_zone(system_blks,
- ext4_block_bitmap(sb, gdp), 1);
+ ext4_block_bitmap(sb, gdp), 1, 0);
if (ret)
goto err;
ret = add_system_zone(system_blks,
- ext4_inode_bitmap(sb, gdp), 1);
+ ext4_inode_bitmap(sb, gdp), 1, 0);
if (ret)
goto err;
ret = add_system_zone(system_blks,
ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group);
+ sbi->s_itb_per_group, 0);
if (ret)
goto err;
}
@@ -341,11 +294,24 @@ void ext4_release_system_zone(struct super_block *sb)
call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
}
-int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with some other filesystem metadata blocks.
+ */
+int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
unsigned int count)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_system_blocks *system_blks;
- int ret;
+ struct ext4_system_zone *entry;
+ struct rb_node *n;
+ int ret = 1;
+
+ if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+ (start_blk + count < start_blk) ||
+ (start_blk + count > ext4_blocks_count(sbi->s_es)))
+ return 0;
/*
* Lock the system zone to prevent it being released concurrently
@@ -354,8 +320,22 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
*/
rcu_read_lock();
system_blks = rcu_dereference(sbi->system_blks);
- ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk,
- count);
+ if (system_blks == NULL)
+ goto out_rcu;
+
+ n = system_blks->root.rb_node;
+ while (n) {
+ entry = rb_entry(n, struct ext4_system_zone, node);
+ if (start_blk + count - 1 < entry->start_blk)
+ n = n->rb_left;
+ else if (start_blk >= (entry->start_blk + entry->count))
+ n = n->rb_right;
+ else {
+ ret = (entry->ino == inode->i_ino);
+ break;
+ }
+ }
+out_rcu:
rcu_read_unlock();
return ret;
}
@@ -374,8 +354,7 @@ int ext4_check_blockref(const char *function, unsigned int line,
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
- unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- blk, 1))) {
+ unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
ext4_error_inode(inode, function, line, blk,
"invalid block");
return -EFSCORRUPTED;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 42f5060f3cdf..523e00d7b392 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -434,10 +434,36 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */
-
-/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
+/* User modifiable flags */
+#define EXT4_FL_USER_MODIFIABLE (EXT4_SECRM_FL | \
+ EXT4_UNRM_FL | \
+ EXT4_COMPR_FL | \
+ EXT4_SYNC_FL | \
+ EXT4_IMMUTABLE_FL | \
+ EXT4_APPEND_FL | \
+ EXT4_NODUMP_FL | \
+ EXT4_NOATIME_FL | \
+ EXT4_JOURNAL_DATA_FL | \
+ EXT4_NOTAIL_FL | \
+ EXT4_DIRSYNC_FL | \
+ EXT4_TOPDIR_FL | \
+ EXT4_EXTENTS_FL | \
+ 0x00400000 /* EXT4_EOFBLOCKS_FL */ | \
+ EXT4_DAX_FL | \
+ EXT4_PROJINHERIT_FL | \
+ EXT4_CASEFOLD_FL)
+
+/* User visible flags */
+#define EXT4_FL_USER_VISIBLE (EXT4_FL_USER_MODIFIABLE | \
+ EXT4_DIRTY_FL | \
+ EXT4_COMPRBLK_FL | \
+ EXT4_NOCOMPR_FL | \
+ EXT4_ENCRYPT_FL | \
+ EXT4_INDEX_FL | \
+ EXT4_VERITY_FL | \
+ EXT4_INLINE_DATA_FL)
+
+/* Flags we can manipulate with through FS_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
@@ -669,8 +695,6 @@ enum {
/*
* ioctl commands
*/
-#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
-#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
@@ -687,17 +711,11 @@ enum {
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
-#define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
-#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
-#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
/* ioctl codes 19--39 are reserved for fscrypt */
#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40)
#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32)
#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap)
-#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
-#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
-
#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
/*
@@ -722,8 +740,6 @@ enum {
/*
* ioctl commands in 32 bit emulation
*/
-#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
-#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
@@ -1054,6 +1070,7 @@ struct ext4_inode_info {
struct timespec64 i_crtime;
/* mballoc */
+ atomic_t i_prealloc_active;
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
@@ -1172,6 +1189,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
+#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
@@ -1501,10 +1519,13 @@ struct ext4_sb_info {
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
+ unsigned int s_mb_max_inode_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
+ unsigned int s_mb_prefetch;
+ unsigned int s_mb_prefetch_limit;
/* stats for buddy allocator */
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
@@ -1572,6 +1593,8 @@ struct ext4_sb_info {
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
+ atomic_t s_warning_count;
+ atomic_t s_msg_count;
/* Encryption context for '-o test_dummy_encryption' */
struct fscrypt_dummy_context s_dummy_enc_ctx;
@@ -1585,6 +1608,9 @@ struct ext4_sb_info {
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
#endif
+ /* Record the errseq of the backing block device */
+ errseq_t s_bdev_wb_err;
+ spinlock_t s_bdev_wb_lock;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2313,9 +2339,15 @@ struct ext4_lazy_init {
struct mutex li_list_mtx;
};
+enum ext4_li_mode {
+ EXT4_LI_MODE_PREFETCH_BBITMAP,
+ EXT4_LI_MODE_ITABLE,
+};
+
struct ext4_li_request {
struct super_block *lr_super;
- struct ext4_sb_info *lr_sbi;
+ enum ext4_li_mode lr_mode;
+ ext4_group_t lr_first_not_zeroed;
ext4_group_t lr_next_group;
struct list_head lr_request;
unsigned long lr_next_sched;
@@ -2446,7 +2478,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
- ext4_group_t block_group);
+ ext4_group_t block_group,
+ bool ignore_locked);
extern int ext4_wait_block_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct buffer_head *bh);
@@ -2651,9 +2684,15 @@ extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
-extern void ext4_discard_preallocations(struct inode *);
+extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
+extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
+ ext4_group_t group,
+ unsigned int nr, int *cnt);
+extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+ unsigned int nr);
+
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
@@ -2765,8 +2804,7 @@ extern int ext4_search_dir(struct buffer_head *bh,
struct ext4_filename *fname,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
-extern int ext4_generic_delete_entry(handle_t *handle,
- struct inode *dir,
+extern int ext4_generic_delete_entry(struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
void *entry_buf,
@@ -2924,12 +2962,6 @@ do { \
#endif
-extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
- __u32 compat);
-extern int ext4_update_rocompat_feature(handle_t *handle,
- struct super_block *sb, __u32 rocompat);
-extern int ext4_update_incompat_feature(handle_t *handle,
- struct super_block *sb, __u32 incompat);
extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -3145,6 +3177,7 @@ struct ext4_group_info {
(1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
#define EXT4_GROUP_INFO_IBITMAP_CORRUPT \
(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
+#define EXT4_GROUP_INFO_BBITMAP_READ_BIT 4
#define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
@@ -3159,6 +3192,8 @@ struct ext4_group_info {
(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+#define EXT4_MB_GRP_TEST_AND_SET_READ(grp) \
+ (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
#define EXT4_MAX_CONTENTION 8
#define EXT4_CONTENTION_THRESHOLD 2
@@ -3363,9 +3398,9 @@ extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
-extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
- ext4_fsblk_t start_blk,
- unsigned int count);
+extern int ext4_inode_block_valid(struct inode *inode,
+ ext4_fsblk_t start_blk,
+ unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
struct inode *, __le32 *, unsigned int);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0c76cdd44d90..760b9ee49dc0 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -195,6 +195,28 @@ static void ext4_journal_abort_handle(const char *caller, unsigned int line,
jbd2_journal_abort_handle(handle);
}
+static void ext4_check_bdev_write_error(struct super_block *sb)
+{
+ struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ /*
+ * If the block device has write error flag, it may have failed to
+ * async write out metadata buffers in the background. In this case,
+ * we could read old data from disk and write it out again, which
+ * may lead to on-disk filesystem inconsistency.
+ */
+ if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) {
+ spin_lock(&sbi->s_bdev_wb_lock);
+ err = errseq_check_and_advance(&mapping->wb_err, &sbi->s_bdev_wb_err);
+ spin_unlock(&sbi->s_bdev_wb_lock);
+ if (err)
+ ext4_error_err(sb, -err,
+ "Error while async write back metadata");
+ }
+}
+
int __ext4_journal_get_write_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh)
{
@@ -202,6 +224,9 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
might_sleep();
+ if (bh->b_bdev->bd_super)
+ ext4_check_bdev_write_error(bh->b_bdev->bd_super);
+
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
if (err)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 221f240eae60..a0481582187a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
@@ -340,7 +340,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
*/
if (lblock + len <= lblock)
return 0;
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ return ext4_inode_block_valid(inode, block, len);
}
static int ext4_valid_extent_idx(struct inode *inode,
@@ -348,7 +348,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
{
ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
+ return ext4_inode_block_valid(inode, block, 1);
}
static int ext4_valid_extent_entries(struct inode *inode,
@@ -507,14 +507,10 @@ __read_extent_tree_block(const char *function, unsigned int line,
}
if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
return bh;
- if (!ext4_has_feature_journal(inode->i_sb) ||
- (inode->i_ino !=
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) {
- err = __ext4_ext_check(function, line, inode,
- ext_block_hdr(bh), depth, pblk);
- if (err)
- goto errout;
- }
+ err = __ext4_ext_check(function, line, inode,
+ ext_block_hdr(bh), depth, pblk);
+ if (err)
+ goto errout;
set_buffer_verified(bh);
/*
* If this is a leaf block, cache all of its entries
@@ -693,10 +689,8 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path)
return;
depth = path->p_depth;
for (i = 0; i <= depth; i++, path++) {
- if (path->p_bh) {
- brelse(path->p_bh);
- path->p_bh = NULL;
- }
+ brelse(path->p_bh);
+ path->p_bh = NULL;
}
}
@@ -1915,7 +1909,7 @@ out:
/*
* ext4_ext_insert_extent:
- * tries to merge requsted extent into the existing extent or
+ * tries to merge requested extent into the existing extent or
* inserts requested extent as new one into the tree,
* creating new leaf in the no-space case.
*/
@@ -3125,7 +3119,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
*
*
* Splits extent [a, b] into two extents [a, @split) and [@split, b], states
- * of which are deterimined by split_flag.
+ * of which are determined by split_flag.
*
* There are two cases:
* a> the extent are splitted into two extent.
@@ -3650,7 +3644,7 @@ static int ext4_split_convert_extents(handle_t *handle,
eof_block = map->m_lblk + map->m_len;
/*
* It is safe to convert extent to initialized via explicit
- * zeroout only if extent is fully insde i_size or new_size.
+ * zeroout only if extent is fully inside i_size or new_size.
*/
depth = ext_depth(inode);
ex = path[depth].p_ext;
@@ -4272,7 +4266,7 @@ got_allocated_blocks:
* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free().
*/
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
ext4_free_blocks(handle, inode, NULL, newblock,
@@ -4495,7 +4489,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
/*
- * Round up offset. This is not fallocate, we neet to zero out
+ * Round up offset. This is not fallocate, we need to zero out
* blocks, so convert interior block aligned part of the range to
* unwritten and possibly manually zero out unaligned parts of the
* range.
@@ -5299,7 +5293,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
}
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
ret = ext4_es_remove_extent(inode, punch_start,
EXT_MAX_BLOCKS - punch_start);
@@ -5313,7 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start, SHIFT_LEFT);
@@ -5445,7 +5439,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_stop;
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
path = ext4_find_extent(inode, offset_lblk, NULL, 0);
if (IS_ERR(path)) {
@@ -5579,7 +5573,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
}
ex1 = path1[path1->p_depth].p_ext;
ex2 = path2[path2->p_depth].p_ext;
- /* Do we have somthing to swap ? */
+ /* Do we have something to swap ? */
if (unlikely(!ex2 || !ex1))
goto finish;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 129cc1dd6b79..7d61069531d3 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -145,10 +145,9 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
/* if we are the last writer on the inode, drop the block reservation */
if ((filp->f_mode & FMODE_WRITE) &&
(atomic_read(&inode->i_writecount) == 1) &&
- !EXT4_I(inode)->i_reserved_data_blocks)
- {
+ !EXT4_I(inode)->i_reserved_data_blocks) {
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)
@@ -428,6 +427,10 @@ restart:
*/
if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
!ext4_overwrite_io(inode, offset, count))) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
inode_unlock_shared(inode);
*ilock_shared = false;
inode_lock(inode);
@@ -812,7 +815,7 @@ out:
return err;
}
-static int ext4_file_open(struct inode * inode, struct file * filp)
+static int ext4_file_open(struct inode *inode, struct file *filp)
{
int ret;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3e133793a5a3..2924261226e0 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -233,7 +233,7 @@ static int __ext4fs_dirhash(const char *name, int len,
break;
case DX_HASH_HALF_MD4_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned;
- /* fall through */
+ fallthrough;
case DX_HASH_HALF_MD4:
p = name;
while (len > 0) {
@@ -247,7 +247,7 @@ static int __ext4fs_dirhash(const char *name, int len,
break;
case DX_HASH_TEA_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned;
- /* fall through */
+ fallthrough;
case DX_HASH_TEA:
p = name;
while (len > 0) {
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index be2b66eb65f7..80c9f33800be 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
@@ -858,8 +858,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
+ if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
@@ -1004,8 +1003,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
+ if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
@@ -1182,21 +1180,21 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
- /* fall through */
+ fallthrough;
case EXT4_IND_BLOCK:
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
- /* fall through */
+ fallthrough;
case EXT4_DIND_BLOCK:
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
- /* fall through */
+ fallthrough;
case EXT4_TIND_BLOCK:
;
}
@@ -1436,7 +1434,7 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
- /* fall through */
+ fallthrough;
case EXT4_IND_BLOCK:
if (++n >= n2)
break;
@@ -1445,7 +1443,7 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
- /* fall through */
+ fallthrough;
case EXT4_DIND_BLOCK:
if (++n >= n2)
break;
@@ -1454,7 +1452,7 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
- /* fall through */
+ fallthrough;
case EXT4_TIND_BLOCK:
;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index c3a1ad2db122..75c97bca0815 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -276,7 +276,7 @@ static int ext4_create_inline_data(handle_t *handle,
len = 0;
}
- /* Insert the the xttr entry. */
+ /* Insert the xttr entry. */
i.value = value;
i.value_len = len;
@@ -1706,7 +1706,7 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
goto out;
- err = ext4_generic_delete_entry(handle, dir, de_del, bh,
+ err = ext4_generic_delete_entry(dir, de_del, bh,
inline_start, inline_size, 0);
if (err)
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 44bad4bb8831..bf596467c234 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -383,7 +383,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
*/
if ((ei->i_reserved_data_blocks == 0) &&
!inode_is_open_for_write(inode))
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
}
static int __check_block_validity(struct inode *inode, const char *func,
@@ -394,8 +394,7 @@ static int __check_block_validity(struct inode *inode, const char *func,
(inode->i_ino ==
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
return 0;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
- map->m_len)) {
+ if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
"(length %d)", (unsigned long) map->m_lblk,
@@ -3288,7 +3287,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
if (PageChecked(page))
return 0;
if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ return jbd2_journal_try_to_free_buffers(journal, page);
else
return try_to_free_buffers(page);
}
@@ -4056,7 +4055,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (stop_block > first_block) {
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
ret = ext4_es_remove_extent(inode, first_block,
stop_block - first_block);
@@ -4163,7 +4162,7 @@ int ext4_truncate(struct inode *inode)
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
- return 0;
+ goto out_trace;
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
@@ -4172,16 +4171,14 @@ int ext4_truncate(struct inode *inode)
int has_inline = 1;
err = ext4_inline_data_truncate(inode, &has_inline);
- if (err)
- return err;
- if (has_inline)
- return 0;
+ if (err || has_inline)
+ goto out_trace;
}
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
if (ext4_inode_attach_jinode(inode) < 0)
- return 0;
+ goto out_trace;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4190,8 +4187,10 @@ int ext4_truncate(struct inode *inode)
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_trace;
+ }
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4211,7 +4210,7 @@ int ext4_truncate(struct inode *inode)
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
err = ext4_ext_truncate(handle, inode);
@@ -4242,6 +4241,7 @@ out_stop:
err = err2;
ext4_journal_stop(handle);
+out_trace:
trace_ext4_truncate_exit(inode);
return err;
}
@@ -4760,7 +4760,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = 0;
if (ei->i_file_acl &&
- !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+ !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
ext4_error_inode(inode, function, line, 0,
"iget: bad extended attribute block %llu",
ei->i_file_acl);
@@ -4901,7 +4901,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
(inode->i_state & I_DIRTY_TIME)) {
struct ext4_inode_info *ei = EXT4_I(inode);
- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
spin_lock(&ei->i_raw_lock);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 999cf6add39c..36eca3bc036a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -202,7 +202,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
reset_inode_seed(inode);
reset_inode_seed(inode_bl);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
err = ext4_mark_inode_dirty(handle, inode);
if (err < 0) {
@@ -819,12 +819,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FS_IOC_GETFSMAP:
return ext4_ioc_getfsmap(sb, (void __user *)arg);
- case EXT4_IOC_GETFLAGS:
+ case FS_IOC_GETFLAGS:
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (S_ISREG(inode->i_mode))
flags &= ~EXT4_PROJINHERIT_FL;
return put_user(flags, (int __user *) arg);
- case EXT4_IOC_SETFLAGS: {
+ case FS_IOC_SETFLAGS: {
int err;
if (!inode_owner_or_capable(inode))
@@ -1129,12 +1129,12 @@ resizefs_out:
case EXT4_IOC_PRECACHE_EXTENTS:
return ext4_ext_precache(inode);
- case EXT4_IOC_SET_ENCRYPTION_POLICY:
+ case FS_IOC_SET_ENCRYPTION_POLICY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
- case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
+ case FS_IOC_GET_ENCRYPTION_PWSALT: {
#ifdef CONFIG_FS_ENCRYPTION
int err, err2;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1174,7 +1174,7 @@ resizefs_out:
return -EOPNOTSUPP;
#endif
}
- case EXT4_IOC_GET_ENCRYPTION_POLICY:
+ case FS_IOC_GET_ENCRYPTION_POLICY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
@@ -1236,7 +1236,7 @@ resizefs_out:
case EXT4_IOC_GET_ES_CACHE:
return ext4_ioctl_get_es_cache(filp, arg);
- case EXT4_IOC_FSGETXATTR:
+ case FS_IOC_FSGETXATTR:
{
struct fsxattr fa;
@@ -1247,7 +1247,7 @@ resizefs_out:
return -EFAULT;
return 0;
}
- case EXT4_IOC_FSSETXATTR:
+ case FS_IOC_FSSETXATTR:
{
struct fsxattr fa, old_fa;
int err;
@@ -1313,11 +1313,11 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
/* These are just misnamed, they actually get/put from/to user an int */
switch (cmd) {
- case EXT4_IOC32_GETFLAGS:
- cmd = EXT4_IOC_GETFLAGS;
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
break;
- case EXT4_IOC32_SETFLAGS:
- cmd = EXT4_IOC_SETFLAGS;
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
break;
case EXT4_IOC32_GETVERSION:
cmd = EXT4_IOC_GETVERSION;
@@ -1361,9 +1361,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_RESIZE_FS:
case FITRIM:
case EXT4_IOC_PRECACHE_EXTENTS:
- case EXT4_IOC_SET_ENCRYPTION_POLICY:
- case EXT4_IOC_GET_ENCRYPTION_PWSALT:
- case EXT4_IOC_GET_ENCRYPTION_POLICY:
+ case FS_IOC_SET_ENCRYPTION_POLICY:
+ case FS_IOC_GET_ENCRYPTION_PWSALT:
+ case FS_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
case FS_IOC_ADD_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY:
@@ -1377,8 +1377,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CLEAR_ES_CACHE:
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
- case EXT4_IOC_FSGETXATTR:
- case EXT4_IOC_FSSETXATTR:
+ case FS_IOC_FSGETXATTR:
+ case FS_IOC_FSSETXATTR:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c0a331e2feb0..132c118d12e1 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -922,7 +922,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
bh[i] = NULL;
continue;
}
- bh[i] = ext4_read_block_bitmap_nowait(sb, group);
+ bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
if (IS_ERR(bh[i])) {
err = PTR_ERR(bh[i]);
bh[i] = NULL;
@@ -1279,9 +1279,6 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
e4b->bd_buddy_page = page;
e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
- BUG_ON(e4b->bd_bitmap_page == NULL);
- BUG_ON(e4b->bd_buddy_page == NULL);
-
return 0;
err:
@@ -1743,10 +1740,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
}
-/*
- * regular allocator, for general purposes allocation
- */
-
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b,
int finish_group)
@@ -2119,13 +2112,11 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
BUG_ON(cr < 0 || cr >= 4);
- free = grp->bb_free;
- if (free == 0)
- return false;
- if (cr <= 2 && free < ac->ac_g_ex.fe_len)
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
return false;
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+ free = grp->bb_free;
+ if (free == 0)
return false;
fragments = grp->bb_fragments;
@@ -2142,8 +2133,10 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
((group % flex_size) == 0))
return false;
- if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
- (free / fragments) >= ac->ac_g_ex.fe_len)
+ if (free < ac->ac_g_ex.fe_len)
+ return false;
+
+ if (ac->ac_2order > ac->ac_sb->s_blocksize_bits+1)
return true;
if (grp->bb_largest_free_order < ac->ac_2order)
@@ -2177,6 +2170,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
{
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
struct super_block *sb = ac->ac_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
ext4_grpblk_t free;
int ret = 0;
@@ -2195,7 +2189,25 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
+ struct ext4_group_desc *gdp =
+ ext4_get_group_desc(sb, group, NULL);
+ int ret;
+
+ /* cr=0/1 is a very optimistic search to find large
+ * good chunks almost for free. If buddy data is not
+ * ready, then this optimization makes no sense. But
+ * we never skip the first block group in a flex_bg,
+ * since this gets used for metadata block allocation,
+ * and we want to make sure we locate metadata blocks
+ * in the first block group in the flex_bg if possible.
+ */
+ if (cr < 2 &&
+ (!sbi->s_log_groups_per_flex ||
+ ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
+ !(ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
+ return 0;
+ ret = ext4_mb_init_group(sb, group, GFP_NOFS);
if (ret)
return ret;
}
@@ -2209,15 +2221,95 @@ out:
return ret;
}
+/*
+ * Start prefetching @nr block bitmaps starting at @group.
+ * Return the next group which needs to be prefetched.
+ */
+ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
+ unsigned int nr, int *cnt)
+{
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
+ struct buffer_head *bh;
+ struct blk_plug plug;
+
+ blk_start_plug(&plug);
+ while (nr-- > 0) {
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
+ NULL);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+ /*
+ * Prefetch block groups with free blocks; but don't
+ * bother if it is marked uninitialized on disk, since
+ * it won't require I/O to read. Also only try to
+ * prefetch once, so we avoid getblk() call, which can
+ * be expensive.
+ */
+ if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
+ EXT4_MB_GRP_NEED_INIT(grp) &&
+ ext4_free_group_clusters(sb, gdp) > 0 &&
+ !(ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
+ bh = ext4_read_block_bitmap_nowait(sb, group, true);
+ if (bh && !IS_ERR(bh)) {
+ if (!buffer_uptodate(bh) && cnt)
+ (*cnt)++;
+ brelse(bh);
+ }
+ }
+ if (++group >= ngroups)
+ group = 0;
+ }
+ blk_finish_plug(&plug);
+ return group;
+}
+
+/*
+ * Prefetching reads the block bitmap into the buffer cache; but we
+ * need to make sure that the buddy bitmap in the page cache has been
+ * initialized. Note that ext4_mb_init_group() will block if the I/O
+ * is not yet completed, or indeed if it was not initiated by
+ * ext4_mb_prefetch did not start the I/O.
+ *
+ * TODO: We should actually kick off the buddy bitmap setup in a work
+ * queue when the buffer I/O is completed, so that we don't block
+ * waiting for the block allocation bitmap read to finish when
+ * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
+ */
+void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+ unsigned int nr)
+{
+ while (nr-- > 0) {
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
+ NULL);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+ if (!group)
+ group = ext4_get_groups_count(sb);
+ group--;
+ grp = ext4_get_group_info(sb, group);
+
+ if (EXT4_MB_GRP_NEED_INIT(grp) &&
+ ext4_free_group_clusters(sb, gdp) > 0 &&
+ !(ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
+ if (ext4_mb_init_group(sb, group, GFP_NOFS))
+ break;
+ }
+ }
+}
+
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
- ext4_group_t ngroups, group, i;
+ ext4_group_t prefetch_grp = 0, ngroups, group, i;
int cr = -1;
int err = 0, first_err = 0;
+ unsigned int nr = 0, prefetch_ios = 0;
struct ext4_sb_info *sbi;
struct super_block *sb;
struct ext4_buddy e4b;
+ int lost;
sb = ac->ac_sb;
sbi = EXT4_SB(sb);
@@ -2237,8 +2329,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
goto out;
/*
- * ac->ac2_order is set only if the fe_len is a power of 2
- * if ac2_order is set we also set criteria to 0 so that we
+ * ac->ac_2order is set only if the fe_len is a power of 2
+ * if ac->ac_2order is set we also set criteria to 0 so that we
* try exact allocation using buddy.
*/
i = fls(ac->ac_g_ex.fe_len);
@@ -2282,6 +2374,7 @@ repeat:
* from the goal value specified
*/
group = ac->ac_g_ex.fe_group;
+ prefetch_grp = group;
for (i = 0; i < ngroups; group++, i++) {
int ret = 0;
@@ -2293,6 +2386,29 @@ repeat:
if (group >= ngroups)
group = 0;
+ /*
+ * Batch reads of the block allocation bitmaps
+ * to get multiple READs in flight; limit
+ * prefetching at cr=0/1, otherwise mballoc can
+ * spend a lot of time loading imperfect groups
+ */
+ if ((prefetch_grp == group) &&
+ (cr > 1 ||
+ prefetch_ios < sbi->s_mb_prefetch_limit)) {
+ unsigned int curr_ios = prefetch_ios;
+
+ nr = sbi->s_mb_prefetch;
+ if (ext4_has_feature_flex_bg(sb)) {
+ nr = (group / sbi->s_mb_prefetch) *
+ sbi->s_mb_prefetch;
+ nr = nr + sbi->s_mb_prefetch - group;
+ }
+ prefetch_grp = ext4_mb_prefetch(sb, group,
+ nr, &prefetch_ios);
+ if (prefetch_ios == curr_ios)
+ nr = 0;
+ }
+
/* This now checks without needing the buddy page */
ret = ext4_mb_good_group_nolock(ac, group, cr);
if (ret <= 0) {
@@ -2341,22 +2457,24 @@ repeat:
* We've been searching too long. Let's try to allocate
* the best chunk we've found so far
*/
-
ext4_mb_try_best_found(ac, &e4b);
if (ac->ac_status != AC_STATUS_FOUND) {
/*
* Someone more lucky has already allocated it.
* The only thing we can do is just take first
* found block(s)
- printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n");
*/
+ lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
+ mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
+ ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
+ ac->ac_b_ex.fe_len, lost);
+
ac->ac_b_ex.fe_group = 0;
ac->ac_b_ex.fe_start = 0;
ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_flags |= EXT4_MB_HINT_FIRST;
cr = 3;
- atomic_inc(&sbi->s_mb_lost_chunks);
goto repeat;
}
}
@@ -2367,6 +2485,10 @@ out:
mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
ac->ac_flags, cr, err);
+
+ if (nr)
+ ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
+
return err;
}
@@ -2439,7 +2561,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
for (i = 0; i <= 13; i++)
seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
sg.info.bb_counters[i] : 0);
- seq_printf(seq, " ]\n");
+ seq_puts(seq, " ]\n");
return 0;
}
@@ -2613,6 +2735,26 @@ static int ext4_mb_init_backend(struct super_block *sb)
goto err_freebuddy;
}
+ if (ext4_has_feature_flex_bg(sb)) {
+ /* a single flex group is supposed to be read by a single IO */
+ sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
+ sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
+ } else {
+ sbi->s_mb_prefetch = 32;
+ }
+ if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
+ sbi->s_mb_prefetch = ext4_get_groups_count(sb);
+ /* now many real IOs to prefetch within a single allocation at cr=0
+ * given cr=0 is an CPU-related optimization we shouldn't try to
+ * load too many groups, at some point we should start to use what
+ * we've got in memory.
+ * with an average random access time 5ms, it'd take a second to get
+ * 200 groups (* N with flex_bg), so let's make this limit 4
+ */
+ sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
+ if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
+ sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
+
return 0;
err_freebuddy:
@@ -2736,6 +2878,7 @@ int ext4_mb_init(struct super_block *sb)
sbi->s_mb_stats = MB_DEFAULT_STATS;
sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
+ sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
/*
* The default group preallocation is 512, which for 4k block
* sizes translates to 2 megabytes. However for bigalloc file
@@ -3090,7 +3233,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (!ext4_data_block_valid(sbi, block, len)) {
+ if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata", block, block+len);
/* File system mounted not to panic on error
@@ -3674,6 +3817,26 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
}
+static void ext4_mb_mark_pa_deleted(struct super_block *sb,
+ struct ext4_prealloc_space *pa)
+{
+ struct ext4_inode_info *ei;
+
+ if (pa->pa_deleted) {
+ ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
+ pa->pa_type, pa->pa_pstart, pa->pa_lstart,
+ pa->pa_len);
+ return;
+ }
+
+ pa->pa_deleted = 1;
+
+ if (pa->pa_type == MB_INODE_PA) {
+ ei = EXT4_I(pa->pa_inode);
+ atomic_dec(&ei->i_prealloc_active);
+ }
+}
+
static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
@@ -3706,7 +3869,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
return;
}
- pa->pa_deleted = 1;
+ ext4_mb_mark_pa_deleted(sb, pa);
spin_unlock(&pa->pa_lock);
grp_blk = pa->pa_pstart;
@@ -3830,6 +3993,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
spin_lock(pa->pa_obj_lock);
list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
spin_unlock(pa->pa_obj_lock);
+ atomic_inc(&ei->i_prealloc_active);
}
/*
@@ -4040,7 +4204,7 @@ repeat:
}
/* seems this one can be freed ... */
- pa->pa_deleted = 1;
+ ext4_mb_mark_pa_deleted(sb, pa);
/* we can trust pa_free ... */
free += pa->pa_free;
@@ -4103,7 +4267,7 @@ out_dbg:
*
* FIXME!! Make sure it is valid at all the call sites
*/
-void ext4_discard_preallocations(struct inode *inode)
+void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct super_block *sb = inode->i_sb;
@@ -4121,15 +4285,19 @@ void ext4_discard_preallocations(struct inode *inode)
mb_debug(sb, "discard preallocation for inode %lu\n",
inode->i_ino);
- trace_ext4_discard_preallocations(inode);
+ trace_ext4_discard_preallocations(inode,
+ atomic_read(&ei->i_prealloc_active), needed);
INIT_LIST_HEAD(&list);
+ if (needed == 0)
+ needed = UINT_MAX;
+
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
- while (!list_empty(&ei->i_prealloc_list)) {
- pa = list_entry(ei->i_prealloc_list.next,
+ while (!list_empty(&ei->i_prealloc_list) && needed) {
+ pa = list_entry(ei->i_prealloc_list.prev,
struct ext4_prealloc_space, pa_inode_list);
BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
spin_lock(&pa->pa_lock);
@@ -4146,10 +4314,11 @@ repeat:
}
if (pa->pa_deleted == 0) {
- pa->pa_deleted = 1;
+ ext4_mb_mark_pa_deleted(sb, pa);
spin_unlock(&pa->pa_lock);
list_del_rcu(&pa->pa_inode_list);
list_add(&pa->u.pa_tmp_list, &list);
+ needed--;
continue;
}
@@ -4399,7 +4568,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ac->ac_g_ex = ac->ac_o_ex;
ac->ac_flags = ar->flags;
- /* we have to define context: we'll we work with a file or
+ /* we have to define context: we'll work with a file or
* locality group. this is a policy, actually */
ext4_mb_group_or_file(ac);
@@ -4450,7 +4619,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
BUG_ON(pa->pa_type != MB_GROUP_PA);
/* seems this one can be freed ... */
- pa->pa_deleted = 1;
+ ext4_mb_mark_pa_deleted(sb, pa);
spin_unlock(&pa->pa_lock);
list_del_rcu(&pa->pa_inode_list);
@@ -4549,10 +4718,29 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
}
/*
+ * if per-inode prealloc list is too long, trim some PA
+ */
+static void ext4_mb_trim_inode_pa(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int count, delta;
+
+ count = atomic_read(&ei->i_prealloc_active);
+ delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
+ if (count > sbi->s_mb_max_inode_prealloc + delta) {
+ count -= sbi->s_mb_max_inode_prealloc;
+ ext4_discard_preallocations(inode, count);
+ }
+}
+
+/*
* release all resource we used in allocation
*/
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
{
+ struct inode *inode = ac->ac_inode;
+ struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_prealloc_space *pa = ac->ac_pa;
if (pa) {
@@ -4564,21 +4752,31 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
pa->pa_free -= ac->ac_b_ex.fe_len;
pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&pa->pa_lock);
+
+ /*
+ * We want to add the pa to the right bucket.
+ * Remove it from the list and while adding
+ * make sure the list to which we are adding
+ * doesn't grow big.
+ */
+ if (likely(pa->pa_free)) {
+ spin_lock(pa->pa_obj_lock);
+ list_del_rcu(&pa->pa_inode_list);
+ spin_unlock(pa->pa_obj_lock);
+ ext4_mb_add_n_trim(ac);
+ }
}
- }
- if (pa) {
- /*
- * We want to add the pa to the right bucket.
- * Remove it from the list and while adding
- * make sure the list to which we are adding
- * doesn't grow big.
- */
- if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
+
+ if (pa->pa_type == MB_INODE_PA) {
+ /*
+ * treat per-inode prealloc list as a lru list, then try
+ * to trim the least recently used PA.
+ */
spin_lock(pa->pa_obj_lock);
- list_del_rcu(&pa->pa_inode_list);
+ list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
spin_unlock(pa->pa_obj_lock);
- ext4_mb_add_n_trim(ac);
}
+
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
@@ -4588,6 +4786,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
+ ext4_mb_trim_inode_pa(inode);
return 0;
}
@@ -4915,7 +5114,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
- !ext4_data_block_valid(sbi, block, count)) {
+ !ext4_inode_block_valid(inode, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count);
goto error_return;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 6b4d17c2935d..e75b4749aa1c 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -73,6 +73,10 @@
*/
#define MB_DEFAULT_GROUP_PREALLOC 512
+/*
+ * maximum length of inode prealloc list
+ */
+#define MB_DEFAULT_MAX_INODE_PREALLOC 512
struct ext4_free_data {
/* this links the free block information from sb_info */
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 1ed86fb6c302..0d601b822875 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -686,8 +686,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
out:
if (*moved_len) {
- ext4_discard_preallocations(orig_inode);
- ext4_discard_preallocations(donor_inode);
+ ext4_discard_preallocations(orig_inode, 0);
+ ext4_discard_preallocations(donor_inode, 0);
}
ext4_ext_drop_refs(path);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56738b538ddf..153a9fbe1dd0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1396,8 +1396,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
return -1;
*res_dir = de;
return 1;
@@ -1858,7 +1858,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
blocksize, hinfo, map);
map -= count;
dx_sort_map(map, count);
- /* Split the existing block in the middle, size-wise */
+ /* Ensure that neither split block is over half full */
size = 0;
move = 0;
for (i = count-1; i >= 0; i--) {
@@ -1868,8 +1868,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
size += map[i].size;
move++;
}
- /* map index at which we will split */
- split = count - move;
+ /*
+ * map index at which we will split
+ *
+ * If the sum of active entries didn't exceed half the block size, just
+ * split it in half by count; each resulting block will have at least
+ * half the space free.
+ */
+ if (i > 0)
+ split = count - move;
+ else
+ split = count/2;
+
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
@@ -2455,8 +2465,7 @@ cleanup:
* ext4_generic_delete_entry deletes a directory entry by merging it
* with the previous entry
*/
-int ext4_generic_delete_entry(handle_t *handle,
- struct inode *dir,
+int ext4_generic_delete_entry(struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
void *entry_buf,
@@ -2472,7 +2481,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
@@ -2517,8 +2526,7 @@ static int ext4_delete_entry(handle_t *handle,
if (unlikely(err))
goto out;
- err = ext4_generic_delete_entry(handle, dir, de_del,
- bh, bh->b_data,
+ err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
dir->i_sb->s_blocksize, csum_size);
if (err)
goto out;
@@ -3193,30 +3201,33 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
* in separate transaction */
retval = dquot_initialize(dir);
if (retval)
- return retval;
+ goto out_trace;
retval = dquot_initialize(d_inode(dentry));
if (retval)
- return retval;
+ goto out_trace;
- retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
- if (!bh)
- goto end_unlink;
+ if (IS_ERR(bh)) {
+ retval = PTR_ERR(bh);
+ goto out_trace;
+ }
+ if (!bh) {
+ retval = -ENOENT;
+ goto out_trace;
+ }
inode = d_inode(dentry);
- retval = -EFSCORRUPTED;
- if (le32_to_cpu(de->inode) != inode->i_ino)
- goto end_unlink;
+ if (le32_to_cpu(de->inode) != inode->i_ino) {
+ retval = -EFSCORRUPTED;
+ goto out_bh;
+ }
handle = ext4_journal_start(dir, EXT4_HT_DIR,
EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- handle = NULL;
- goto end_unlink;
+ goto out_bh;
}
if (IS_DIRSYNC(dir))
@@ -3224,12 +3235,12 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
- goto end_unlink;
+ goto out_handle;
dir->i_ctime = dir->i_mtime = current_time(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
- goto end_unlink;
+ goto out_handle;
if (inode->i_nlink == 0)
ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
dentry->d_name.len, dentry->d_name.name);
@@ -3251,10 +3262,11 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
d_invalidate(dentry);
#endif
-end_unlink:
+out_handle:
+ ext4_journal_stop(handle);
+out_bh:
brelse(bh);
- if (handle)
- ext4_journal_stop(handle);
+out_trace:
trace_ext4_unlink_exit(dentry, retval);
return retval;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index f2df2db0786c..f014c5e473a9 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -140,7 +140,7 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
return;
}
ctx->cur_step++;
- /* fall-through */
+ fallthrough;
case STEP_VERITY:
if (ctx->enabled_steps & (1 << STEP_VERITY)) {
INIT_WORK(&ctx->work, verity_work);
@@ -148,7 +148,7 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
return;
}
ctx->cur_step++;
- /* fall-through */
+ fallthrough;
default:
__read_end_io(ctx->bio);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0907f907c47d..ea425b49b345 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -744,6 +744,7 @@ void __ext4_msg(struct super_block *sb,
struct va_format vaf;
va_list args;
+ atomic_inc(&EXT4_SB(sb)->s_msg_count);
if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
return;
@@ -754,9 +755,12 @@ void __ext4_msg(struct super_block *sb,
va_end(args);
}
-#define ext4_warning_ratelimit(sb) \
- ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
- "EXT4-fs warning")
+static int ext4_warning_ratelimit(struct super_block *sb)
+{
+ atomic_inc(&EXT4_SB(sb)->s_warning_count);
+ return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
+ "EXT4-fs warning");
+}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
@@ -1123,6 +1127,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
inode_set_iversion(&ei->vfs_inode, 1);
spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list);
+ atomic_set(&ei->i_prealloc_active, 0);
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
@@ -1216,7 +1221,7 @@ void ext4_clear_inode(struct inode *inode)
{
invalidate_inode_buffers(inode);
clear_inode(inode);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
@@ -1288,8 +1293,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
if (!page_has_buffers(page))
return 0;
if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page,
- wait & ~__GFP_DIRECT_RECLAIM);
+ return jbd2_journal_try_to_free_buffers(journal, page);
+
return try_to_free_buffers(page);
}
@@ -1522,6 +1527,7 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+ Opt_prefetch_block_bitmaps,
};
static const match_table_t tokens = {
@@ -1614,6 +1620,7 @@ static const match_table_t tokens = {
{Opt_inlinecrypt, "inlinecrypt"},
{Opt_nombcache, "nombcache"},
{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
+ {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1831,6 +1838,8 @@ static const struct mount_opts {
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_STRING},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+ {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
+ MOPT_SET},
{Opt_err, 0, 0}
};
@@ -3213,15 +3222,34 @@ static void print_daily_error_info(struct timer_list *t)
static int ext4_run_li_request(struct ext4_li_request *elr)
{
struct ext4_group_desc *gdp = NULL;
- ext4_group_t group, ngroups;
- struct super_block *sb;
+ struct super_block *sb = elr->lr_super;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t group = elr->lr_next_group;
unsigned long timeout = 0;
+ unsigned int prefetch_ios = 0;
int ret = 0;
- sb = elr->lr_super;
- ngroups = EXT4_SB(sb)->s_groups_count;
+ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
+ elr->lr_next_group = ext4_mb_prefetch(sb, group,
+ EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
+ if (prefetch_ios)
+ ext4_mb_prefetch_fini(sb, elr->lr_next_group,
+ prefetch_ios);
+ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
+ prefetch_ios);
+ if (group >= elr->lr_next_group) {
+ ret = 1;
+ if (elr->lr_first_not_zeroed != ngroups &&
+ !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
+ elr->lr_next_group = elr->lr_first_not_zeroed;
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ ret = 0;
+ }
+ }
+ return ret;
+ }
- for (group = elr->lr_next_group; group < ngroups; group++) {
+ for (; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
ret = 1;
@@ -3239,9 +3267,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
timeout = jiffies;
ret = ext4_init_inode_table(sb, group,
elr->lr_timeout ? 0 : 1);
+ trace_ext4_lazy_itable_init(sb, group);
if (elr->lr_timeout == 0) {
timeout = (jiffies - timeout) *
- elr->lr_sbi->s_li_wait_mult;
+ EXT4_SB(elr->lr_super)->s_li_wait_mult;
elr->lr_timeout = timeout;
}
elr->lr_next_sched = jiffies + elr->lr_timeout;
@@ -3256,15 +3285,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
*/
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
- struct ext4_sb_info *sbi;
-
if (!elr)
return;
- sbi = elr->lr_sbi;
-
list_del(&elr->lr_request);
- sbi->s_li_request = NULL;
+ EXT4_SB(elr->lr_super)->s_li_request = NULL;
kfree(elr);
}
@@ -3473,7 +3498,6 @@ static int ext4_li_info_new(void)
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
ext4_group_t start)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
elr = kzalloc(sizeof(*elr), GFP_KERNEL);
@@ -3481,8 +3505,13 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
return NULL;
elr->lr_super = sb;
- elr->lr_sbi = sbi;
- elr->lr_next_group = start;
+ elr->lr_first_not_zeroed = start;
+ if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
+ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+ else {
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ elr->lr_next_group = start;
+ }
/*
* Randomize first schedule time of the request to
@@ -3512,8 +3541,9 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}
- if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
- !test_opt(sb, INIT_INODE_TABLE))
+ if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) &&
+ (first_not_zeroed == ngroups || sb_rdonly(sb) ||
+ !test_opt(sb, INIT_INODE_TABLE)))
goto out;
elr = ext4_li_request_new(sb, first_not_zeroed);
@@ -4710,11 +4740,13 @@ no_journal:
ext4_set_resv_clusters(sb);
- err = ext4_setup_system_zone(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize system "
- "zone (%d)", err);
- goto failed_mount4a;
+ if (test_opt(sb, BLOCK_VALIDITY)) {
+ err = ext4_setup_system_zone(sb);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to initialize system "
+ "zone (%d)", err);
+ goto failed_mount4a;
+ }
}
ext4_ext_init(sb);
@@ -4777,12 +4809,23 @@ no_journal:
}
#endif /* CONFIG_QUOTA */
+ /*
+ * Save the original bdev mapping's wb_err value which could be
+ * used to detect the metadata async write error.
+ */
+ spin_lock_init(&sbi->s_bdev_wb_lock);
+ if (!sb_rdonly(sb))
+ errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
+ &sbi->s_bdev_wb_err);
+ sb->s_bdev->bd_super = sb;
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
- ext4_mark_recovery_complete(sb, es);
+ err = ext4_mark_recovery_complete(sb, es);
+ if (err)
+ goto failed_mount8;
}
if (EXT4_SB(sb)->s_journal) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -4816,6 +4859,8 @@ no_journal:
ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
+ atomic_set(&sbi->s_warning_count, 0);
+ atomic_set(&sbi->s_msg_count, 0);
kfree(orig_data);
return 0;
@@ -4825,10 +4870,8 @@ cantfind_ext4:
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
goto failed_mount;
-#ifdef CONFIG_QUOTA
failed_mount8:
ext4_unregister_sysfs(sb);
-#endif
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
@@ -4968,7 +5011,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
struct inode *journal_inode;
journal_t *journal;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return NULL;
journal_inode = ext4_get_journal_inode(sb, journal_inum);
if (!journal_inode)
@@ -4998,7 +5042,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
struct ext4_super_block *es;
struct block_device *bdev;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return NULL;
bdev = ext4_blkdev_get(j_dev, sb);
if (bdev == NULL)
@@ -5089,8 +5134,10 @@ static int ext4_load_journal(struct super_block *sb,
dev_t journal_dev;
int err = 0;
int really_read_only;
+ int journal_dev_ro;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return -EFSCORRUPTED;
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5100,7 +5147,31 @@ static int ext4_load_journal(struct super_block *sb,
} else
journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
- really_read_only = bdev_read_only(sb->s_bdev);
+ if (journal_inum && journal_dev) {
+ ext4_msg(sb, KERN_ERR,
+ "filesystem has both journal inode and journal device!");
+ return -EINVAL;
+ }
+
+ if (journal_inum) {
+ journal = ext4_get_journal(sb, journal_inum);
+ if (!journal)
+ return -EINVAL;
+ } else {
+ journal = ext4_get_dev_journal(sb, journal_dev);
+ if (!journal)
+ return -EINVAL;
+ }
+
+ journal_dev_ro = bdev_read_only(journal->j_dev);
+ really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
+
+ if (journal_dev_ro && !sb_rdonly(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "journal device read-only, try mounting with '-o ro'");
+ err = -EROFS;
+ goto err_out;
+ }
/*
* Are we loading a blank journal or performing recovery after a
@@ -5115,27 +5186,14 @@ static int ext4_load_journal(struct super_block *sb,
ext4_msg(sb, KERN_ERR, "write access "
"unavailable, cannot proceed "
"(try mounting with noload)");
- return -EROFS;
+ err = -EROFS;
+ goto err_out;
}
ext4_msg(sb, KERN_INFO, "write access will "
"be enabled during recovery");
}
}
- if (journal_inum && journal_dev) {
- ext4_msg(sb, KERN_ERR, "filesystem has both journal "
- "and inode journals!");
- return -EINVAL;
- }
-
- if (journal_inum) {
- if (!(journal = ext4_get_journal(sb, journal_inum)))
- return -EINVAL;
- } else {
- if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
- return -EINVAL;
- }
-
if (!(journal->j_flags & JBD2_BARRIER))
ext4_msg(sb, KERN_INFO, "barriers disabled");
@@ -5155,12 +5213,16 @@ static int ext4_load_journal(struct super_block *sb,
if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal");
- jbd2_journal_destroy(journal);
- return err;
+ goto err_out;
}
EXT4_SB(sb)->s_journal = journal;
- ext4_clear_journal_err(sb, es);
+ err = ext4_clear_journal_err(sb, es);
+ if (err) {
+ EXT4_SB(sb)->s_journal = NULL;
+ jbd2_journal_destroy(journal);
+ return err;
+ }
if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5171,6 +5233,10 @@ static int ext4_load_journal(struct super_block *sb,
}
return 0;
+
+err_out:
+ jbd2_journal_destroy(journal);
+ return err;
}
static int ext4_commit_super(struct super_block *sb, int sync)
@@ -5183,13 +5249,6 @@ static int ext4_commit_super(struct super_block *sb, int sync)
return error;
/*
- * The superblock bh should be mapped, but it might not be if the
- * device was hot-removed. Not much we can do but fail the I/O.
- */
- if (!buffer_mapped(sbh))
- return error;
-
- /*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
* write time when we are mounting the root file system
@@ -5256,26 +5315,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
+ int err;
journal_t *journal = EXT4_SB(sb)->s_journal;
if (!ext4_has_feature_journal(sb)) {
- BUG_ON(journal != NULL);
- return;
+ if (journal != NULL) {
+ ext4_error(sb, "Journal got removed while the fs was "
+ "mounted!");
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
jbd2_journal_lock_updates(journal);
- if (jbd2_journal_flush(journal) < 0)
+ err = jbd2_journal_flush(journal);
+ if (err < 0)
goto out;
if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
ext4_clear_feature_journal_needs_recovery(sb);
ext4_commit_super(sb, 1);
}
-
out:
jbd2_journal_unlock_updates(journal);
+ return err;
}
/*
@@ -5283,14 +5348,17 @@ out:
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
const char *errstr;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (!ext4_has_feature_journal(sb)) {
+ ext4_error(sb, "Journal got removed while the fs was mounted!");
+ return -EFSCORRUPTED;
+ }
journal = EXT4_SB(sb)->s_journal;
@@ -5315,6 +5383,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
}
+ return 0;
}
/*
@@ -5457,7 +5526,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned long old_sb_flags;
+ unsigned long old_sb_flags, vfs_flags;
struct ext4_mount_options old_opts;
int enable_quota = 0;
ext4_group_t g;
@@ -5500,6 +5569,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal && sbi->s_journal->j_task->io_context)
journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
+ /*
+ * Some options can be enabled by ext4 and/or by VFS mount flag
+ * either way we need to make sure it matches in both *flags and
+ * s_flags. Copy those selected flags from *flags to s_flags
+ */
+ vfs_flags = SB_LAZYTIME | SB_I_VERSION;
+ sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);
+
if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
err = -EINVAL;
goto restore_opts;
@@ -5553,9 +5630,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
- if (*flags & SB_LAZYTIME)
- sb->s_flags |= SB_LAZYTIME;
-
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
@@ -5585,8 +5659,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
(sbi->s_mount_state & EXT4_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- if (sbi->s_journal)
+ if (sbi->s_journal) {
+ /*
+ * We let remount-ro finish even if marking fs
+ * as clean failed...
+ */
ext4_mark_recovery_complete(sb, es);
+ }
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
} else {
@@ -5629,13 +5708,24 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
/*
+ * Update the original bdev mapping's wb_err value
+ * which could be used to detect the metadata async
+ * write error.
+ */
+ errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
+ &sbi->s_bdev_wb_err);
+
+ /*
* Mounting a RDONLY partition read-write, so reread
* and store the current valid flag. (It may have
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- if (sbi->s_journal)
- ext4_clear_journal_err(sb, es);
+ if (sbi->s_journal) {
+ err = ext4_clear_journal_err(sb, es);
+ if (err)
+ goto restore_opts;
+ }
sbi->s_mount_state = le16_to_cpu(es->s_state);
err = ext4_setup_super(sb, es, 0);
@@ -5665,7 +5755,17 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_register_li_request(sb, first_not_zeroed);
}
- ext4_setup_system_zone(sb);
+ /*
+ * Handle creation of system zone data early because it can fail.
+ * Releasing of existing data is done when we are sure remount will
+ * succeed.
+ */
+ if (test_opt(sb, BLOCK_VALIDITY) && !sbi->system_blks) {
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
+ }
+
if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
err = ext4_commit_super(sb, 1);
if (err)
@@ -5686,8 +5786,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
#endif
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
+ ext4_release_system_zone(sb);
+
+ /*
+ * Some options can be enabled by ext4 and/or by VFS mount flag
+ * either way we need to make sure it matches in both *flags and
+ * s_flags. Copy those selected flags from s_flags to *flags
+ */
+ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
- *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
return 0;
@@ -5701,6 +5809,8 @@ restore_opts:
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
+ ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 6c9fc9e21c13..bfabb799fa45 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -189,6 +189,9 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_RW_ATTR_SBI_UL(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ul, ext4_sb_info, _elname)
+#define EXT4_RO_ATTR_SBI_ATOMIC(_name,_elname) \
+ EXT4_ATTR_OFFSET(_name, 0444, pointer_atomic, ext4_sb_info, _elname)
+
#define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \
static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
@@ -215,6 +218,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
+EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
@@ -226,6 +230,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
#ifdef CONFIG_EXT4_DEBUG
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
#endif
+EXT4_RO_ATTR_SBI_ATOMIC(warning_count, s_warning_count);
+EXT4_RO_ATTR_SBI_ATOMIC(msg_count, s_msg_count);
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_RO_ATTR_ES_U8(first_error_errcode, s_first_error_errcode);
EXT4_RO_ATTR_ES_U8(last_error_errcode, s_last_error_errcode);
@@ -240,6 +246,8 @@ EXT4_RO_ATTR_ES_STRING(last_error_func, s_last_error_func, 32);
EXT4_ATTR(first_error_time, 0444, first_error_time);
EXT4_ATTR(last_error_time, 0444, last_error_time);
EXT4_ATTR(journal_task, 0444, journal_task);
+EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
+EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
static unsigned int old_bump_val = 128;
EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -257,6 +265,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_order2_req),
ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc),
+ ATTR_LIST(mb_max_inode_prealloc),
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
@@ -267,6 +276,8 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(msg_ratelimit_interval_ms),
ATTR_LIST(msg_ratelimit_burst),
ATTR_LIST(errors_count),
+ ATTR_LIST(warning_count),
+ ATTR_LIST(msg_count),
ATTR_LIST(first_error_ino),
ATTR_LIST(last_error_ino),
ATTR_LIST(first_error_block),
@@ -283,6 +294,8 @@ static struct attribute *ext4_attrs[] = {
#ifdef CONFIG_EXT4_DEBUG
ATTR_LIST(simulate_fail),
#endif
+ ATTR_LIST(mb_prefetch),
+ ATTR_LIST(mb_prefetch_limit),
NULL,
};
ATTRIBUTE_GROUPS(ext4);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7d2f6576d954..cba4b877c606 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1356,8 +1356,7 @@ retry:
block = 0;
while (wsize < bufsize) {
- if (bh != NULL)
- brelse(bh);
+ brelse(bh);
csize = (bufsize - wsize) > blocksize ? blocksize :
bufsize - wsize;
bh = ext4_getblk(handle, ea_inode, block, 0);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ed2bca0fce92..73683e58a08d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3550,6 +3550,9 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
unsigned long align = offset | iov_iter_alignment(iter);
struct block_device *bdev = inode->i_sb->s_bdev;
+ if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
+ return 1;
+
if (align & blocksize_mask) {
if (bdev)
blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 16322ea5b463..d9e52a7f3702 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2646,7 +2646,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
case FI_NEW_INODE:
if (set)
return;
- /* fall through */
+ fallthrough;
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
case FI_PIN_FILE:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9bbaa2614679..cb1b5b61a1da 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -618,10 +618,10 @@ pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
switch (dn->max_level) {
case 3:
base += 2 * indirect_blks;
- /* fall through */
+ fallthrough;
case 2:
base += 2 * direct_blks;
- /* fall through */
+ fallthrough;
case 1:
base += direct_index;
break;
@@ -2373,6 +2373,9 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
+ if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
+ nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;
+
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return 0;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a65d357f89a9..e247a5ef3713 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -799,7 +799,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (__is_large_section(sbi)) {
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
- unsigned short valid_blocks =
+ block_t valid_blocks =
get_valid_blocks(sbi, segno, true);
f2fs_bug_on(sbi, unlikely(!valid_blocks ||
@@ -815,7 +815,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned short valid_blocks;
+ block_t valid_blocks;
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]--;
@@ -4316,8 +4316,8 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno = 0, offset = 0, secno;
- unsigned short valid_blocks;
- unsigned short blks_per_sec = BLKS_PER_SEC(sbi);
+ block_t valid_blocks;
+ block_t blks_per_sec = BLKS_PER_SEC(sbi);
while (1) {
/* find dirty segment based on free segmap */
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2e4c0fa2074b..19ac5baad50f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -362,7 +362,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_OFD_SETLK:
case F_OFD_SETLKW:
#endif
- /* Fallthrough */
+ fallthrough;
case F_SETLK:
case F_SETLKW:
if (copy_from_user(&flock, argp, sizeof(flock)))
@@ -771,7 +771,7 @@ static void send_sigio_to_task(struct task_struct *p,
if (!do_send_sig_info(signum, &si, p, type))
break;
}
- /* fall-through - fall back on the old plain SIGIO signal */
+ fallthrough; /* fall back on the old plain SIGIO signal */
case 0:
do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index a605c3dddabc..58b27e4070a3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -42,7 +42,6 @@
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
- unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
@@ -144,7 +143,9 @@ static void inode_io_list_del_locked(struct inode *inode,
struct bdi_writeback *wb)
{
assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+ inode->i_state &= ~I_SYNC_QUEUED;
list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
}
@@ -1122,7 +1123,9 @@ void inode_io_list_del(struct inode *inode)
struct bdi_writeback *wb;
wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
inode_io_list_del_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
EXPORT_SYMBOL(inode_io_list_del);
@@ -1172,8 +1175,10 @@ void sb_clear_inode_writeback(struct inode *inode)
* the case then the inode must have been redirtied while it was being written
* out and we don't reset its dirtied_when.
*/
-static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
{
+ assert_spin_locked(&inode->i_lock);
+
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -1182,6 +1187,14 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
inode->dirtied_when = jiffies;
}
inode_io_list_move_locked(inode, wb, &wb->b_dirty);
+ inode->i_state &= ~I_SYNC_QUEUED;
+}
+
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+{
+ spin_lock(&inode->i_lock);
+ redirty_tail_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
}
/*
@@ -1220,16 +1233,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
#define EXPIRE_DIRTY_ATIME 0x0001
/*
- * Move expired (dirtied before work->older_than_this) dirty inodes from
+ * Move expired (dirtied before dirtied_before) dirty inodes from
* @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
- int flags,
- struct wb_writeback_work *work)
+ unsigned long dirtied_before)
{
- unsigned long *older_than_this = NULL;
- unsigned long expire_time;
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
@@ -1237,21 +1247,15 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
- if ((flags & EXPIRE_DIRTY_ATIME) == 0)
- older_than_this = work->older_than_this;
- else if (!work->for_sync) {
- expire_time = jiffies - (dirtytime_expire_interval * HZ);
- older_than_this = &expire_time;
- }
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (older_than_this &&
- inode_dirtied_after(inode, *older_than_this))
+ if (inode_dirtied_after(inode, dirtied_before))
break;
list_move(&inode->i_io_list, &tmp);
moved++;
- if (flags & EXPIRE_DIRTY_ATIME)
- set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
if (sb_is_blkdev_sb(inode->i_sb))
continue;
if (sb && sb != inode->i_sb)
@@ -1289,18 +1293,22 @@ out:
* |
* +--> dequeue for IO
*/
-static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
+ unsigned long dirtied_before)
{
int moved;
+ unsigned long time_expire_jif = dirtied_before;
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
+ if (!work->for_sync)
+ time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
- EXPIRE_DIRTY_ATIME, work);
+ time_expire_jif);
if (moved)
wb_io_lists_populated(wb);
- trace_writeback_queue_io(wb, work, moved);
+ trace_writeback_queue_io(wb, work, dirtied_before, moved);
}
static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1394,7 +1402,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
return;
}
@@ -1414,7 +1422,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* retrying writeback of the dirty page/inode
* that cannot be performed immediately.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
}
} else if (inode->i_state & I_DIRTY) {
/*
@@ -1422,10 +1430,11 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* such as delayed allocation during submission or metadata
* updates after data IO completion.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ inode->i_state &= ~I_SYNC_QUEUED;
} else {
/* The inode is clean. Remove from writeback lists. */
inode_io_list_del_locked(inode, wb);
@@ -1472,18 +1481,14 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
- if (inode->i_state & I_DIRTY_TIME) {
- if ((dirty & I_DIRTY_INODE) ||
- wbc->sync_mode == WB_SYNC_ALL ||
- unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
- unlikely(time_after(jiffies,
- (inode->dirtied_time_when +
- dirtytime_expire_interval * HZ)))) {
- dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
- trace_writeback_lazytime(inode);
- }
- } else
- inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
+ if ((inode->i_state & I_DIRTY_TIME) &&
+ ((dirty & I_DIRTY_INODE) ||
+ wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+ time_after(jiffies, inode->dirtied_time_when +
+ dirtytime_expire_interval * HZ))) {
+ dirty |= I_DIRTY_TIME;
+ trace_writeback_lazytime(inode);
+ }
inode->i_state &= ~dirty;
/*
@@ -1669,8 +1674,8 @@ static long writeback_sb_inodes(struct super_block *sb,
*/
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+ redirty_tail_locked(inode, wb);
spin_unlock(&inode->i_lock);
- redirty_tail(inode, wb);
continue;
}
if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
@@ -1811,7 +1816,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
if (list_empty(&wb->b_io))
- queue_io(wb, &work);
+ queue_io(wb, &work, jiffies);
__writeback_inodes_wb(wb, &work);
spin_unlock(&wb->list_lock);
blk_finish_plug(&plug);
@@ -1831,7 +1836,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
* takes longer than a dirty_writeback_interval interval, then leave a
* one-second gap.
*
- * older_than_this takes precedence over nr_to_write. So we'll only write back
+ * dirtied_before takes precedence over nr_to_write. So we'll only write back
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
@@ -1839,14 +1844,11 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
- unsigned long oldest_jif;
+ unsigned long dirtied_before = jiffies;
struct inode *inode;
long progress;
struct blk_plug plug;
- oldest_jif = jiffies;
- work->older_than_this = &oldest_jif;
-
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
for (;;) {
@@ -1880,14 +1882,14 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
- oldest_jif = jiffies -
+ dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
- oldest_jif = jiffies;
+ dirtied_before = jiffies;
trace_writeback_start(wb, work);
if (list_empty(&wb->b_io))
- queue_io(wb, work);
+ queue_io(wb, work, dirtied_before);
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
@@ -2182,7 +2184,7 @@ static int __init start_dirtytime_writeback(void)
__initcall(start_dirtytime_writeback);
int dirtytime_interval_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -2289,11 +2291,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode->i_state |= flags;
/*
- * If the inode is being synced, just update its dirty state.
- * The unlocker will place the inode on the appropriate
- * superblock list, based upon its state.
+ * If the inode is queued for writeback by flush worker, just
+ * update its dirty state. Once the flush worker is done with
+ * the inode it will place it on the appropriate superblock
+ * list, based upon its state.
*/
- if (inode->i_state & I_SYNC)
+ if (inode->i_state & I_SYNC_QUEUED)
goto out_unlock_inode;
/*
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 7d5c5dd2b1d5..2834d1afa6e8 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -521,7 +521,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
switch (param->type) {
case fs_value_is_string:
len = 1 + param->size;
- /* Fall through */
+ fallthrough;
case fs_value_is_flag:
len += strlen(param->key);
break;
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 2fa3f241b762..27a890aa493a 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -412,7 +412,7 @@ SYSCALL_DEFINE5(fsconfig,
break;
case FSCONFIG_SET_PATH_EMPTY:
lookup_flags = LOOKUP_EMPTY;
- /* fallthru */
+ fallthrough;
case FSCONFIG_SET_PATH:
param.type = fs_value_is_filename;
param.name = getname_flags(_value, lookup_flags, NULL);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 770f3a720db9..0f69fbd4af66 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -746,7 +746,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
}
if (n == 0)
break;
- /* fall through - To branching from existing tree */
+ fallthrough; /* To branching from existing tree */
case ALLOC_GROW_DEPTH:
if (i > 1 && i < mp->mp_fheight)
gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
@@ -757,7 +757,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
state = ALLOC_DATA;
if (n == 0)
break;
- /* fall through - To tree complete, adding data blocks */
+ fallthrough; /* To tree complete, adding data blocks */
case ALLOC_DATA:
BUG_ON(n > dblks);
BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index a58333e3980d..3763c9ff1406 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -902,6 +902,36 @@ static void empty_ail1_list(struct gfs2_sbd *sdp)
}
/**
+ * drain_bd - drain the buf and databuf queue for a failed transaction
+ * @tr: the transaction to drain
+ *
+ * When this is called, we're taking an error exit for a log write that failed
+ * but since we bypassed the after_commit functions, we need to remove the
+ * items from the buf and databuf queue.
+ */
+static void trans_drain(struct gfs2_trans *tr)
+{
+ struct gfs2_bufdata *bd;
+ struct list_head *head;
+
+ if (!tr)
+ return;
+
+ head = &tr->tr_buf;
+ while (!list_empty(head)) {
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
+ list_del_init(&bd->bd_list);
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
+ }
+ head = &tr->tr_databuf;
+ while (!list_empty(head)) {
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
+ list_del_init(&bd->bd_list);
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
+ }
+}
+
+/**
* gfs2_log_flush - flush incore transaction(s)
* @sdp: the filesystem
* @gl: The glock structure to flush. If NULL, flush the whole incore log
@@ -1005,6 +1035,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
out:
if (gfs2_withdrawn(sdp)) {
+ trans_drain(tr);
/**
* If the tr_list is empty, we're withdrawing during a log
* flush that targets a transaction, but the transaction was
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 4b67d47a7e00..6e173ae378c4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1599,7 +1599,7 @@ static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
case GFS2_QUOTA_ON:
state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
- /*FALLTHRU*/
+ fallthrough;
case GFS2_QUOTA_ACCOUNT:
state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED |
QCI_SYSFILE;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index e1c7eb6eb00a..6d4bf7ea7b3b 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -67,6 +67,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
INIT_LIST_HEAD(&tr->tr_databuf);
INIT_LIST_HEAD(&tr->tr_buf);
+ INIT_LIST_HEAD(&tr->tr_list);
INIT_LIST_HEAD(&tr->tr_ail1_list);
INIT_LIST_HEAD(&tr->tr_ail2_list);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 61eec628805d..0350dc7821bf 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -195,7 +195,7 @@ reread:
switch (sbi->s_vhdr->signature) {
case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX):
set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
- /*FALLTHRU*/
+ fallthrough;
case cpu_to_be16(HFSPLUS_VOLHEAD_SIG):
break;
case cpu_to_be16(HFSP_WRAP_MAGIC):
diff --git a/fs/io-wq.c b/fs/io-wq.c
index e92c4724480c..414beb543883 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -925,6 +925,24 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
return match->nr_running && !match->cancel_all;
}
+static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+ struct io_wq_work *work,
+ struct io_wq_work_node *prev)
+{
+ unsigned int hash = io_get_work_hash(work);
+ struct io_wq_work *prev_work = NULL;
+
+ if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) {
+ if (prev)
+ prev_work = container_of(prev, struct io_wq_work, list);
+ if (prev_work && io_get_work_hash(prev_work) == hash)
+ wqe->hash_tail[hash] = prev_work;
+ else
+ wqe->hash_tail[hash] = NULL;
+ }
+ wq_list_del(&wqe->work_list, &work->list, prev);
+}
+
static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
struct io_cb_cancel_data *match)
{
@@ -938,8 +956,7 @@ retry:
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
-
- wq_list_del(&wqe->work_list, node, prev);
+ io_wqe_remove_pending(wqe, work, prev);
spin_unlock_irqrestore(&wqe->lock, flags);
io_run_cancel(work, wqe);
match->nr_pending++;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index dc506b75659c..aae0ef2ec34d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -540,7 +540,6 @@ enum {
REQ_F_ISREG_BIT,
REQ_F_COMP_LOCKED_BIT,
REQ_F_NEED_CLEANUP_BIT,
- REQ_F_OVERFLOW_BIT,
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
@@ -583,8 +582,6 @@ enum {
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
/* needs cleanup */
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
- /* in overflow list */
- REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
/* already went through poll handler */
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
/* buffer already selected */
@@ -946,7 +943,8 @@ static void io_get_req_task(struct io_kiocb *req)
static inline void io_clean_op(struct io_kiocb *req)
{
- if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
+ if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
+ REQ_F_INFLIGHT))
__io_clean_op(req);
}
@@ -1152,7 +1150,7 @@ static void io_prep_async_work(struct io_kiocb *req)
io_req_init_async(req);
if (req->flags & REQ_F_ISREG) {
- if (def->hash_reg_file)
+ if (def->hash_reg_file || (req->ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
} else {
if (def->unbound_nonreg_file)
@@ -1366,7 +1364,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
compl.list);
list_move(&req->compl.list, &list);
- req->flags &= ~REQ_F_OVERFLOW;
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, req->result);
@@ -1419,7 +1416,6 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
}
io_clean_op(req);
- req->flags |= REQ_F_OVERFLOW;
req->result = res;
req->compl.cflags = cflags;
refcount_inc(&req->refs);
@@ -1563,17 +1559,6 @@ static bool io_dismantle_req(struct io_kiocb *req)
if (req->file)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
- if (req->flags & REQ_F_INFLIGHT) {
- struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->inflight_lock, flags);
- list_del(&req->inflight_entry);
- if (waitqueue_active(&ctx->inflight_wait))
- wake_up(&ctx->inflight_wait);
- spin_unlock_irqrestore(&ctx->inflight_lock, flags);
- }
-
return io_req_clean_work(req);
}
@@ -1761,12 +1746,16 @@ static struct io_kiocb *io_req_find_next(struct io_kiocb *req)
return __io_req_find_next(req);
}
-static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
+ bool twa_signal_ok)
{
struct task_struct *tsk = req->task;
struct io_ring_ctx *ctx = req->ctx;
int ret, notify;
+ if (tsk->flags & PF_EXITING)
+ return -ESRCH;
+
/*
* SQPOLL kernel thread doesn't need notification, just a wakeup. For
* all other cases, use TWA_SIGNAL unconditionally to ensure we're
@@ -1774,7 +1763,7 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
* will do the job.
*/
notify = 0;
- if (!(ctx->flags & IORING_SETUP_SQPOLL))
+ if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
notify = TWA_SIGNAL;
ret = task_work_add(tsk, cb, notify);
@@ -1801,8 +1790,10 @@ static void __io_req_task_cancel(struct io_kiocb *req, int error)
static void io_req_task_cancel(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_ring_ctx *ctx = req->ctx;
__io_req_task_cancel(req, -ECANCELED);
+ percpu_ref_put(&ctx->refs);
}
static void __io_req_task_submit(struct io_kiocb *req)
@@ -1834,7 +1825,7 @@ static void io_req_task_queue(struct io_kiocb *req)
init_task_work(&req->task_work, io_req_task_submit);
percpu_ref_get(&req->ctx->refs);
- ret = io_req_task_work_add(req, &req->task_work);
+ ret = io_req_task_work_add(req, &req->task_work, true);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -2024,6 +2015,12 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
static inline bool io_run_task_work(void)
{
+ /*
+ * Not safe to run on exiting task, and the task_work handling will
+ * not add work to such a task.
+ */
+ if (unlikely(current->flags & PF_EXITING))
+ return false;
if (current->task_works) {
__set_current_state(TASK_RUNNING);
task_work_run();
@@ -2063,6 +2060,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
req = list_first_entry(done, struct io_kiocb, inflight_entry);
if (READ_ONCE(req->result) == -EAGAIN) {
+ req->result = 0;
req->iopoll_completed = 0;
list_move_tail(&req->inflight_entry, &again);
continue;
@@ -2296,50 +2294,43 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
goto end_req;
}
- ret = io_import_iovec(rw, req, &iovec, &iter, false);
- if (ret < 0)
- goto end_req;
- ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
- if (!ret)
+ if (!req->io) {
+ ret = io_import_iovec(rw, req, &iovec, &iter, false);
+ if (ret < 0)
+ goto end_req;
+ ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
+ if (!ret)
+ return true;
+ kfree(iovec);
+ } else {
return true;
- kfree(iovec);
+ }
end_req:
req_set_fail_links(req);
io_req_complete(req, ret);
return false;
}
-
-static void io_rw_resubmit(struct callback_head *cb)
-{
- struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
- struct io_ring_ctx *ctx = req->ctx;
- int err;
-
- err = io_sq_thread_acquire_mm(ctx, req);
-
- if (io_resubmit_prep(req, err)) {
- refcount_inc(&req->refs);
- io_queue_async_work(req);
- }
-
- percpu_ref_put(&ctx->refs);
-}
#endif
static bool io_rw_reissue(struct io_kiocb *req, long res)
{
#ifdef CONFIG_BLOCK
+ umode_t mode = file_inode(req->file)->i_mode;
int ret;
+ if (!S_ISBLK(mode) && !S_ISREG(mode))
+ return false;
if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
return false;
- init_task_work(&req->task_work, io_rw_resubmit);
- percpu_ref_get(&req->ctx->refs);
+ ret = io_sq_thread_acquire_mm(req->ctx, req);
- ret = io_req_task_work_add(req, &req->task_work);
- if (!ret)
+ if (io_resubmit_prep(req, ret)) {
+ refcount_inc(&req->refs);
+ io_queue_async_work(req);
return true;
+ }
+
#endif
return false;
}
@@ -2578,7 +2569,7 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
* IO with EINTR.
*/
ret = -EINTR;
- /* fall through */
+ fallthrough;
default:
kiocb->ki_complete(kiocb, ret, 0);
}
@@ -2819,22 +2810,15 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
return __io_iov_buffer_select(req, iov, needs_lock);
}
-static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
- struct iovec **iovec, struct iov_iter *iter,
- bool needs_lock)
+static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
+ struct iovec **iovec, struct iov_iter *iter,
+ bool needs_lock)
{
void __user *buf = u64_to_user_ptr(req->rw.addr);
size_t sqe_len = req->rw.len;
ssize_t ret;
u8 opcode;
- if (req->io) {
- struct io_async_rw *iorw = &req->io->rw;
-
- *iovec = NULL;
- return iov_iter_count(&iorw->iter);
- }
-
opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL;
@@ -2848,10 +2832,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
if (req->flags & REQ_F_BUFFER_SELECT) {
buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
- if (IS_ERR(buf)) {
- *iovec = NULL;
+ if (IS_ERR(buf))
return PTR_ERR(buf);
- }
req->rw.len = sqe_len;
}
@@ -2879,6 +2861,21 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
}
+static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
+ struct iovec **iovec, struct iov_iter *iter,
+ bool needs_lock)
+{
+ if (!req->io)
+ return __io_import_iovec(rw, req, iovec, iter, needs_lock);
+ *iovec = NULL;
+ return iov_iter_count(&req->io->rw.iter);
+}
+
+static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
+{
+ return kiocb->ki_filp->f_mode & FMODE_STREAM ? NULL : &kiocb->ki_pos;
+}
+
/*
* For files that don't have ->read_iter() and ->write_iter(), handle them
* by looping over ->read() or ->write() manually.
@@ -2914,10 +2911,10 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
if (rw == READ) {
nr = file->f_op->read(file, iovec.iov_base,
- iovec.iov_len, &kiocb->ki_pos);
+ iovec.iov_len, io_kiocb_ppos(kiocb));
} else {
nr = file->f_op->write(file, iovec.iov_base,
- iovec.iov_len, &kiocb->ki_pos);
+ iovec.iov_len, io_kiocb_ppos(kiocb));
}
if (iov_iter_is_bvec(iter))
@@ -2998,17 +2995,15 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
bool force_nonblock)
{
struct io_async_rw *iorw = &req->io->rw;
+ struct iovec *iov;
ssize_t ret;
- iorw->iter.iov = iorw->fast_iov;
- /* reset ->io around the iovec import, we don't want to use it */
- req->io = NULL;
- ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov,
- &iorw->iter, !force_nonblock);
- req->io = container_of(iorw, struct io_async_ctx, rw);
+ iorw->iter.iov = iov = iorw->fast_iov;
+ ret = __io_import_iovec(rw, req, &iov, &iorw->iter, !force_nonblock);
if (unlikely(ret < 0))
return ret;
+ iorw->iter.iov = iov;
io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter);
return 0;
}
@@ -3054,6 +3049,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
if (!wake_page_match(wpq, key))
return 0;
+ req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
list_del_init(&wait->entry);
init_task_work(&req->task_work, io_req_task_submit);
@@ -3061,7 +3057,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
/* submit ref gets dropped, acquire a new one */
refcount_inc(&req->refs);
- ret = io_req_task_work_add(req, &req->task_work);
+ ret = io_req_task_work_add(req, &req->task_work, true);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -3074,27 +3070,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
return 1;
}
-static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
- struct wait_page_queue *wait,
- wait_queue_func_t func,
- void *data)
-{
- /* Can't support async wakeup with polled IO */
- if (kiocb->ki_flags & IOCB_HIPRI)
- return -EINVAL;
- if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
- wait->wait.func = func;
- wait->wait.private = data;
- wait->wait.flags = 0;
- INIT_LIST_HEAD(&wait->wait.entry);
- kiocb->ki_flags |= IOCB_WAITQ;
- kiocb->ki_waitq = wait;
- return 0;
- }
-
- return -EOPNOTSUPP;
-}
-
/*
* This controls whether a given IO request should be armed for async page
* based retry. If we return false here, the request is handed to the async
@@ -3109,16 +3084,17 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
*/
static bool io_rw_should_retry(struct io_kiocb *req)
{
+ struct wait_page_queue *wait = &req->io->rw.wpq;
struct kiocb *kiocb = &req->rw.kiocb;
- int ret;
/* never retry for NOWAIT, we just complete with -EAGAIN */
if (req->flags & REQ_F_NOWAIT)
return false;
/* Only for buffered IO */
- if (kiocb->ki_flags & IOCB_DIRECT)
+ if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
return false;
+
/*
* just use poll if we can, and don't attempt if the fs doesn't
* support callback based unlocks
@@ -3126,14 +3102,16 @@ static bool io_rw_should_retry(struct io_kiocb *req)
if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
return false;
- ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
- io_async_buf_func, req);
- if (!ret) {
- io_get_req_task(req);
- return true;
- }
+ wait->wait.func = io_async_buf_func;
+ wait->wait.private = req;
+ wait->wait.flags = 0;
+ INIT_LIST_HEAD(&wait->wait.entry);
+ kiocb->ki_flags |= IOCB_WAITQ;
+ kiocb->ki_flags &= ~IOCB_NOWAIT;
+ kiocb->ki_waitq = wait;
- return false;
+ io_get_req_task(req);
+ return true;
}
static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
@@ -3154,6 +3132,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
struct iov_iter __iter, *iter = &__iter;
ssize_t io_size, ret, ret2;
size_t iov_count;
+ bool no_async;
if (req->io)
iter = &req->io->rw.iter;
@@ -3161,6 +3140,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
if (ret < 0)
return ret;
+ iov_count = iov_iter_count(iter);
io_size = ret;
req->result = io_size;
ret = 0;
@@ -3170,11 +3150,11 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
kiocb->ki_flags &= ~IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- if (force_nonblock && !io_file_supports_async(req->file, READ))
+ no_async = force_nonblock && !io_file_supports_async(req->file, READ);
+ if (no_async)
goto copy_iov;
- iov_count = iov_iter_count(iter);
- ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count);
if (unlikely(ret))
goto out_free;
@@ -3186,14 +3166,19 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
ret = 0;
goto out_free;
} else if (ret == -EAGAIN) {
- if (!force_nonblock)
+ /* IOPOLL retry should happen for io-wq threads */
+ if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
goto done;
- ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
- if (ret)
- goto out_free;
- return -EAGAIN;
+ /* no retry on NONBLOCK marked file */
+ if (req->file->f_flags & O_NONBLOCK)
+ goto done;
+ /* some cases will consume bytes even on error returns */
+ iov_iter_revert(iter, iov_count - iov_iter_count(iter));
+ ret = 0;
+ goto copy_iov;
} else if (ret < 0) {
- goto out_free;
+ /* make sure -ERESTARTSYS -> -EINTR is done */
+ goto done;
}
/* read it all, or we did blocking attempt. no retry. */
@@ -3208,6 +3193,8 @@ copy_iov:
ret = ret2;
goto out_free;
}
+ if (no_async)
+ return -EAGAIN;
/* it's copied and will be cleaned with ->io */
iovec = NULL;
/* now use our persistent iterator, if we aren't already */
@@ -3238,6 +3225,7 @@ done:
kiocb_done(kiocb, ret, cs);
ret = 0;
out_free:
+ /* it's reportedly faster than delegating the null check to kfree() */
if (iovec)
kfree(iovec);
return ret;
@@ -3276,6 +3264,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
if (ret < 0)
return ret;
+ iov_count = iov_iter_count(iter);
io_size = ret;
req->result = io_size;
@@ -3292,8 +3281,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
(req->flags & REQ_F_ISREG))
goto copy_iov;
- iov_count = iov_iter_count(iter);
- ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), iov_count);
if (unlikely(ret))
goto out_free;
@@ -3325,15 +3313,25 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
*/
if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
ret2 = -EAGAIN;
+ /* no retry on NONBLOCK marked file */
+ if (ret2 == -EAGAIN && (req->file->f_flags & O_NONBLOCK))
+ goto done;
if (!force_nonblock || ret2 != -EAGAIN) {
+ /* IOPOLL retry should happen for io-wq threads */
+ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
+ goto copy_iov;
+done:
kiocb_done(kiocb, ret2, cs);
} else {
copy_iov:
+ /* some cases will consume bytes even on error returns */
+ iov_iter_revert(iter, iov_count - iov_iter_count(iter));
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
if (!ret)
return -EAGAIN;
}
out_free:
+ /* it's reportedly faster than delegating the null check to kfree() */
if (iovec)
kfree(iovec);
return ret;
@@ -3529,8 +3527,6 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
- if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
- return -EINVAL;
if (unlikely(sqe->ioprio || sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
@@ -3557,6 +3553,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
u64 flags, mode;
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
mode = READ_ONCE(sqe->len);
@@ -3571,6 +3569,8 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
size_t len;
int ret;
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
@@ -3788,7 +3788,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#if defined(CONFIG_EPOLL)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -3903,7 +3903,7 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
@@ -4600,6 +4600,7 @@ struct io_poll_table {
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, task_work_func_t func)
{
+ bool twa_signal_ok;
int ret;
/* for instances that support it check for an event match first: */
@@ -4615,12 +4616,20 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
percpu_ref_get(&req->ctx->refs);
/*
+ * If we using the signalfd wait_queue_head for this wakeup, then
+ * it's not safe to use TWA_SIGNAL as we could be recursing on the
+ * tsk->sighand->siglock on doing the wakeup. Should not be needed
+ * either, as the normal wakeup will suffice.
+ */
+ twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh);
+
+ /*
* If this fails, then the task is exiting. When a task exits, the
* work gets canceled, so just cancel this request as well instead
* of executing it. We can't safely execute it anyway, as we may not
* have the needed state needed for it anyway.
*/
- ret = io_req_task_work_add(req, &req->task_work);
+ ret = io_req_task_work_add(req, &req->task_work, twa_signal_ok);
if (unlikely(ret)) {
struct task_struct *tsk;
@@ -4736,6 +4745,8 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
if (mask && !(mask & poll->events))
return 0;
+ list_del_init(&wait->entry);
+
if (poll && poll->head) {
bool done;
@@ -4909,12 +4920,20 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
struct async_poll *apoll;
struct io_poll_table ipt;
__poll_t mask, ret;
+ int rw;
if (!req->file || !file_can_poll(req->file))
return false;
if (req->flags & REQ_F_POLLED)
return false;
- if (!def->pollin && !def->pollout)
+ if (def->pollin)
+ rw = READ;
+ else if (def->pollout)
+ rw = WRITE;
+ else
+ return false;
+ /* if we can't nonblock try, then no point in arming a poll handler */
+ if (!io_file_supports_async(req->file, rw))
return false;
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
@@ -5403,6 +5422,8 @@ static int io_async_cancel(struct io_kiocb *req)
static int io_files_update_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
+ if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
+ return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
if (sqe->ioprio || sqe->rw_flags)
@@ -5453,6 +5474,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (unlikely(ret))
return ret;
+ io_prep_async_work(req);
+
switch (req->opcode) {
case IORING_OP_NOP:
break;
@@ -5650,9 +5673,26 @@ static void __io_clean_op(struct io_kiocb *req)
io_put_file(req, req->splice.file_in,
(req->splice.flags & SPLICE_F_FD_IN_FIXED));
break;
+ case IORING_OP_OPENAT:
+ case IORING_OP_OPENAT2:
+ if (req->open.filename)
+ putname(req->open.filename);
+ break;
}
req->flags &= ~REQ_F_NEED_CLEANUP;
}
+
+ if (req->flags & REQ_F_INFLIGHT) {
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->inflight_lock, flags);
+ list_del(&req->inflight_entry);
+ if (waitqueue_active(&ctx->inflight_wait))
+ wake_up(&ctx->inflight_wait);
+ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+ req->flags &= ~REQ_F_INFLIGHT;
+ }
}
static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
@@ -6315,9 +6355,6 @@ static void io_submit_state_start(struct io_submit_state *state,
struct io_ring_ctx *ctx, unsigned int max_ios)
{
blk_start_plug(&state->plug);
-#ifdef CONFIG_BLOCK
- state->plug.nowait = true;
-#endif
state->comp.nr = 0;
INIT_LIST_HEAD(&state->comp.list);
state->comp.ctx = ctx;
@@ -7327,7 +7364,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
index = i & IORING_FILE_TABLE_MASK;
if (table->files[index]) {
- file = io_file_from_index(ctx, index);
+ file = table->files[index];
err = io_queue_file_removal(data, file);
if (err)
break;
@@ -7356,6 +7393,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
table->files[index] = file;
err = io_sqe_file_register(ctx, file, i);
if (err) {
+ table->files[index] = NULL;
fput(file);
break;
}
@@ -7455,9 +7493,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
{
int ret;
- mmgrab(current->mm);
- ctx->sqo_mm = current->mm;
-
if (ctx->flags & IORING_SETUP_SQPOLL) {
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
@@ -7502,10 +7537,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
return 0;
err:
io_finish_async(ctx);
- if (ctx->sqo_mm) {
- mmdrop(ctx->sqo_mm);
- ctx->sqo_mm = NULL;
- }
return ret;
}
@@ -7979,7 +8010,13 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
ACCT_LOCKED);
INIT_WORK(&ctx->exit_work, io_ring_exit_work);
- queue_work(system_wq, &ctx->exit_work);
+ /*
+ * Use system_unbound_wq to avoid spawning tons of event kworkers
+ * if we're exiting a ton of rings at the same time. It just adds
+ * noise and overhead, there's no discernable change in runtime
+ * over using system_wq.
+ */
+ queue_work(system_unbound_wq, &ctx->exit_work);
}
static int io_uring_release(struct inode *inode, struct file *file)
@@ -8016,6 +8053,28 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
return false;
}
+static inline bool io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
+{
+ return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files;
+}
+
+static bool io_match_link_files(struct io_kiocb *req,
+ struct files_struct *files)
+{
+ struct io_kiocb *link;
+
+ if (io_match_files(req, files))
+ return true;
+ if (req->flags & REQ_F_LINK_HEAD) {
+ list_for_each_entry(link, &req->link_list, link_list) {
+ if (io_match_files(link, files))
+ return true;
+ }
+ }
+ return false;
+}
+
/*
* We're looking to cancel 'req' because it's holding on to our files, but
* 'req' could be a link to another request. See if it is, and cancel that
@@ -8063,12 +8122,65 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx,
return found;
}
+static bool io_cancel_link_cb(struct io_wq_work *work, void *data)
+{
+ return io_match_link(container_of(work, struct io_kiocb, work), data);
+}
+
+static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+ enum io_wq_cancel cret;
+
+ /* cancel this particular work, if it's running */
+ cret = io_wq_cancel_work(ctx->io_wq, &req->work);
+ if (cret != IO_WQ_CANCEL_NOTFOUND)
+ return;
+
+ /* find links that hold this pending, cancel those */
+ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true);
+ if (cret != IO_WQ_CANCEL_NOTFOUND)
+ return;
+
+ /* if we have a poll link holding this pending, cancel that */
+ if (io_poll_remove_link(ctx, req))
+ return;
+
+ /* final option, timeout link is holding this req pending */
+ io_timeout_remove_link(ctx, req);
+}
+
+static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+ struct files_struct *files)
+{
+ struct io_defer_entry *de = NULL;
+ LIST_HEAD(list);
+
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_entry_reverse(de, &ctx->defer_list, list) {
+ if (io_match_link_files(de->req, files)) {
+ list_cut_position(&list, &ctx->defer_list, &de->list);
+ break;
+ }
+ }
+ spin_unlock_irq(&ctx->completion_lock);
+
+ while (!list_empty(&list)) {
+ de = list_first_entry(&list, struct io_defer_entry, list);
+ list_del_init(&de->list);
+ req_set_fail_links(de->req);
+ io_put_req(de->req);
+ io_req_complete(de->req, -ECANCELED);
+ kfree(de);
+ }
+}
+
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
if (list_empty_careful(&ctx->inflight_list))
return;
+ io_cancel_defer_files(ctx, files);
/* cancel all at once, should be faster than doing it one by one*/
io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true);
@@ -8094,35 +8206,11 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
/* We need to keep going until we don't find a matching req */
if (!cancel_req)
break;
-
- if (cancel_req->flags & REQ_F_OVERFLOW) {
- spin_lock_irq(&ctx->completion_lock);
- list_del(&cancel_req->compl.list);
- cancel_req->flags &= ~REQ_F_OVERFLOW;
-
- io_cqring_mark_overflow(ctx);
- WRITE_ONCE(ctx->rings->cq_overflow,
- atomic_inc_return(&ctx->cached_cq_overflow));
- io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
-
- /*
- * Put inflight ref and overflow ref. If that's
- * all we had, then we're done with this request.
- */
- if (refcount_sub_and_test(2, &cancel_req->refs)) {
- io_free_req(cancel_req);
- finish_wait(&ctx->inflight_wait, &wait);
- continue;
- }
- } else {
- io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
- /* could be a link, check and remove if it is */
- if (!io_poll_remove_link(ctx, cancel_req))
- io_timeout_remove_link(ctx, cancel_req);
- io_put_req(cancel_req);
- }
-
+ /* cancel this request, or head link requests */
+ io_attempt_cancel(ctx, cancel_req);
+ io_put_req(cancel_req);
+ /* cancellations _may_ trigger task work */
+ io_run_task_work();
schedule();
finish_wait(&ctx->inflight_wait, &wait);
}
@@ -8328,11 +8416,19 @@ static int io_uring_show_cred(int id, void *p, void *data)
static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
{
+ bool has_lock;
int i;
- mutex_lock(&ctx->uring_lock);
+ /*
+ * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+ * since fdinfo case grabs it in the opposite direction of normal use
+ * cases. If we fail to get the lock, we just don't iterate any
+ * structures that could be going away outside the io_uring mutex.
+ */
+ has_lock = mutex_trylock(&ctx->uring_lock);
+
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
- for (i = 0; i < ctx->nr_user_files; i++) {
+ for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
struct fixed_file_table *table;
struct file *f;
@@ -8344,13 +8440,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "%5u: <none>\n", i);
}
seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
- for (i = 0; i < ctx->nr_user_bufs; i++) {
+ for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
(unsigned int) buf->len);
}
- if (!idr_is_empty(&ctx->personality_idr)) {
+ if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
seq_printf(m, "Personalities:\n");
idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
}
@@ -8365,7 +8461,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
req->task->task_works != NULL);
}
spin_unlock_irq(&ctx->completion_lock);
- mutex_unlock(&ctx->uring_lock);
+ if (has_lock)
+ mutex_unlock(&ctx->uring_lock);
}
static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
@@ -8548,6 +8645,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->user = user;
ctx->creds = get_current_cred();
+ mmgrab(current->mm);
+ ctx->sqo_mm = current->mm;
+
/*
* Account memory _before_ installing the file descriptor. Once
* the descriptor is installed, it can get closed at any time. Also
diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
index 89f61d93c0bc..107ee80c3568 100644
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@@ -127,7 +127,7 @@ iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
SEEK_HOLE);
if (offset < 0)
return length;
- /* fall through */
+ fallthrough;
case IOMAP_HOLE:
*(loff_t *)data = offset;
return 0;
@@ -175,7 +175,7 @@ iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
SEEK_DATA);
if (offset < 0)
return length;
- /*FALLTHRU*/
+ fallthrough;
default:
*(loff_t *)data = offset;
return 0;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e4944436e733..17fdc482f554 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1285,7 +1285,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
* superblock as being NULL to prevent the journal destroy from writing
* back a bogus superblock.
*/
-static void journal_fail_superblock (journal_t *journal)
+static void journal_fail_superblock(journal_t *journal)
{
struct buffer_head *bh = journal->j_sb_buffer;
brelse(bh);
@@ -1367,8 +1367,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
int ret;
/* Buffer got discarded which means block device got invalidated */
- if (!buffer_mapped(bh))
+ if (!buffer_mapped(bh)) {
+ unlock_buffer(bh);
return -EIO;
+ }
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
@@ -1815,7 +1817,7 @@ int jbd2_journal_destroy(journal_t *journal)
/**
- *int jbd2_journal_check_used_features () - Check if features specified are used.
+ *int jbd2_journal_check_used_features() - Check if features specified are used.
* @journal: Journal to check.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -1825,7 +1827,7 @@ int jbd2_journal_destroy(journal_t *journal)
* features. Return true (non-zero) if it does.
**/
-int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
+int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
journal_superblock_t *sb;
@@ -1860,7 +1862,7 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
* all of a given set of features on this journal. Return true
* (non-zero) if it can. */
-int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
+int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
if (!compat && !ro && !incompat)
@@ -1882,7 +1884,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com
}
/**
- * int jbd2_journal_set_features () - Mark a given journal feature in the superblock
+ * int jbd2_journal_set_features() - Mark a given journal feature in the superblock
* @journal: Journal to act on.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -1893,7 +1895,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com
*
*/
-int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
+int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
#define INCOMPAT_FEATURE_ON(f) \
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 2ed278f0dced..faa97d748474 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -690,14 +690,11 @@ static int do_one_pass(journal_t *journal,
* number. */
if (pass == PASS_SCAN &&
jbd2_has_feature_checksum(journal)) {
- int chksum_err, chksum_seen;
struct commit_header *cbh =
(struct commit_header *)bh->b_data;
unsigned found_chksum =
be32_to_cpu(cbh->h_chksum[0]);
- chksum_err = chksum_seen = 0;
-
if (info->end_transaction) {
journal->j_failed_commit =
info->end_transaction;
@@ -705,42 +702,23 @@ static int do_one_pass(journal_t *journal,
break;
}
- if (crc32_sum == found_chksum &&
- cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
- cbh->h_chksum_size ==
- JBD2_CRC32_CHKSUM_SIZE)
- chksum_seen = 1;
- else if (!(cbh->h_chksum_type == 0 &&
- cbh->h_chksum_size == 0 &&
- found_chksum == 0 &&
- !chksum_seen))
- /*
- * If fs is mounted using an old kernel and then
- * kernel with journal_chksum is used then we
- * get a situation where the journal flag has
- * checksum flag set but checksums are not
- * present i.e chksum = 0, in the individual
- * commit blocks.
- * Hence to avoid checksum failures, in this
- * situation, this extra check is added.
- */
- chksum_err = 1;
-
- if (chksum_err) {
- info->end_transaction = next_commit_ID;
-
- if (!jbd2_has_feature_async_commit(journal)) {
- journal->j_failed_commit =
- next_commit_ID;
- brelse(bh);
- break;
- }
- }
+ /* Neither checksum match nor unused? */
+ if (!((crc32_sum == found_chksum &&
+ cbh->h_chksum_type ==
+ JBD2_CRC32_CHKSUM &&
+ cbh->h_chksum_size ==
+ JBD2_CRC32_CHKSUM_SIZE) ||
+ (cbh->h_chksum_type == 0 &&
+ cbh->h_chksum_size == 0 &&
+ found_chksum == 0)))
+ goto chksum_error;
+
crc32_sum = ~0;
}
if (pass == PASS_SCAN &&
!jbd2_commit_block_csum_verify(journal,
bh->b_data)) {
+ chksum_error:
info->end_transaction = next_commit_ID;
if (!jbd2_has_feature_async_commit(journal)) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e91aad3637a2..43985738aa86 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2026,6 +2026,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
}
@@ -2078,10 +2081,6 @@ out:
* int jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @gfp_mask: we use the mask to detect how hard should we try to release
- * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
- * code to release the buffers.
- *
*
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
@@ -2112,11 +2111,11 @@ out:
*
* Return 0 on failure, 1 on success
*/
-int jbd2_journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t gfp_mask)
+int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
{
struct buffer_head *head;
struct buffer_head *bh;
+ bool has_write_io_error = false;
int ret = 0;
J_ASSERT(PageLocked(page));
@@ -2141,11 +2140,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
jbd2_journal_put_journal_head(jh);
if (buffer_jbd(bh))
goto busy;
+
+ /*
+ * If we free a metadata buffer which has been failed to
+ * write out, the jbd2 checkpoint procedure will not detect
+ * this failure and may lead to filesystem inconsistency
+ * after cleanup journal tail.
+ */
+ if (buffer_write_io_error(bh)) {
+ pr_err("JBD2: Error while async write back metadata bh %llu.",
+ (unsigned long long)bh->b_blocknr);
+ has_write_io_error = true;
+ }
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy:
+ if (has_write_io_error)
+ jbd2_journal_abort(journal, -EIO);
+
return ret;
}
@@ -2572,6 +2586,13 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
+
+ /*
+ * b_transaction must be set, otherwise the new b_transaction won't
+ * be holding jh reference
+ */
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __jbd2_journal_file_buffer() must not
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index ab8cdd9e9325..78858f6e9583 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -341,7 +341,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
rdev = old_decode_dev(je16_to_cpu(jdev.old_id));
else
rdev = new_decode_dev(je32_to_cpu(jdev.new_id));
- /* fall through */
+ fallthrough;
case S_IFSOCK:
case S_IFIFO:
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index bccfc40b3a74..2f6f0b140c05 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1273,7 +1273,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
dbg_readinode("symlink's target '%s' cached\n", f->target);
}
- /* fall through... */
+ fallthrough;
case S_IFBLK:
case S_IFCHR:
diff --git a/fs/libfs.c b/fs/libfs.c
index 4d08edf19c78..e0d42e977d9a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -137,11 +137,11 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case 1:
offset += file->f_pos;
- /* fall through */
+ fallthrough;
case 0:
if (offset >= 0)
break;
- /* fall through */
+ fallthrough;
default:
return -EINVAL;
}
diff --git a/fs/locks.c b/fs/locks.c
index 8fc0542f5132..1f84a03601fe 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1499,7 +1499,7 @@ static void lease_clear_pending(struct file_lock *fl, int arg)
switch (arg) {
case F_UNLCK:
fl->fl_flags &= ~FL_UNLOCK_PENDING;
- /* fall through */
+ fallthrough;
case F_RDLCK:
fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
}
@@ -2525,7 +2525,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
cmd = F_SETLKW;
file_lock->fl_flags |= FL_OFDLCK;
file_lock->fl_owner = filp;
- /* Fallthrough */
+ fallthrough;
case F_SETLKW:
file_lock->fl_flags |= FL_SLEEP;
}
@@ -2656,7 +2656,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
cmd = F_SETLKW64;
file_lock->fl_flags |= FL_OFDLCK;
file_lock->fl_owner = filp;
- /* Fallthrough */
+ fallthrough;
case F_SETLKW64:
file_lock->fl_flags |= FL_SLEEP;
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d1a0e2c8b1b4..08108b6d2fa1 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -753,7 +753,7 @@ out:
case -ENODEV:
/* Our extent block devices are unavailable */
set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
- /* Fall through */
+ fallthrough;
case 0:
return lseg;
default:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a12f42e7d8c7..cb52db9a0cfb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -579,6 +579,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
+ if (entry->label)
+ entry->label->len = NFS4_MAXLABELLEN;
+
status = xdr_decode(desc, entry, &stream);
if (status != 0) {
if (status == -EAGAIN)
@@ -1181,7 +1184,7 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
/* A NFSv4 OPEN will revalidate later */
if (server->caps & NFS_CAP_ATOMIC_OPEN)
goto out;
- /* Fallthrough */
+ fallthrough;
case S_IFDIR:
if (server->flags & NFS_MOUNT_NOCTO)
break;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a13e69009f19..7f5aa0403e16 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -187,7 +187,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
pnfs_error_mark_layout_for_return(inode, lseg);
pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
- /* fall through */
+ fallthrough;
default:
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 965145592750..a163533446fa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -715,7 +715,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
}
static void
-ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -724,7 +724,7 @@ ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
}
static void
-ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -734,14 +734,14 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
static struct nfs4_pnfs_ds *
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx,
+ u32 start_idx, u32 *best_idx,
bool check_device)
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
bool fail_return = false;
- int idx;
+ u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
@@ -766,21 +766,21 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
struct nfs4_pnfs_ds *ds;
@@ -791,7 +791,8 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
}
static struct nfs4_pnfs_ds *
-ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx)
+ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
+ u32 *best_idx)
{
struct pnfs_layout_segment *lseg = pgio->pg_lseg;
struct nfs4_pnfs_ds *ds;
@@ -837,7 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_pgio_mirror *pgm;
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
- int ds_idx;
+ u32 ds_idx, i;
retry:
ff_layout_pg_check_layout(pgio, req);
@@ -863,14 +864,14 @@ retry:
goto retry;
}
- mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
+ for (i = 0; i < pgio->pg_mirror_count; i++) {
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
+ pgm = &pgio->pg_mirrors[i];
+ pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+ }
pgio->pg_mirror_idx = ds_idx;
- /* read always uses only one mirror - idx 0 for pgio layer */
- pgm = &pgio->pg_mirrors[0];
- pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
-
if (NFS_SERVER(pgio->pg_inode)->flags &
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
@@ -894,7 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs4_ff_layout_mirror *mirror;
struct nfs_pgio_mirror *pgm;
struct nfs4_pnfs_ds *ds;
- int i;
+ u32 i;
retry:
ff_layout_pg_check_layout(pgio, req);
@@ -1038,7 +1039,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
u32 idx = hdr->pgio_mirror_idx + 1;
- int new_idx = 0;
+ u32 new_idx = 0;
if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
ff_layout_send_layouterror(hdr->lseg);
@@ -1075,7 +1076,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
@@ -1133,7 +1134,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- /* fall through */
+ fallthrough;
default:
if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
@@ -1149,7 +1150,7 @@ reset:
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -1184,7 +1185,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
@@ -1211,7 +1212,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
}
static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
- int idx, u64 offset, u64 length,
+ u32 idx, u64 offset, u64 length,
u32 *op_status, int opnum, int error)
{
struct nfs4_ff_layout_mirror *mirror;
@@ -1260,7 +1261,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
*/
if (opnum == OP_READ)
break;
- /* Fallthrough */
+ fallthrough;
default:
pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
lseg);
@@ -1809,7 +1810,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
- int idx = hdr->pgio_mirror_idx;
+ u32 idx = hdr->pgio_mirror_idx;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 66949da0e827..524812984e2d 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -651,21 +651,21 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
case Opt_xprt_udp6:
protofamily = AF_INET6;
- /* fall through */
+ fallthrough;
case Opt_xprt_udp:
ctx->flags &= ~NFS_MOUNT_TCP;
ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp6:
protofamily = AF_INET6;
- /* fall through */
+ fallthrough;
case Opt_xprt_tcp:
ctx->flags |= NFS_MOUNT_TCP;
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
break;
case Opt_xprt_rdma6:
protofamily = AF_INET6;
- /* fall through */
+ fallthrough;
case Opt_xprt_rdma:
/* vector side protocols to TCP */
ctx->flags |= NFS_MOUNT_TCP;
@@ -684,13 +684,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
case Opt_xprt_udp6:
mountfamily = AF_INET6;
- /* fall through */
+ fallthrough;
case Opt_xprt_udp:
ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp6:
mountfamily = AF_INET6;
- /* fall through */
+ fallthrough;
case Opt_xprt_tcp:
ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
break;
@@ -899,9 +899,11 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
ctx->version = NFS_DEFAULT_VERSION;
switch (data->version) {
case 1:
- data->namlen = 0; /* fall through */
+ data->namlen = 0;
+ fallthrough;
case 2:
- data->bsize = 0; /* fall through */
+ data->bsize = 0;
+ fallthrough;
case 3:
if (data->flags & NFS_MOUNT_VER3)
goto out_no_v3;
@@ -909,14 +911,14 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
/* Turn off security negotiation */
extra_flags |= NFS_MOUNT_SECFLAVOUR;
- /* fall through */
+ fallthrough;
case 4:
if (data->flags & NFS_MOUNT_SECFLAVOUR)
goto out_no_sec;
- /* fall through */
+ fallthrough;
case 5:
memset(data->context, 0, sizeof(data->context));
- /* fall through */
+ fallthrough;
case 6:
if (data->flags & NFS_MOUNT_VER3) {
if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 26c94b32d6f4..c6c863382f37 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -108,7 +108,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
case -EPROTONOSUPPORT:
dprintk("NFS_V3_ACL extension not supported; disabling\n");
server->caps &= ~NFS_CAP_ACLS;
- /* fall through */
+ fallthrough;
case -ENOTSUPP:
status = -EOPNOTSUPP;
default:
@@ -228,7 +228,7 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS_V3_ACL SETACL RPC not supported"
"(will not retry)\n");
server->caps &= ~NFS_CAP_ACLS;
- /* fall through */
+ fallthrough;
case -ENOTSUPP:
status = -EOPNOTSUPP;
}
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 142225f0af59..2b2211d1234e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -356,7 +356,15 @@ static ssize_t _nfs42_proc_copy(struct file *src,
truncate_pagecache_range(dst_inode, pos_dst,
pos_dst + res->write_res.count);
-
+ spin_lock(&dst_inode->i_lock);
+ NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA);
+ spin_unlock(&dst_inode->i_lock);
+ spin_lock(&src_inode->i_lock);
+ NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME);
+ spin_unlock(&src_inode->i_lock);
status = res->write_res.count;
out:
if (args->sync)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index a33970765467..fdfc77486ace 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -211,7 +211,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
ret = nfs42_proc_llseek(filep, offset, whence);
if (ret != -ENOTSUPP)
return ret;
- /* Fall through */
+ fallthrough;
default:
return nfs_file_llseek(filep, offset, whence);
}
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 1e7296395d71..62e6eea5c516 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -520,7 +520,7 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
switch (token) {
case Opt_find_uid:
im->im_type = IDMAP_TYPE_USER;
- /* Fall through */
+ fallthrough;
case Opt_find_gid:
im->im_conv = IDMAP_CONV_NAMETOID;
ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
@@ -528,7 +528,7 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
case Opt_find_user:
im->im_type = IDMAP_TYPE_USER;
- /* Fall through */
+ fallthrough;
case Opt_find_group:
im->im_conv = IDMAP_CONV_IDTONAME;
ret = match_int(&substr, &im->im_id);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dbd01548335b..6e95c85fe395 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -483,7 +483,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
stateid);
goto wait_on_recovery;
}
- /* Fall through */
+ fallthrough;
case -NFS4ERR_OPENMODE:
if (inode) {
int err;
@@ -534,10 +534,10 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
ret = -EBUSY;
break;
}
- /* Fall through */
+ fallthrough;
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_GRACE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
@@ -1505,7 +1505,7 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
break;
- /* Fall through */
+ fallthrough;
default:
return 0;
}
@@ -2439,7 +2439,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
- /* Fall through */
+ fallthrough;
case NFS4_OPEN_CLAIM_FH:
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
}
@@ -3293,8 +3293,10 @@ static int _nfs4_do_setattr(struct inode *inode,
/* Servers should only apply open mode checks for file size changes */
truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false;
- if (!truncate)
+ if (!truncate) {
+ nfs4_inode_make_writeable(inode);
goto zero_stateid;
+ }
if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) {
/* Use that stateid */
@@ -3545,11 +3547,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
nfs4_free_revoked_stateid(server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
- /* Fallthrough */
+ fallthrough;
case -NFS4ERR_BAD_STATEID:
if (calldata->arg.fmode == 0)
break;
- /* Fallthrough */
+ fallthrough;
default:
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
@@ -6294,7 +6296,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
nfs4_free_revoked_stateid(data->res.server,
data->args.stateid,
task->tk_msg.rpc_cred);
- /* Fallthrough */
+ fallthrough;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -ETIMEDOUT:
@@ -6314,7 +6316,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
data->res.fattr = NULL;
goto out_restart;
}
- /* Fallthrough */
+ fallthrough;
default:
task->tk_status = nfs4_async_handle_exception(task,
data->res.server, task->tk_status,
@@ -6622,13 +6624,13 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
- /* Fall through */
+ fallthrough;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED:
nfs4_free_revoked_stateid(calldata->server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_STALE_STATEID:
if (nfs4_sync_lock_stateid(&calldata->arg.stateid,
@@ -7298,7 +7300,12 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
err = nfs4_set_lock_state(state, fl);
if (err != 0)
return err;
- err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ do {
+ err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ ssleep(1);
+ } while (err == -NFS4ERR_DELAY);
return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
}
@@ -8665,7 +8672,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
rpc_delay(task, NFS4_POLL_RETRY_MIN);
task->tk_status = 0;
- /* fall through */
+ fallthrough;
case -NFS4ERR_RETRY_UNCACHED_REP:
rpc_restart_call_prepare(task);
return;
@@ -9113,13 +9120,13 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
switch(task->tk_status) {
case 0:
wake_up_all(&clp->cl_lock_waitq);
- /* Fallthrough */
+ fallthrough;
case -NFS4ERR_COMPLETE_ALREADY:
case -NFS4ERR_WRONG_CRED: /* What to do here? */
break;
case -NFS4ERR_DELAY:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
- /* fall through */
+ fallthrough;
case -NFS4ERR_RETRY_UNCACHED_REP:
return -EAGAIN;
case -NFS4ERR_BADSESSION:
@@ -9434,10 +9441,10 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
&lrp->args.range,
lrp->args.inode))
goto out_restart;
- /* Fallthrough */
+ fallthrough;
default:
task->tk_status = 0;
- /* Fallthrough */
+ fallthrough;
case 0:
break;
case -NFS4ERR_DELAY:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b1dba24918f8..4bf10792cb5b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1530,7 +1530,7 @@ restart:
default:
pr_err("NFS: %s: unhandled error %d\n",
__func__, status);
- /* Fall through */
+ fallthrough;
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1667,7 +1667,7 @@ restart:
break;
}
printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
- /* Fall through */
+ fallthrough;
case -ENOENT:
case -ENOMEM:
case -EACCES:
@@ -1683,7 +1683,7 @@ restart:
set_bit(ops->state_flag_bit, &state->flags);
break;
}
- /* Fall through */
+ fallthrough;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -1695,7 +1695,7 @@ restart:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -2273,11 +2273,11 @@ again:
case -ETIMEDOUT:
if (clnt->cl_softrtry)
break;
- /* Fall through */
+ fallthrough;
case -NFS4ERR_DELAY:
case -EAGAIN:
ssleep(1);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_STALE_CLIENTID:
dprintk("NFS: %s after status %d, retrying\n",
__func__, status);
@@ -2289,7 +2289,7 @@ again:
}
if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
break;
- /* Fall through */
+ fallthrough;
case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_WRONGSEC:
/* No point in retrying if we already used RPC_AUTH_UNIX */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6ea4cac41e46..6985cacf4700 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -711,7 +711,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
case FLUSH_COND_STABLE:
if (nfs_reqs_to_commit(cinfo))
break;
- /* fall through */
+ fallthrough;
default:
hdr->args.stable = NFS_FILE_SYNC;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 40332c758d84..71f7741126b6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1541,7 +1541,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
case 0:
if (res->lrs_present)
res_stateid = &res->stateid;
- /* Fallthrough */
+ fallthrough;
default:
arg_stateid = &args->stateid;
}
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 8ceb6425e01a..d056ad2fdefd 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -237,7 +237,7 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
break;
case ACL_MASK:
mask = pa;
- /* fall through */
+ fallthrough;
case ACL_OTHER:
break;
}
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 9bbaa671c079..311e5ce80cfc 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -83,13 +83,13 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
bex->soff = iomap.addr;
break;
}
- /*FALLTHRU*/
+ fallthrough;
case IOMAP_HOLE:
if (seg->iomode == IOMODE_READ) {
bex->es = PNFS_BLOCK_NONE_DATA;
break;
}
- /*FALLTHRU*/
+ fallthrough;
case IOMAP_DELALLOC:
default:
WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7fbe9840a03e..052be5bf9ef5 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1119,7 +1119,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
- /* Fall through */
+ fallthrough;
case 1:
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index e12409eca7cc..a97873f2d22b 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -681,7 +681,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
rpc_delay(task, HZ/100); /* 10 mili-seconds */
return 0;
}
- /* Fallthrough */
+ fallthrough;
default:
/*
* Unknown error or non-responding client, we'll need to fence.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a527da3d8052..eaf50eafa935 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -428,7 +428,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
reclaim = true;
- /* fall through */
+ fallthrough;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 81ed8e8bab3f..c09a2a4281ec 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3117,7 +3117,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
default: /* checked by xdr code */
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case SP4_SSV:
status = nfserr_encr_alg_unsupp;
goto out_nolock;
@@ -4532,7 +4532,7 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
rpc_delay(task, 2 * HZ);
return 0;
}
- /*FALLTHRU*/
+ fallthrough;
default:
return 1;
}
@@ -4597,6 +4597,8 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
if (!i_am_nfsd())
return NULL;
rqst = kthread_data(current);
+ if (!rqst->rq_lease_breaker)
+ return NULL;
clp = *(rqst->rq_lease_breaker);
return dl->dl_stid.sc_client == clp;
}
@@ -5652,7 +5654,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
break;
default:
printk("unknown stateid type %x\n", s->sc_type);
- /* Fallthrough */
+ fallthrough;
case NFS4_CLOSED_STID:
case NFS4_CLOSED_DELEG_STID:
status = nfserr_bad_stateid;
@@ -6742,7 +6744,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NFS4_READW_LT:
if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
- /* Fallthrough */
+ fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
nf = find_readable_file_locked(fp);
@@ -6754,7 +6756,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NFS4_WRITEW_LT:
if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
- /* Fallthrough */
+ fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
nf = find_writeable_file_locked(fp);
@@ -6816,7 +6818,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
case FILE_LOCK_DEFERRED:
nbl = NULL;
- /* Fallthrough */
+ fallthrough;
case -EAGAIN: /* conflock holds conflicting lock */
status = nfserr_denied;
dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 37bc8f5f4514..c81dbbad8792 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -459,7 +459,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
case FSID_DEV:
if (!old_valid_dev(exp_sb(exp)->s_dev))
return false;
- /* FALL THROUGH */
+ fallthrough;
case FSID_MAJOR_MINOR:
case FSID_ENCODE_DEV:
return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
@@ -469,7 +469,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
case FSID_UUID16:
if (!is_root_export(exp))
return false;
- /* fall through */
+ fallthrough;
case FSID_UUID4_INUM:
case FSID_UUID16_INUM:
return exp->ex_uuid != NULL;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 543bbe0a556e..6e0b066480c5 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -314,7 +314,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
rdev = inode->i_rdev;
attr->ia_valid |= ATTR_SIZE;
- /* FALLTHROUGH */
+ fallthrough;
case S_IFIFO:
/* this is probably a permission check..
* at least IRIX implements perm checking on
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b603dfcdd361..f7f6473578af 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -221,7 +221,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change)
case NFSD_TEST:
if (nn->nfsd_versions)
return nn->nfsd_versions[vers];
- /* Fallthrough */
+ fallthrough;
case NFSD_AVAIL:
return nfsd_support_version(vers);
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7d2933b85b65..aba5af9df328 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1456,7 +1456,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
*created = true;
break;
}
- /* fall through */
+ fallthrough;
case NFS4_CREATE_EXCLUSIVE4_1:
if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
&& d_inode(dchild)->i_atime.tv_sec == v_atime
@@ -1465,7 +1465,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
*created = true;
goto set_attr;
}
- /* fall through */
+ fallthrough;
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
}
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index fb5a9a8a13cf..e516ae389ca5 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -519,7 +519,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
break;
case NILFS_IFILE_INO:
lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
- /* Fall through */
+ fallthrough;
default:
bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
bmap->b_last_allocated_key = 0;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 0b453ef8fae5..2217f904a7cf 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -626,7 +626,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
!(flags & NILFS_SS_SYNDT))
goto try_next_pseg;
state = RF_DSYNC_ST;
- /* Fall through */
+ fallthrough;
case RF_DSYNC_ST:
if (!(flags & NILFS_SS_SYNDT))
goto confused;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index a651e821c2de..e3726aca28ed 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1138,7 +1138,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
goto dat_stage;
}
- nilfs_sc_cstage_inc(sci); /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_GC:
if (nilfs_doing_gc()) {
head = &sci->sc_gc_inodes;
@@ -1159,7 +1160,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
}
sci->sc_stage.gc_inode_ptr = NULL;
}
- nilfs_sc_cstage_inc(sci); /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_FILE:
head = &sci->sc_dirty_files;
ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
@@ -1186,7 +1188,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
}
nilfs_sc_cstage_inc(sci);
sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
- /* Fall through */
+ fallthrough;
case NILFS_ST_IFILE:
err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
&nilfs_sc_file_ops);
@@ -1197,13 +1199,14 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
err = nilfs_segctor_create_checkpoint(sci);
if (unlikely(err))
break;
- /* Fall through */
+ fallthrough;
case NILFS_ST_CPFILE:
err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
&nilfs_sc_file_ops);
if (unlikely(err))
break;
- nilfs_sc_cstage_inc(sci); /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_SUFILE:
err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
sci->sc_nfreesegs, &ndone);
@@ -1219,7 +1222,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
&nilfs_sc_file_ops);
if (unlikely(err))
break;
- nilfs_sc_cstage_inc(sci); /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_DAT:
dat_stage:
err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
@@ -1230,7 +1234,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
return 0;
}
- nilfs_sc_cstage_inc(sci); /* Fall through */
+ nilfs_sc_cstage_inc(sci);
+ fallthrough;
case NILFS_ST_SR:
if (mode == SC_LSEG_SR) {
/* Appending a super root */
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 559de311deca..3e01d8f2ab90 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1147,7 +1147,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
- case FAN_MARK_ADD: /* fallthrough */
+ case FAN_MARK_ADD:
case FAN_MARK_REMOVE:
if (!mask)
return -EINVAL;
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ef24574f481..cea739be77c4 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -67,7 +67,7 @@ static void o2quo_fence_self(void)
default:
WARN_ON(o2nm_single_cluster->cl_fence_method >=
O2NM_FENCE_METHODS);
- /* fall through */
+ fallthrough;
case O2NM_FENCE_RESET:
printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
"system by restarting ***\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index f5d74ba1bf8c..0ac197658a2d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,25 +106,6 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
}
}
-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe)
-{
- DEFINE_WAIT(rdwait);
- DEFINE_WAIT(wrwait);
-
- /*
- * Pipes are system-local resources, so sleeping on them
- * is considered a noninteractive wait:
- */
- prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
- prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
- pipe_unlock(pipe);
- schedule();
- finish_wait(&pipe->rd_wait, &rdwait);
- finish_wait(&pipe->wr_wait, &wrwait);
- pipe_lock(pipe);
-}
-
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -1034,12 +1015,52 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
return do_pipe2(fildes, 0);
}
+/*
+ * This is the stupid "wait for pipe to be readable or writable"
+ * model.
+ *
+ * See pipe_read/write() for the proper kind of exclusive wait,
+ * but that requires that we wake up any other readers/writers
+ * if we then do not end up reading everything (ie the whole
+ * "wake_next_reader/writer" logic in pipe_read/write()).
+ */
+void pipe_wait_readable(struct pipe_inode_info *pipe)
+{
+ pipe_unlock(pipe);
+ wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
+ pipe_lock(pipe);
+}
+
+void pipe_wait_writable(struct pipe_inode_info *pipe)
+{
+ pipe_unlock(pipe);
+ wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
+ pipe_lock(pipe);
+}
+
+/*
+ * This depends on both the wait (here) and the wakeup (wake_up_partner)
+ * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
+ * race with the count check and waitqueue prep.
+ *
+ * Normally in order to avoid races, you'd do the prepare_to_wait() first,
+ * then check the condition you're waiting for, and only then sleep. But
+ * because of the pipe lock, we can check the condition before being on
+ * the wait queue.
+ *
+ * We use the 'rd_wait' waitqueue for pipe partner waiting.
+ */
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{
+ DEFINE_WAIT(rdwait);
int cur = *cnt;
while (cur == *cnt) {
- pipe_wait(pipe);
+ prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+ pipe_unlock(pipe);
+ schedule();
+ finish_wait(&pipe->rd_wait, &rdwait);
+ pipe_lock(pipe);
if (signal_pending(current))
break;
}
@@ -1049,7 +1070,6 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
static void wake_up_partner(struct pipe_inode_info *pipe)
{
wake_up_interruptible_all(&pipe->rd_wait);
- wake_up_interruptible_all(&pipe->wr_wait);
}
static int fifo_open(struct inode *inode, struct file *filp)
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 819428dfa32f..3ce89216670c 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -1081,7 +1081,6 @@ next_zone:
readop = psz_ftrace_read;
break;
case PSTORE_TYPE_CONSOLE:
- fallthrough;
case PSTORE_TYPE_PMSG:
readop = psz_record_read;
break;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 5444d3c4d93f..47f9e151988b 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -38,7 +38,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) ||
(type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id))))
break;
- /*FALLTHROUGH*/
+ fallthrough;
default:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0..d3428189f36b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -538,6 +538,14 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
inc_syscw(current);
return ret;
}
+/*
+ * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
+ * but autofs is one of the few internal kernel users that actually
+ * wants this _and_ can be built as a module. So we need to export
+ * this symbol for autofs, even though it really isn't appropriate
+ * for any other kernel modules.
+ */
+EXPORT_SYMBOL_GPL(__kernel_write);
ssize_t kernel_write(struct file *file, const void *buf, size_t count,
loff_t *pos)
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index 6b2b4362089e..b57b3ffcbc32 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -217,10 +217,8 @@ int romfs_dev_read(struct super_block *sb, unsigned long pos,
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4e6239f33c06..31219c1db17d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -295,7 +295,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
- /* fall through */
+ fallthrough;
case SEEK_SET:
if (offset < 0)
break;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 5b78719be445..456046e15873 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -176,7 +176,7 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
if (!nonblock)
break;
ret = -EAGAIN;
- /* fall through */
+ fallthrough;
default:
spin_unlock_irq(&current->sighand->siglock);
return ret;
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c4db07..ce75aec52274 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -526,6 +526,22 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
return 1;
}
+/* We know we have a pipe buffer, but maybe it's empty? */
+static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
+{
+ unsigned int tail = pipe->tail;
+ unsigned int mask = pipe->ring_size - 1;
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
+
+ if (unlikely(!buf->len)) {
+ pipe_buf_release(pipe, buf);
+ pipe->tail = tail+1;
+ return true;
+ }
+
+ return false;
+}
+
/**
* splice_from_pipe_next - wait for some data to splice from
* @pipe: pipe to splice from
@@ -545,6 +561,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
if (signal_pending(current))
return -ERESTARTSYS;
+repeat:
while (pipe_empty(pipe->head, pipe->tail)) {
if (!pipe->writers)
return 0;
@@ -563,9 +580,12 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
sd->need_wakeup = false;
}
- pipe_wait(pipe);
+ pipe_wait_readable(pipe);
}
+ if (eat_empty_buffer(pipe))
+ goto repeat;
+
return 1;
}
@@ -1077,7 +1097,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
return -EAGAIN;
if (signal_pending(current))
return -ERESTARTSYS;
- pipe_wait(pipe);
+ pipe_wait_writable(pipe);
}
}
@@ -1454,7 +1474,7 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
ret = -EAGAIN;
break;
}
- pipe_wait(pipe);
+ pipe_wait_readable(pipe);
}
pipe_unlock(pipe);
@@ -1493,7 +1513,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
ret = -ERESTARTSYS;
break;
}
- pipe_wait(pipe);
+ pipe_wait_writable(pipe);
}
pipe_unlock(pipe);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 76bb1c846845..8a19773b5a0b 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,7 +87,11 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
int error, i;
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, page_count);
+ if (page_count <= BIO_MAX_PAGES)
+ bio = bio_alloc(GFP_NOIO, page_count);
+ else
+ bio = bio_kmalloc(GFP_NOIO, page_count);
+
if (!bio)
return -ENOMEM;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 22bfda158f7f..6d6cd85c2b4c 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -269,7 +269,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
break;
/* No more room on heap so make it un-categorized */
cat = LPROPS_UNCAT;
- /* Fall through */
+ fallthrough;
case LPROPS_UNCAT:
list_add(&lprops->list, &c->uncat_list);
break;
@@ -313,7 +313,7 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
case LPROPS_FREEABLE:
c->freeable_cnt -= 1;
ubifs_assert(c, c->freeable_cnt >= 0);
- /* Fall through */
+ fallthrough;
case LPROPS_UNCAT:
case LPROPS_EMPTY:
case LPROPS_FRDI_IDX:
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 6023c97c6da2..25ff91c7e94a 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -52,7 +52,7 @@ static int udf_pc_to_char(struct super_block *sb, unsigned char *from,
elen += pc->lengthComponentIdent;
break;
}
- /* Fall through */
+ fallthrough;
case 2:
if (tolen == 0)
return -ENAMETOOLONG;
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index e1f1b2e868a7..4931bec1a01c 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -42,7 +42,7 @@ ufs_get_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1,
case UFS_ST_SUNOS:
if (fs32_to_cpu(sb, usb3->fs_postblformat) == UFS_42POSTBLFMT)
return fs32_to_cpu(sb, usb1->fs_u0.fs_sun.fs_state);
- /* Fall Through - to UFS_ST_SUN */
+ fallthrough; /* to UFS_ST_SUN */
case UFS_ST_SUN:
return fs32_to_cpu(sb, usb3->fs_un2.fs_sun.fs_state);
case UFS_ST_SUNx86:
@@ -63,7 +63,7 @@ ufs_set_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1,
usb1->fs_u0.fs_sun.fs_state = cpu_to_fs32(sb, value);
break;
}
- /* Fall Through - to UFS_ST_SUN */
+ fallthrough; /* to UFS_ST_SUN */
case UFS_ST_SUN:
usb3->fs_un2.fs_sun.fs_state = cpu_to_fs32(sb, value);
break;
@@ -197,7 +197,7 @@ ufs_get_inode_uid(struct super_block *sb, struct ufs_inode *inode)
case UFS_UID_EFT:
if (inode->ui_u1.oldids.ui_suid == 0xFFFF)
return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_uid);
- /* Fall through */
+ fallthrough;
default:
return fs16_to_cpu(sb, inode->ui_u1.oldids.ui_suid);
}
@@ -215,7 +215,7 @@ ufs_set_inode_uid(struct super_block *sb, struct ufs_inode *inode, u32 value)
inode->ui_u3.ui_sun.ui_uid = cpu_to_fs32(sb, value);
if (value > 0xFFFF)
value = 0xFFFF;
- /* Fall through */
+ fallthrough;
default:
inode->ui_u1.oldids.ui_suid = cpu_to_fs16(sb, value);
break;
@@ -231,7 +231,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode)
case UFS_UID_EFT:
if (inode->ui_u1.oldids.ui_sgid == 0xFFFF)
return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid);
- /* Fall through */
+ fallthrough;
default:
return fs16_to_cpu(sb, inode->ui_u1.oldids.ui_sgid);
}
@@ -249,7 +249,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
inode->ui_u3.ui_sun.ui_gid = cpu_to_fs32(sb, value);
if (value > 0xFFFF)
value = 0xFFFF;
- /* Fall through */
+ fallthrough;
default:
inode->ui_u1.oldids.ui_sgid = cpu_to_fs16(sb, value);
break;
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 96bd160da48b..018057546067 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -226,7 +226,7 @@ int vboxsf_getattr(const struct path *path, struct kstat *kstat,
break;
case AT_STATX_FORCE_SYNC:
sf_i->force_restat = 1;
- /* fall-through */
+ fallthrough;
default:
err = vboxsf_inode_revalidate(dentry);
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 8623c815164a..305d4bc07337 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -653,8 +653,8 @@ xfs_attr_shortform_create(
ASSERT(ifp->if_flags & XFS_IFINLINE);
}
xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
- hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
- hdr->count = 0;
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ memset(hdr, 0, sizeof(*hdr));
hdr->totsize = cpu_to_be16(sizeof(*hdr));
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -1036,8 +1036,10 @@ xfs_attr_shortform_verify(
* struct xfs_attr_sf_entry has a variable length.
* Check the fixed-offset parts of the structure are
* within the data buffer.
+ * xfs_attr_sf_entry is defined with a 1-byte variable
+ * array at the end, so we must subtract that off.
*/
- if (((char *)sfep + sizeof(*sfep)) >= endp)
+ if (((char *)sfep + sizeof(*sfep) - 1) >= endp)
return __this_address;
/* Don't allow names with known bad length. */
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 9c40d5971035..1b0a01b06a05 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -6226,7 +6226,7 @@ xfs_bmap_validate_extent(
isrt = XFS_IS_REALTIME_INODE(ip);
endfsb = irec->br_startblock + irec->br_blockcount - 1;
- if (isrt) {
+ if (isrt && whichfork == XFS_DATA_FORK) {
if (!xfs_verify_rtbno(mp, irec->br_startblock))
return __this_address;
if (!xfs_verify_rtbno(mp, endfsb))
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index f742a96a2fe1..a6b37db55169 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -688,7 +688,7 @@ xfs_ialloc_ag_alloc(
args.minalignslop = igeo->cluster_align - 1;
/* Allow space for the inode btree to split. */
- args.minleft = igeo->inobt_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels;
if ((error = xfs_alloc_vextent(&args)))
return error;
@@ -736,7 +736,7 @@ xfs_ialloc_ag_alloc(
/*
* Allow space for the inode btree to split.
*/
- args.minleft = igeo->inobt_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index e15129647e00..b7e222befb08 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -110,9 +110,9 @@ xfs_trans_log_inode(
* to log the timestamps, or will clear already cleared fields in the
* worst case.
*/
- if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+ if (inode->i_state & I_DIRTY_TIME) {
spin_lock(&inode->i_lock);
- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
}
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index c6df01a2a158..7ad3659c5d2a 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -58,7 +58,7 @@
#define XFS_IALLOC_SPACE_RES(mp) \
(M_IGEO(mp)->ialloc_blks + \
((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \
- (M_IGEO(mp)->inobt_maxlevels - 1)))
+ M_IGEO(mp)->inobt_maxlevels))
/*
* Space reservation values for various transactions.
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 73cafc843cd7..5123f82f2477 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1165,7 +1165,7 @@ xfs_insert_file_space(
goto out_trans_cancel;
do {
- error = xfs_trans_roll_inode(&tp, ip);
+ error = xfs_defer_finish(&tp);
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c31cd3be9fb2..a29f78a663ca 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1223,6 +1223,14 @@ __xfs_filemap_fault(
return ret;
}
+static inline bool
+xfs_is_write_fault(
+ struct vm_fault *vmf)
+{
+ return (vmf->flags & FAULT_FLAG_WRITE) &&
+ (vmf->vma->vm_flags & VM_SHARED);
+}
+
static vm_fault_t
xfs_filemap_fault(
struct vm_fault *vmf)
@@ -1230,7 +1238,7 @@ xfs_filemap_fault(
/* DAX can shortcut the normal fault path on write faults! */
return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
IS_DAX(file_inode(vmf->vma->vm_file)) &&
- (vmf->flags & FAULT_FLAG_WRITE));
+ xfs_is_write_fault(vmf));
}
static vm_fault_t
@@ -1243,7 +1251,7 @@ xfs_filemap_huge_fault(
/* DAX can shortcut the normal fault path on write faults! */
return __xfs_filemap_fault(vmf, pe_size,
- (vmf->flags & FAULT_FLAG_WRITE));
+ xfs_is_write_fault(vmf));
}
static vm_fault_t