aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c6
-rw-r--r--fs/9p/cache.h12
-rw-r--r--fs/9p/vfs_dentry.c19
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/9p/vfs_inode.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/adfs/adfs.h9
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/Changes2
-rw-r--r--fs/afs/cell.c2
-rw-r--r--fs/afs/inode.c2
-rw-r--r--fs/afs/vlocation.c3
-rw-r--r--fs/afs/volume.c2
-rw-r--r--fs/aio.c63
-rw-r--r--fs/anon_inodes.c114
-rw-r--r--fs/attr.c25
-rw-r--r--fs/autofs4/autofs_i.h3
-rw-r--r--fs/autofs4/dev-ioctl.c6
-rw-r--r--fs/autofs4/inode.c13
-rw-r--r--fs/befs/linuxvfs.c61
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c127
-rw-r--r--fs/binfmt_elf_fdpic.c152
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/bio.c48
-rw-r--r--fs/btrfs/Kconfig18
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/async-thread.c3
-rw-r--r--fs/btrfs/backref.c8
-rw-r--r--fs/btrfs/btrfs_inode.h20
-rw-r--r--fs/btrfs/check-integrity.c75
-rw-r--r--fs/btrfs/check-integrity.h2
-rw-r--r--fs/btrfs/compat.h7
-rw-r--r--fs/btrfs/compression.c3
-rw-r--r--fs/btrfs/ctree.c75
-rw-r--r--fs/btrfs/ctree.h47
-rw-r--r--fs/btrfs/delayed-inode.c19
-rw-r--r--fs/btrfs/dev-replace.c28
-rw-r--r--fs/btrfs/dir-item.c8
-rw-r--r--fs/btrfs/disk-io.c269
-rw-r--r--fs/btrfs/disk-io.h4
-rw-r--r--fs/btrfs/export.c1
-rw-r--r--fs/btrfs/extent-tree.c174
-rw-r--r--fs/btrfs/extent_io.c158
-rw-r--r--fs/btrfs/extent_io.h8
-rw-r--r--fs/btrfs/extent_map.h8
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c163
-rw-r--r--fs/btrfs/free-space-cache.c21
-rw-r--r--fs/btrfs/free-space-cache.h4
-rw-r--r--fs/btrfs/inode-item.c2
-rw-r--r--fs/btrfs/inode-map.c13
-rw-r--r--fs/btrfs/inode.c213
-rw-r--r--fs/btrfs/ioctl.c85
-rw-r--r--fs/btrfs/ordered-data.c55
-rw-r--r--fs/btrfs/ordered-data.h6
-rw-r--r--fs/btrfs/print-tree.c2
-rw-r--r--fs/btrfs/raid56.c1
-rw-r--r--fs/btrfs/relocation.c94
-rw-r--r--fs/btrfs/scrub.c85
-rw-r--r--fs/btrfs/send.c193
-rw-r--r--fs/btrfs/super.c31
-rw-r--r--fs/btrfs/tests/btrfs-tests.c74
-rw-r--r--fs/btrfs/tests/btrfs-tests.h25
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c229
-rw-r--r--fs/btrfs/tests/extent-io-tests.c276
-rw-r--r--fs/btrfs/tests/inode-tests.c955
-rw-r--r--fs/btrfs/transaction.c85
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-defrag.c5
-rw-r--r--fs/btrfs/tree-log.c153
-rw-r--r--fs/btrfs/uuid-tree.c6
-rw-r--r--fs/btrfs/volumes.c28
-rw-r--r--fs/btrfs/volumes.h24
-rw-r--r--fs/cachefiles/interface.c6
-rw-r--r--fs/cachefiles/namei.c4
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/cache.c7
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/inode.c49
-rw-r--r--fs/ceph/mds_client.c61
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/super.h8
-rw-r--r--fs/char_dev.c9
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsencrypt.c40
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h38
-rw-r--r--fs/cifs/cifspdu.h42
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c62
-rw-r--r--fs/cifs/connect.c35
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c22
-rw-r--r--fs/cifs/fscache.c8
-rw-r--r--fs/cifs/inode.c23
-rw-r--r--fs/cifs/ioctl.c170
-rw-r--r--fs/cifs/link.c7
-rw-r--r--fs/cifs/misc.c22
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/readdir.c40
-rw-r--r--fs/cifs/smb1ops.c33
-rw-r--r--fs/cifs/smb2inode.c16
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2ops.c269
-rw-r--r--fs/cifs/smb2pdu.c219
-rw-r--r--fs/cifs/smb2pdu.h73
-rw-r--r--fs/cifs/smb2proto.h7
-rw-r--r--fs/cifs/smb2transport.c12
-rw-r--r--fs/cifs/smbfsctl.h2
-rw-r--r--fs/cifs/transport.c13
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/coda/dir.c6
-rw-r--r--fs/coda/file.c6
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/configfs/dir.c28
-rw-r--r--fs/coredump.c71
-rw-r--r--fs/cramfs/Kconfig5
-rw-r--r--fs/dcache.c443
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/dlm/lockspace.c4
-rw-r--r--fs/dlm/netlink.c10
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/dentry.c29
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h19
-rw-r--r--fs/ecryptfs/file.c16
-rw-r--r--fs/ecryptfs/inode.c29
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/efivarfs/super.c11
-rw-r--r--fs/eventpoll.c150
-rw-r--r--fs/exec.c47
-rw-r--r--fs/exportfs/expfs.c267
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/balloc.c13
-rw-r--r--fs/ext4/ext4.h19
-rw-r--r--fs/ext4/extents.c35
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inline.c3
-rw-r--r--fs/ext4/inode.c54
-rw-r--r--fs/ext4/ioctl.c4
-rw-r--r--fs/ext4/mballoc.c4
-rw-r--r--fs/ext4/mmp.c2
-rw-r--r--fs/ext4/move_extent.c40
-rw-r--r--fs/ext4/page-io.c5
-rw-r--r--fs/ext4/super.c159
-rw-r--r--fs/ext4/xattr.c1
-rw-r--r--fs/f2fs/Kconfig8
-rw-r--r--fs/f2fs/acl.c36
-rw-r--r--fs/f2fs/acl.h9
-rw-r--r--fs/f2fs/checkpoint.c75
-rw-r--r--fs/f2fs/data.c29
-rw-r--r--fs/f2fs/dir.c4
-rw-r--r--fs/f2fs/f2fs.h117
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c31
-rw-r--r--fs/f2fs/inode.c62
-rw-r--r--fs/f2fs/namei.c52
-rw-r--r--fs/f2fs/node.c142
-rw-r--r--fs/f2fs/recovery.c45
-rw-r--r--fs/f2fs/segment.c133
-rw-r--r--fs/f2fs/segment.h38
-rw-r--r--fs/f2fs/super.c143
-rw-r--r--fs/f2fs/xattr.c36
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/inode.c19
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/file_table.c129
-rw-r--r--fs/fs-writeback.c34
-rw-r--r--fs/fs_struct.c2
-rw-r--r--fs/fscache/cookie.c193
-rw-r--r--fs/fscache/fsdef.c1
-rw-r--r--fs/fscache/netfs.c1
-rw-r--r--fs/fscache/object.c9
-rw-r--r--fs/fscache/page.c59
-rw-r--r--fs/fuse/cuse.c7
-rw-r--r--fs/fuse/dir.c40
-rw-r--r--fs/fuse/file.c361
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/aops.c4
-rw-r--r--fs/gfs2/bmap.c7
-rw-r--r--fs/gfs2/file.c10
-rw-r--r--fs/gfs2/glock.c84
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/incore.h41
-rw-r--r--fs/gfs2/inode.c58
-rw-r--r--fs/gfs2/lock_dlm.c8
-rw-r--r--fs/gfs2/main.c19
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/quota.c344
-rw-r--r--fs/gfs2/quota.h9
-rw-r--r--fs/gfs2/rgrp.c212
-rw-r--r--fs/gfs2/rgrp.h4
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/gfs2/util.c20
-rw-r--r--fs/gfs2/util.h2
-rw-r--r--fs/gfs2/xattr.c3
-rw-r--r--fs/hfs/btree.h5
-rw-r--r--fs/hfsplus/btree.c112
-rw-r--r--fs/hfsplus/hfsplus_fs.h10
-rw-r--r--fs/hfsplus/hfsplus_raw.h11
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/wrapper.c17
-rw-r--r--fs/hfsplus/xattr.c210
-rw-r--r--fs/hostfs/hostfs_kern.c11
-rw-r--r--fs/hpfs/hpfs_fn.h1
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c28
-rw-r--r--fs/inode.c62
-rw-r--r--fs/internal.h7
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/inode.c12
-rw-r--r--fs/jbd/transaction.c8
-rw-r--r--fs/jffs2/fs.c4
-rw-r--r--fs/libfs.c134
-rw-r--r--fs/locks.c70
-rw-r--r--fs/logfs/dev_bdev.c13
-rw-r--r--fs/minix/Kconfig2
-rw-r--r--fs/mount.h20
-rw-r--r--fs/namei.c322
-rw-r--r--fs/namespace.c390
-rw-r--r--fs/ncpfs/dir.c55
-rw-r--r--fs/ncpfs/file.c12
-rw-r--r--fs/ncpfs/inode.c19
-rw-r--r--fs/ncpfs/ncp_fs_sb.h2
-rw-r--r--fs/nfs/Kconfig17
-rw-r--r--fs/nfs/blocklayout/blocklayout.h1
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/client.c10
-rw-r--r--fs/nfs/dir.c129
-rw-r--r--fs/nfs/direct.c17
-rw-r--r--fs/nfs/dns_resolve.c2
-rw-r--r--fs/nfs/file.c117
-rw-r--r--fs/nfs/fscache.c202
-rw-r--r--fs/nfs/fscache.h18
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/nfs/internal.h23
-rw-r--r--fs/nfs/namespace.c5
-rw-r--r--fs/nfs/nfs3proc.c8
-rw-r--r--fs/nfs/nfs4_fs.h25
-rw-r--r--fs/nfs/nfs4client.c138
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/nfs4namespace.c125
-rw-r--r--fs/nfs/nfs4proc.c503
-rw-r--r--fs/nfs/nfs4state.c273
-rw-r--r--fs/nfs/nfs4super.c12
-rw-r--r--fs/nfs/nfs4xdr.c138
-rw-r--r--fs/nfs/proc.c8
-rw-r--r--fs/nfs/super.c200
-rw-r--r--fs/nfs/unlink.c12
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsd/Kconfig2
-rw-r--r--fs/nfsd/export.c24
-rw-r--r--fs/nfsd/nfs4recover.c12
-rw-r--r--fs/nfsd/nfs4state.c43
-rw-r--r--fs/nfsd/nfs4xdr.c135
-rw-r--r--fs/nfsd/nfsfh.c36
-rw-r--r--fs/nfsd/nfsfh.h4
-rw-r--r--fs/nfsd/vfs.c194
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c32
-rw-r--r--fs/ocfs2/buffer_head_io.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/cluster/masklog.h3
-rw-r--r--fs/ocfs2/dir.c12
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c8
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c7
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/inode.c10
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/move_extents.c11
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/refcounttree.c20
-rw-r--r--fs/ocfs2/resize.c12
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/ocfs2/xattr.c28
-rw-r--r--fs/open.c32
-rw-r--r--fs/pipe.c39
-rw-r--r--fs/pnode.c13
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/consoles.c10
-rw-r--r--fs/proc/generic.c18
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/namespaces.c8
-rw-r--r--fs/proc/nommu.c12
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/self.c10
-rw-r--r--fs/proc/task_mmu.c53
-rw-r--r--fs/proc/task_nommu.c19
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/qnx4/namei.c4
-rw-r--r--fs/quota/netlink.c16
-rw-r--r--fs/quota/quota.c1
-rw-r--r--fs/read_write.c25
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/select.c4
-rw-r--r--fs/seq_file.c18
-rw-r--r--fs/splice.c6
-rw-r--r--fs/squashfs/Kconfig72
-rw-r--r--fs/squashfs/Makefile5
-rw-r--r--fs/squashfs/block.c36
-rw-r--r--fs/squashfs/cache.c28
-rw-r--r--fs/squashfs/decompressor.c59
-rw-r--r--fs/squashfs/decompressor.h24
-rw-r--r--fs/squashfs/decompressor_multi.c198
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c97
-rw-r--r--fs/squashfs/decompressor_single.c85
-rw-r--r--fs/squashfs/file.c142
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c176
-rw-r--r--fs/squashfs/lzo_wrapper.c47
-rw-r--r--fs/squashfs/page_actor.c100
-rw-r--r--fs/squashfs/page_actor.h81
-rw-r--r--fs/squashfs/squashfs.h20
-rw-r--r--fs/squashfs/squashfs_fs_sb.h4
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xz_wrapper.c105
-rw-r--r--fs/squashfs/zlib_wrapper.c64
-rw-r--r--fs/stat.c31
-rw-r--r--fs/super.c201
-rw-r--r--fs/sync.c17
-rw-r--r--fs/sysfs/Makefile3
-rw-r--r--fs/sysfs/bin.c502
-rw-r--r--fs/sysfs/dir.c350
-rw-r--r--fs/sysfs/file.c890
-rw-r--r--fs/sysfs/group.c33
-rw-r--r--fs/sysfs/inode.c30
-rw-r--r--fs/sysfs/symlink.c50
-rw-r--r--fs/sysfs/sysfs.h71
-rw-r--r--fs/ubifs/debug.c6
-rw-r--r--fs/ubifs/dir.c41
-rw-r--r--fs/ubifs/gc.c3
-rw-r--r--fs/ubifs/journal.c6
-rw-r--r--fs/ubifs/super.c8
-rw-r--r--fs/ubifs/xattr.c16
-rw-r--r--fs/udf/super.c45
-rw-r--r--fs/utimes.c9
-rw-r--r--fs/xfs/Makefile8
-rw-r--r--fs/xfs/kmem.c22
-rw-r--r--fs/xfs/kmem.h21
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_ag.h4
-rw-r--r--fs/xfs/xfs_alloc.c19
-rw-r--r--fs/xfs/xfs_alloc.h3
-rw-r--r--fs/xfs/xfs_alloc_btree.c14
-rw-r--r--fs/xfs/xfs_alloc_btree.h35
-rw-r--r--fs/xfs/xfs_aops.c16
-rw-r--r--fs/xfs/xfs_attr.c12
-rw-r--r--fs/xfs/xfs_attr_inactive.c21
-rw-r--r--fs/xfs/xfs_attr_leaf.c29
-rw-r--r--fs/xfs/xfs_attr_leaf.h232
-rw-r--r--fs/xfs/xfs_attr_list.c32
-rw-r--r--fs/xfs/xfs_attr_remote.c14
-rw-r--r--fs/xfs/xfs_attr_remote.h29
-rw-r--r--fs/xfs/xfs_bit.c4
-rw-r--r--fs/xfs/xfs_bmap.c60
-rw-r--r--fs/xfs/xfs_bmap_btree.c13
-rw-r--r--fs/xfs/xfs_bmap_btree.h105
-rw-r--r--fs/xfs/xfs_bmap_util.c293
-rw-r--r--fs/xfs/xfs_bmap_util.h9
-rw-r--r--fs/xfs/xfs_btree.c12
-rw-r--r--fs/xfs/xfs_btree.h79
-rw-r--r--fs/xfs/xfs_buf.c11
-rw-r--r--fs/xfs/xfs_buf_item.c9
-rw-r--r--fs/xfs/xfs_buf_item.h4
-rw-r--r--fs/xfs/xfs_da_btree.c264
-rw-r--r--fs/xfs/xfs_da_btree.h143
-rw-r--r--fs/xfs/xfs_da_format.c907
-rw-r--r--fs/xfs/xfs_da_format.h (renamed from fs/xfs/xfs_dir2_format.h)681
-rw-r--r--fs/xfs/xfs_dir2.c20
-rw-r--r--fs/xfs/xfs_dir2.h106
-rw-r--r--fs/xfs/xfs_dir2_block.c109
-rw-r--r--fs/xfs/xfs_dir2_data.c161
-rw-r--r--fs/xfs/xfs_dir2_leaf.c243
-rw-r--r--fs/xfs/xfs_dir2_node.c351
-rw-r--r--fs/xfs/xfs_dir2_priv.h20
-rw-r--r--fs/xfs/xfs_dir2_readdir.c42
-rw-r--r--fs/xfs/xfs_dir2_sf.c216
-rw-r--r--fs/xfs/xfs_discard.c11
-rw-r--r--fs/xfs/xfs_dquot.c133
-rw-r--r--fs/xfs/xfs_dquot.h2
-rw-r--r--fs/xfs/xfs_dquot_buf.c288
-rw-r--r--fs/xfs/xfs_dquot_item.c14
-rw-r--r--fs/xfs/xfs_error.c11
-rw-r--r--fs/xfs/xfs_export.c12
-rw-r--r--fs/xfs/xfs_extent_busy.c11
-rw-r--r--fs/xfs/xfs_extent_busy.h4
-rw-r--r--fs/xfs/xfs_extfree_item.c8
-rw-r--r--fs/xfs/xfs_file.c92
-rw-r--r--fs/xfs/xfs_filestream.c12
-rw-r--r--fs/xfs/xfs_format.h263
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c45
-rw-r--r--fs/xfs/xfs_ialloc.c20
-rw-r--r--fs/xfs/xfs_ialloc.h5
-rw-r--r--fs/xfs/xfs_ialloc_btree.c13
-rw-r--r--fs/xfs/xfs_ialloc_btree.h51
-rw-r--r--fs/xfs/xfs_icache.c20
-rw-r--r--fs/xfs/xfs_icreate_item.c7
-rw-r--r--fs/xfs/xfs_inode.c338
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_inode_buf.c10
-rw-r--r--fs/xfs/xfs_inode_buf.h3
-rw-r--r--fs/xfs/xfs_inode_fork.c40
-rw-r--r--fs/xfs/xfs_inode_fork.h1
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_ioctl.c146
-rw-r--r--fs/xfs/xfs_ioctl32.c7
-rw-r--r--fs/xfs/xfs_iomap.c23
-rw-r--r--fs/xfs/xfs_iomap.h8
-rw-r--r--fs/xfs/xfs_iops.c70
-rw-r--r--fs/xfs/xfs_iops.h8
-rw-r--r--fs/xfs/xfs_itable.c15
-rw-r--r--fs/xfs/xfs_log.c75
-rw-r--r--fs/xfs/xfs_log.h10
-rw-r--r--fs/xfs/xfs_log_cil.c26
-rw-r--r--fs/xfs/xfs_log_format.h177
-rw-r--r--fs/xfs/xfs_log_priv.h17
-rw-r--r--fs/xfs/xfs_log_recover.c171
-rw-r--r--fs/xfs/xfs_log_rlimit.c9
-rw-r--r--fs/xfs/xfs_message.c5
-rw-r--r--fs/xfs/xfs_mount.c36
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c39
-rw-r--r--fs/xfs/xfs_qm.h2
-rw-r--r--fs/xfs/xfs_qm_bhv.c12
-rw-r--r--fs/xfs/xfs_qm_syscalls.c28
-rw-r--r--fs/xfs/xfs_quota.h4
-rw-r--r--fs/xfs/xfs_quota_defs.h4
-rw-r--r--fs/xfs/xfs_quotaops.c5
-rw-r--r--fs/xfs/xfs_rtalloc.c1552
-rw-r--r--fs/xfs/xfs_rtalloc.h24
-rw-r--r--fs/xfs/xfs_rtbitmap.c974
-rw-r--r--fs/xfs/xfs_sb.c46
-rw-r--r--fs/xfs/xfs_sb.h3
-rw-r--r--fs/xfs/xfs_shared.h244
-rw-r--r--fs/xfs/xfs_super.c40
-rw-r--r--fs/xfs/xfs_symlink.c102
-rw-r--r--fs/xfs/xfs_symlink.h2
-rw-r--r--fs/xfs/xfs_symlink_remote.c6
-rw-r--r--fs/xfs/xfs_trace.c16
-rw-r--r--fs/xfs/xfs_trace.h84
-rw-r--r--fs/xfs/xfs_trans.c23
-rw-r--r--fs/xfs/xfs_trans.h20
-rw-r--r--fs/xfs/xfs_trans_ail.c10
-rw-r--r--fs/xfs/xfs_trans_buf.c12
-rw-r--r--fs/xfs/xfs_trans_dquot.c15
-rw-r--r--fs/xfs/xfs_trans_extfree.c7
-rw-r--r--fs/xfs/xfs_trans_inode.c21
-rw-r--r--fs/xfs/xfs_trans_priv.h1
-rw-r--r--fs/xfs/xfs_trans_resv.c21
-rw-r--r--fs/xfs/xfs_vnode.h8
-rw-r--r--fs/xfs/xfs_xattr.c8
471 files changed, 17060 insertions, 11229 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index a9ea73d6dcf3..2b7a032c37bc 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -90,7 +90,7 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
&v9fs_cache_session_index_def,
- v9ses);
+ v9ses, true);
p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
v9ses, v9ses->fscache);
}
@@ -204,7 +204,7 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
v9ses = v9fs_inode2v9ses(inode);
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- v9inode);
+ v9inode, true);
p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
inode, v9inode->fscache);
@@ -271,7 +271,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
v9ses = v9fs_inode2v9ses(inode);
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- v9inode);
+ v9inode, true);
p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
inode, old, v9inode->fscache);
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 40cc54ced5d9..2f9675491095 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -101,6 +101,18 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
#else /* CONFIG_9P_FSCACHE */
+static inline void v9fs_cache_inode_get_cookie(struct inode *inode)
+{
+}
+
+static inline void v9fs_cache_inode_put_cookie(struct inode *inode)
+{
+}
+
+static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file)
+{
+}
+
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp) {
return 1;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f039b104a98e..b03dd23feda8 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -43,23 +43,6 @@
#include "fid.h"
/**
- * v9fs_dentry_delete - called when dentry refcount equals 0
- * @dentry: dentry in question
- *
- * By returning 1 here we should remove cacheing of unused
- * dentry components.
- *
- */
-
-static int v9fs_dentry_delete(const struct dentry *dentry)
-{
- p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
- dentry->d_name.name, dentry);
-
- return 1;
-}
-
-/**
* v9fs_cached_dentry_delete - called when dentry refcount equals 0
* @dentry: dentry in question
*
@@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
};
const struct dentry_operations v9fs_dentry_operations = {
- .d_delete = v9fs_dentry_delete,
+ .d_delete = always_delete_dentry,
.d_release = v9fs_dentry_release,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index aa5ecf479a57..a0df3e73c2b1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -105,10 +105,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode->writeback_fid = (void *) fid;
}
mutex_unlock(&v9inode->v_mutex);
-#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
-#endif
return 0;
out_error:
p9_client_clunk(file->private_data);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 94de6d1482e2..4e65aa903345 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,9 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
clear_inode(inode);
filemap_fdatawrite(inode->i_mapping);
-#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_put_cookie(inode);
-#endif
/* clunk the fid stashed in writeback_fid */
if (v9inode->writeback_fid) {
p9_client_clunk(v9inode->writeback_fid);
@@ -531,9 +529,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
goto error;
v9fs_stat2inode(st, inode, sb);
-#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_get_cookie(inode);
-#endif
unlock_new_inode(inode);
return inode;
error:
@@ -905,10 +901,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto error;
file->private_data = fid;
-#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache)
v9fs_cache_inode_set_cookie(dentry->d_inode, file);
-#endif
*opened |= FILE_CREATED;
out:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a7c481402c46..4c10edec26a0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -141,9 +141,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
goto error;
v9fs_stat2inode_dotl(st, inode);
-#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_get_cookie(inode);
-#endif
retval = v9fs_get_acl(inode, fid);
if (retval)
goto error;
@@ -355,10 +353,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err)
goto err_clunk_old_fid;
file->private_data = ofid;
-#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
-#endif
*opened |= FILE_CREATED;
out:
v9fs_put_acl(dacl, pacl);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 585adafb0cc2..c770337c4b45 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -43,9 +43,12 @@ struct adfs_dir_ops;
* ADFS file system superblock data in memory
*/
struct adfs_sb_info {
- struct adfs_discmap *s_map; /* bh list containing map */
- struct adfs_dir_ops *s_dir; /* directory operations */
-
+ union { struct {
+ struct adfs_discmap *s_map; /* bh list containing map */
+ struct adfs_dir_ops *s_dir; /* directory operations */
+ };
+ struct rcu_head rcu; /* used only at shutdown time */
+ };
kuid_t s_uid; /* owner uid */
kgid_t s_gid; /* owner gid */
umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 0ff4bae2c2a2..7b3003cb6f1b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -123,8 +123,7 @@ static void adfs_put_super(struct super_block *sb)
for (i = 0; i < asb->s_map_size; i++)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
- kfree(asb);
- sb->s_fs_info = NULL;
+ kfree_rcu(asb, rcu);
}
static int adfs_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/affs/Changes b/fs/affs/Changes
index a29409c1ffe0..b41c2c9792ff 100644
--- a/fs/affs/Changes
+++ b/fs/affs/Changes
@@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel]
Version 3.11
------------
-- Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>]
+- Converted to use 2.3.x page cache [Dave Jones]
- Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>]
Version 3.10
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 3c090b7555ea..ca0a3cf93791 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -179,7 +179,7 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
/* put it up for caching (this never returns an error) */
cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
&afs_cell_cache_index_def,
- cell);
+ cell, true);
#endif
/* add to the cell lists */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 789bc253b5f6..ce25d755b7aa 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -259,7 +259,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
#ifdef CONFIG_AFS_FSCACHE
vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
&afs_vnode_cache_index_def,
- vnode);
+ vnode, true);
#endif
ret = afs_inode_map_status(vnode, key);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 57bcb1596530..b6df2e83809f 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -308,7 +308,8 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
/* see if we have an in-cache copy (will set vl->valid if there is) */
#ifdef CONFIG_AFS_FSCACHE
vl->cache = fscache_acquire_cookie(vl->cell->cache,
- &afs_vlocation_cache_index_def, vl);
+ &afs_vlocation_cache_index_def, vl,
+ true);
#endif
if (vl->valid) {
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 401eeb21869f..2b607257820c 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -131,7 +131,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
#ifdef CONFIG_AFS_FSCACHE
volume->cache = fscache_acquire_cookie(vlocation->cache,
&afs_volume_cache_index_def,
- volume);
+ volume, true);
#endif
afs_get_vlocation(vlocation);
volume->vlocation = vlocation;
diff --git a/fs/aio.c b/fs/aio.c
index a2f92aa23ee3..6efb7f6cb22e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,10 +36,10 @@
#include <linux/eventfd.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
-#include <linux/anon_inodes.h>
#include <linux/migrate.h>
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
+#include <linux/mount.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -153,12 +153,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
+static struct vfsmount *aio_mnt;
+
+static const struct file_operations aio_ring_fops;
+static const struct address_space_operations aio_ctx_aops;
+
+static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
+{
+ struct qstr this = QSTR_INIT("[aio]", 5);
+ struct file *file;
+ struct path path;
+ struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ inode->i_mapping->a_ops = &aio_ctx_aops;
+ inode->i_mapping->private_data = ctx;
+ inode->i_size = PAGE_SIZE * nr_pages;
+
+ path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
+ if (!path.dentry) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ path.mnt = mntget(aio_mnt);
+
+ d_instantiate(path.dentry, inode);
+ file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
+ if (IS_ERR(file)) {
+ path_put(&path);
+ return file;
+ }
+
+ file->f_flags = O_RDWR;
+ file->private_data = ctx;
+ return file;
+}
+
+static struct dentry *aio_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ static const struct dentry_operations ops = {
+ .d_dname = simple_dname,
+ };
+ return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+}
+
/* aio_setup
* Creates the slab caches used by the aio routines, panic on
* failure as this is done early during the boot sequence.
*/
static int __init aio_setup(void)
{
+ static struct file_system_type aio_fs = {
+ .name = "aio",
+ .mount = aio_mount,
+ .kill_sb = kill_anon_super,
+ };
+ aio_mnt = kern_mount(&aio_fs);
+ if (IS_ERR(aio_mnt))
+ panic("Failed to create aio fs mount.");
+
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -286,16 +341,12 @@ static int aio_setup_ring(struct kioctx *ctx)
if (nr_pages < 0)
return -EINVAL;
- file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+ file = aio_private_file(ctx, nr_pages);
if (IS_ERR(file)) {
ctx->aio_ring_file = NULL;
return -EAGAIN;
}
- file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
- file->f_inode->i_mapping->private_data = ctx;
- file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
-
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_inode->i_mapping,
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 85c961849953..24084732b1d0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,7 +24,6 @@
static struct vfsmount *anon_inode_mnt __read_mostly;
static struct inode *anon_inode_inode;
-static const struct file_operations anon_inode_fops;
/*
* anon_inodefs_dname() is called from d_path().
@@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_dname = anon_inodefs_dname,
};
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
- return 0;
-};
-
-static const struct address_space_operations anon_aops = {
- .set_page_dirty = anon_set_page_dirty,
-};
-
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(struct super_block *s)
-{
- struct inode *inode = new_inode_pseudo(s);
-
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- inode->i_ino = get_next_ino();
- inode->i_fop = &anon_inode_fops;
-
- inode->i_mapping->a_ops = &anon_aops;
-
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because mark_inode_dirty() will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- return inode;
-}
-
static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
@@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
if (!IS_ERR(root)) {
struct super_block *s = root->d_sb;
- anon_inode_inode = anon_inode_mkinode(s);
+ anon_inode_inode = alloc_anon_inode(s);
if (IS_ERR(anon_inode_inode)) {
dput(root);
deactivate_locked_super(s);
@@ -109,72 +63,6 @@ static struct file_system_type anon_inode_fs_type = {
};
/**
- * anon_inode_getfile_private - creates a new file instance by hooking it up to an
- * anonymous inode, and a dentry that describe the "class"
- * of the file
- *
- * @name: [in] name of the "class" of the new file
- * @fops: [in] file operations for the new file
- * @priv: [in] private data for the new file (will be file's private_data)
- * @flags: [in] flags
- *
- *
- * Similar to anon_inode_getfile, but each file holds a single inode.
- *
- */
-struct file *anon_inode_getfile_private(const char *name,
- const struct file_operations *fops,
- void *priv, int flags)
-{
- struct qstr this;
- struct path path;
- struct file *file;
- struct inode *inode;
-
- if (fops->owner && !try_module_get(fops->owner))
- return ERR_PTR(-ENOENT);
-
- inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
- if (IS_ERR(inode)) {
- file = ERR_PTR(-ENOMEM);
- goto err_module;
- }
-
- /*
- * Link the inode to a directory entry by creating a unique name
- * using the inode sequence number.
- */
- file = ERR_PTR(-ENOMEM);
- this.name = name;
- this.len = strlen(name);
- this.hash = 0;
- path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
- if (!path.dentry)
- goto err_module;
-
- path.mnt = mntget(anon_inode_mnt);
-
- d_instantiate(path.dentry, inode);
-
- file = alloc_file(&path, OPEN_FMODE(flags), fops);
- if (IS_ERR(file))
- goto err_dput;
-
- file->f_mapping = inode->i_mapping;
- file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
- file->private_data = priv;
-
- return file;
-
-err_dput:
- path_put(&path);
-err_module:
- module_put(fops->owner);
- return file;
-}
-EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
-
-/**
* anon_inode_getfile - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class"
* of the file
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb14ef6..267968d94673 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
}
EXPORT_SYMBOL(setattr_copy);
-int notify_change(struct dentry * dentry, struct iattr * attr)
+/**
+ * notify_change - modify attributes of a filesytem object
+ * @dentry: object affected
+ * @iattr: new attributes
+ * @delegated_inode: returns inode, if the inode is delegated
+ *
+ * The caller must hold the i_mutex on the affected object.
+ *
+ * If notify_change discovers a delegation in need of breaking,
+ * it will return -EWOULDBLOCK and return a reference to the inode in
+ * delegated_inode. The caller should then break the delegation and
+ * retry. Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode. This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported. Also, passing NULL is fine for callers holding
+ * the file open for write, as there can be no conflicting delegation in
+ * that case.
+ */
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
umode_t mode = inode->i_mode;
@@ -243,6 +263,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
error = security_inode_setattr(dentry, attr);
if (error)
return error;
+ error = try_break_deleg(inode, delegated_inode);
+ if (error)
+ return error;
if (inode->i_op->setattr)
error = inode->i_op->setattr(dentry, attr);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3f1128b37e46..4218e26df916 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -122,6 +122,7 @@ struct autofs_sb_info {
spinlock_t lookup_lock;
struct list_head active_list;
struct list_head expiring_list;
+ struct rcu_head rcu;
};
static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -271,7 +272,7 @@ void autofs4_clean_ino(struct autofs_info *);
static inline int autofs_prepare_pipe(struct file *pipe)
{
- if (!pipe->f_op || !pipe->f_op->write)
+ if (!pipe->f_op->write)
return -EINVAL;
if (!S_ISFIFO(file_inode(pipe)->i_mode))
return -EINVAL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 0f00da329e71..1818ce7f5a06 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -658,12 +658,6 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
goto out;
}
- if (!fp->f_op) {
- err = -ENOTTY;
- fput(fp);
- goto out;
- }
-
sbi = autofs_dev_ioctl_sbi(fp);
if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
err = -EINVAL;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index b104726e2d0a..3b9cc9b973c2 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,18 +56,13 @@ void autofs4_kill_sb(struct super_block *sb)
* just call kill_anon_super when we are called from
* deactivate_super.
*/
- if (!sbi)
- goto out_kill_sb;
-
- /* Free wait queues, close pipe */
- autofs4_catatonic_mode(sbi);
-
- sb->s_fs_info = NULL;
- kfree(sbi);
+ if (sbi) /* Free wait queues, close pipe */
+ autofs4_catatonic_mode(sbi);
-out_kill_sb:
DPRINTK("shutting down");
kill_litter_super(sb);
+ if (sbi)
+ kfree_rcu(sbi, rcu);
}
static int autofs4_show_options(struct seq_file *m, struct dentry *root)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e9c75e20db32..daa15d6ba450 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode);
static int befs_init_inodecache(void);
static void befs_destroy_inodecache(void);
static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void befs_put_link(struct dentry *, struct nameidata *, void *);
+static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,15 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
+static const struct inode_operations befs_fast_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = befs_fast_follow_link,
+};
+
static const struct inode_operations befs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = befs_follow_link,
- .put_link = befs_put_link,
+ .put_link = kfree_put_link,
};
/*
@@ -411,7 +416,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &befs_dir_inode_operations;
inode->i_fop = &befs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &befs_symlink_inode_operations;
+ if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
+ inode->i_op = &befs_symlink_inode_operations;
+ else
+ inode->i_op = &befs_fast_symlink_inode_operations;
} else {
befs_error(sb, "Inode %lu is not a regular file, "
"directory or symlink. THAT IS WRONG! BeFS has no "
@@ -477,47 +485,40 @@ befs_destroy_inodecache(void)
static void *
befs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
+ struct super_block *sb = dentry->d_sb;
befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+ befs_data_stream *data = &befs_ino->i_data.ds;
+ befs_off_t len = data->size;
char *link;
- if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
- struct super_block *sb = dentry->d_sb;
- befs_data_stream *data = &befs_ino->i_data.ds;
- befs_off_t len = data->size;
+ if (len == 0) {
+ befs_error(sb, "Long symlink with illegal length");
+ link = ERR_PTR(-EIO);
+ } else {
+ befs_debug(sb, "Follow long symlink");
- if (len == 0) {
- befs_error(sb, "Long symlink with illegal length");
+ link = kmalloc(len, GFP_NOFS);
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ } else if (befs_read_lsymlink(sb, data, link, len) != len) {
+ kfree(link);
+ befs_error(sb, "Failed to read entire long symlink");
link = ERR_PTR(-EIO);
} else {
- befs_debug(sb, "Follow long symlink");
-
- link = kmalloc(len, GFP_NOFS);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- } else if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
- befs_error(sb, "Failed to read entire long symlink");
- link = ERR_PTR(-EIO);
- } else {
- link[len - 1] = '\0';
- }
+ link[len - 1] = '\0';
}
- } else {
- link = befs_ino->i_data.symlink;
}
-
nd_set_link(nd, link);
return NULL;
}
-static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+
+static void *
+befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
{
befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
- if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
- char *link = nd_get_link(nd);
- if (!IS_ERR(link))
- kfree(link);
- }
+ nd_set_link(nd, befs_ino->i_data.symlink);
+ return NULL;
}
/*
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 89dec7f789a4..ca0ba15a7306 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -45,7 +45,6 @@ static int load_aout_library(struct file*);
*/
static int aout_core_dump(struct coredump_params *cprm)
{
- struct file *file = cprm->file;
mm_segment_t fs;
int has_dumped = 0;
void __user *dump_start;
@@ -85,10 +84,10 @@ static int aout_core_dump(struct coredump_params *cprm)
set_fs(KERNEL_DS);
/* struct user */
- if (!dump_write(file, &dump, sizeof(dump)))
+ if (!dump_emit(cprm, &dump, sizeof(dump)))
goto end_coredump;
/* Now dump all of the user data. Include malloced stuff as well */
- if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+ if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
goto end_coredump;
/* now we start writing out the user space info */
set_fs(USER_DS);
@@ -96,14 +95,14 @@ static int aout_core_dump(struct coredump_params *cprm)
if (dump.u_dsize != 0) {
dump_start = START_DATA(dump);
dump_size = dump.u_dsize << PAGE_SHIFT;
- if (!dump_write(file, dump_start, dump_size))
+ if (!dump_emit(cprm, dump_start, dump_size))
goto end_coredump;
}
/* Now prepare to dump the stack area */
if (dump.u_ssize != 0) {
dump_start = START_STACK(dump);
dump_size = dump.u_ssize << PAGE_SHIFT;
- if (!dump_write(file, dump_start, dump_size))
+ if (!dump_emit(cprm, dump_start, dump_size))
goto end_coredump;
}
end_coredump:
@@ -221,7 +220,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
* Requires a mmap handler. This prevents people from using a.out
* as part of an exploit attack against /proc-related vulnerabilities.
*/
- if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+ if (!bprm->file->f_op->mmap)
return -ENOEXEC;
fd_offset = N_TXTOFF(ex);
@@ -374,7 +373,7 @@ static int load_aout_library(struct file *file)
* Requires a mmap handler. This prevents people from using a.out
* as part of an exploit attack against /proc-related vulnerabilities.
*/
- if (!file->f_op || !file->f_op->mmap)
+ if (!file->f_op->mmap)
goto out;
if (N_FLAGS(ex))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4c94a79991bb..571a42326908 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out;
if (!elf_check_arch(interp_elf_ex))
goto out;
- if (!interpreter->f_op || !interpreter->f_op->mmap)
+ if (!interpreter->f_op->mmap)
goto out;
/*
@@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
- if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+ if (!bprm->file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file)
/* First of all, some simple consistency checks */
if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
+ !elf_check_arch(&elf_ex) || !file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -1225,35 +1225,17 @@ static int notesize(struct memelfnote *en)
return sz;
}
-#define DUMP_WRITE(addr, nr, foffset) \
- do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
-{
- static const char buf[4] = { 0, };
- DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
- return 1;
-}
-
-static int writenote(struct memelfnote *men, struct file *file,
- loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
struct elf_note en;
en.n_namesz = strlen(men->name) + 1;
en.n_descsz = men->datasz;
en.n_type = men->type;
- DUMP_WRITE(&en, sizeof(en), foffset);
- DUMP_WRITE(men->name, en.n_namesz, foffset);
- if (!alignfile(file, foffset))
- return 0;
- DUMP_WRITE(men->data, men->datasz, foffset);
- if (!alignfile(file, foffset))
- return 0;
-
- return 1;
+ return dump_emit(cprm, &en, sizeof(en)) &&
+ dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+ dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}
-#undef DUMP_WRITE
static void fill_elf_header(struct elfhdr *elf, int segs,
u16 machine, u32 flags)
@@ -1392,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
}
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
- siginfo_t *siginfo)
+ const siginfo_t *siginfo)
{
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
@@ -1599,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- siginfo_t *siginfo, struct pt_regs *regs)
+ const siginfo_t *siginfo, struct pt_regs *regs)
{
struct task_struct *dump_task = current;
const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1702,7 +1684,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
* process-wide notes are interleaved after the first thread-specific note.
*/
static int write_note_info(struct elf_note_info *info,
- struct file *file, loff_t *foffset)
+ struct coredump_params *cprm)
{
bool first = 1;
struct elf_thread_core_info *t = info->thread;
@@ -1710,22 +1692,22 @@ static int write_note_info(struct elf_note_info *info,
do {
int i;
- if (!writenote(&t->notes[0], file, foffset))
+ if (!writenote(&t->notes[0], cprm))
return 0;
- if (first && !writenote(&info->psinfo, file, foffset))
+ if (first && !writenote(&info->psinfo, cprm))
return 0;
- if (first && !writenote(&info->signote, file, foffset))
+ if (first && !writenote(&info->signote, cprm))
return 0;
- if (first && !writenote(&info->auxv, file, foffset))
+ if (first && !writenote(&info->auxv, cprm))
return 0;
if (first && info->files.data &&
- !writenote(&info->files, file, foffset))
+ !writenote(&info->files, cprm))
return 0;
for (i = 1; i < info->thread_notes; ++i)
if (t->notes[i].data &&
- !writenote(&t->notes[i], file, foffset))
+ !writenote(&t->notes[i], cprm))
return 0;
first = 0;
@@ -1848,34 +1830,31 @@ static int elf_note_info_init(struct elf_note_info *info)
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- siginfo_t *siginfo, struct pt_regs *regs)
+ const siginfo_t *siginfo, struct pt_regs *regs)
{
struct list_head *t;
+ struct core_thread *ct;
+ struct elf_thread_status *ets;
if (!elf_note_info_init(info))
return 0;
- if (siginfo->si_signo) {
- struct core_thread *ct;
- struct elf_thread_status *ets;
-
- for (ct = current->mm->core_state->dumper.next;
- ct; ct = ct->next) {
- ets = kzalloc(sizeof(*ets), GFP_KERNEL);
- if (!ets)
- return 0;
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+ if (!ets)
+ return 0;
- ets->thread = ct->task;
- list_add(&ets->list, &info->thread_list);
- }
+ ets->thread = ct->task;
+ list_add(&ets->list, &info->thread_list);
+ }
- list_for_each(t, &info->thread_list) {
- int sz;
+ list_for_each(t, &info->thread_list) {
+ int sz;
- ets = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(siginfo->si_signo, ets);
- info->thread_status_size += sz;
- }
+ ets = list_entry(t, struct elf_thread_status, list);
+ sz = elf_dump_thread_status(siginfo->si_signo, ets);
+ info->thread_status_size += sz;
}
/* now collect the dump for the current */
memset(info->prstatus, 0, sizeof(*info->prstatus));
@@ -1935,13 +1914,13 @@ static size_t get_note_info_size(struct elf_note_info *info)
}
static int write_note_info(struct elf_note_info *info,
- struct file *file, loff_t *foffset)
+ struct coredump_params *cprm)
{
int i;
struct list_head *t;
for (i = 0; i < info->numnote; i++)
- if (!writenote(info->notes + i, file, foffset))
+ if (!writenote(info->notes + i, cprm))
return 0;
/* write out the thread status notes section */
@@ -1950,7 +1929,7 @@ static int write_note_info(struct elf_note_info *info,
list_entry(t, struct elf_thread_status, list);
for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], file, foffset))
+ if (!writenote(&tmp->notes[i], cprm))
return 0;
}
@@ -2046,10 +2025,9 @@ static int elf_core_dump(struct coredump_params *cprm)
int has_dumped = 0;
mm_segment_t fs;
int segs;
- size_t size = 0;
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
- loff_t offset = 0, dataoff, foffset;
+ loff_t offset = 0, dataoff;
struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL;
@@ -2105,7 +2083,6 @@ static int elf_core_dump(struct coredump_params *cprm)
offset += sizeof(*elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
- foffset = offset;
/* Write notes phdr entry */
{
@@ -2136,13 +2113,10 @@ static int elf_core_dump(struct coredump_params *cprm)
offset = dataoff;
- size += sizeof(*elf);
- if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+ if (!dump_emit(cprm, elf, sizeof(*elf)))
goto end_coredump;
- size += sizeof(*phdr4note);
- if (size > cprm->limit
- || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+ if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
goto end_coredump;
/* Write program headers for segments dump */
@@ -2164,24 +2138,22 @@ static int elf_core_dump(struct coredump_params *cprm)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
- size += sizeof(phdr);
- if (size > cprm->limit
- || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+ if (!dump_emit(cprm, &phdr, sizeof(phdr)))
goto end_coredump;
}
- if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+ if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
/* write out the notes section */
- if (!write_note_info(&info, cprm->file, &foffset))
+ if (!write_note_info(&info, cprm))
goto end_coredump;
- if (elf_coredump_extra_notes_write(cprm->file, &foffset))
+ if (elf_coredump_extra_notes_write(cprm))
goto end_coredump;
/* Align to page */
- if (!dump_seek(cprm->file, dataoff - foffset))
+ if (!dump_skip(cprm, dataoff - cprm->written))
goto end_coredump;
for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2198,26 +2170,21 @@ static int elf_core_dump(struct coredump_params *cprm)
page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
- stop = ((size += PAGE_SIZE) > cprm->limit) ||
- !dump_write(cprm->file, kaddr,
- PAGE_SIZE);
+ stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
page_cache_release(page);
} else
- stop = !dump_seek(cprm->file, PAGE_SIZE);
+ stop = !dump_skip(cprm, PAGE_SIZE);
if (stop)
goto end_coredump;
}
}
- if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+ if (!elf_core_write_extra_data(cprm))
goto end_coredump;
if (e_phnum == PN_XNUM) {
- size += sizeof(*shdr4extnum);
- if (size > cprm->limit
- || !dump_write(cprm->file, shdr4extnum,
- sizeof(*shdr4extnum)))
+ if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
goto end_coredump;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c166f325a183..fe2a643ee005 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
return 0;
if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr))
return 0;
- if (!file->f_op || !file->f_op->mmap)
+ if (!file->f_op->mmap)
return 0;
return 1;
}
@@ -1267,35 +1267,17 @@ static int notesize(struct memelfnote *en)
/* #define DEBUG */
-#define DUMP_WRITE(addr, nr, foffset) \
- do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
-{
- static const char buf[4] = { 0, };
- DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
- return 1;
-}
-
-static int writenote(struct memelfnote *men, struct file *file,
- loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
struct elf_note en;
en.n_namesz = strlen(men->name) + 1;
en.n_descsz = men->datasz;
en.n_type = men->type;
- DUMP_WRITE(&en, sizeof(en), foffset);
- DUMP_WRITE(men->name, en.n_namesz, foffset);
- if (!alignfile(file, foffset))
- return 0;
- DUMP_WRITE(men->data, men->datasz, foffset);
- if (!alignfile(file, foffset))
- return 0;
-
- return 1;
+ return dump_emit(cprm, &en, sizeof(en)) &&
+ dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+ dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}
-#undef DUMP_WRITE
static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
{
@@ -1500,66 +1482,40 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
/*
* dump the segments for an MMU process
*/
-#ifdef CONFIG_MMU
-static int elf_fdpic_dump_segments(struct file *file, size_t *size,
- unsigned long *limit, unsigned long mm_flags)
+static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
{
struct vm_area_struct *vma;
- int err = 0;
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
unsigned long addr;
- if (!maydump(vma, mm_flags))
+ if (!maydump(vma, cprm->mm_flags))
continue;
+#ifdef CONFIG_MMU
for (addr = vma->vm_start; addr < vma->vm_end;
addr += PAGE_SIZE) {
+ bool res;
struct page *page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
- *size += PAGE_SIZE;
- if (*size > *limit)
- err = -EFBIG;
- else if (!dump_write(file, kaddr, PAGE_SIZE))
- err = -EIO;
+ res = dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
page_cache_release(page);
- } else if (!dump_seek(file, PAGE_SIZE))
- err = -EFBIG;
- if (err)
- goto out;
+ } else {
+ res = dump_skip(cprm, PAGE_SIZE);
+ }
+ if (!res)
+ return false;
}
- }
-out:
- return err;
-}
-#endif
-
-/*
- * dump the segments for a NOMMU process
- */
-#ifndef CONFIG_MMU
-static int elf_fdpic_dump_segments(struct file *file, size_t *size,
- unsigned long *limit, unsigned long mm_flags)
-{
- struct vm_area_struct *vma;
-
- for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
- if (!maydump(vma, mm_flags))
- continue;
-
- if ((*size += PAGE_SIZE) > *limit)
- return -EFBIG;
-
- if (!dump_write(file, (void *) vma->vm_start,
+#else
+ if (!dump_emit(cprm, (void *) vma->vm_start,
vma->vm_end - vma->vm_start))
- return -EIO;
+ return false;
+#endif
}
-
- return 0;
+ return true;
}
-#endif
static size_t elf_core_vma_data_size(unsigned long mm_flags)
{
@@ -1585,11 +1541,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
int has_dumped = 0;
mm_segment_t fs;
int segs;
- size_t size = 0;
int i;
struct vm_area_struct *vma;
struct elfhdr *elf = NULL;
- loff_t offset = 0, dataoff, foffset;
+ loff_t offset = 0, dataoff;
int numnote;
struct memelfnote *notes = NULL;
struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
@@ -1606,6 +1561,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
+ struct core_thread *ct;
+ struct elf_thread_status *tmp;
/*
* We no longer stop all VM operations.
@@ -1641,28 +1598,23 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto cleanup;
#endif
- if (cprm->siginfo->si_signo) {
- struct core_thread *ct;
- struct elf_thread_status *tmp;
-
- for (ct = current->mm->core_state->dumper.next;
- ct; ct = ct->next) {
- tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
- if (!tmp)
- goto cleanup;
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ goto cleanup;
- tmp->thread = ct->task;
- list_add(&tmp->list, &thread_list);
- }
+ tmp->thread = ct->task;
+ list_add(&tmp->list, &thread_list);
+ }
- list_for_each(t, &thread_list) {
- struct elf_thread_status *tmp;
- int sz;
+ list_for_each(t, &thread_list) {
+ struct elf_thread_status *tmp;
+ int sz;
- tmp = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
- thread_status_size += sz;
- }
+ tmp = list_entry(t, struct elf_thread_status, list);
+ sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
+ thread_status_size += sz;
}
/* now collect the dump for the current */
@@ -1720,7 +1672,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
offset += sizeof(*elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
- foffset = offset;
/* Write notes phdr entry */
{
@@ -1755,13 +1706,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
offset = dataoff;
- size += sizeof(*elf);
- if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+ if (!dump_emit(cprm, elf, sizeof(*elf)))
goto end_coredump;
- size += sizeof(*phdr4note);
- if (size > cprm->limit
- || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+ if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
goto end_coredump;
/* write program headers for segments dump */
@@ -1785,18 +1733,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
- size += sizeof(phdr);
- if (size > cprm->limit
- || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+ if (!dump_emit(cprm, &phdr, sizeof(phdr)))
goto end_coredump;
}
- if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+ if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
/* write out the notes section */
for (i = 0; i < numnote; i++)
- if (!writenote(notes + i, cprm->file, &foffset))
+ if (!writenote(notes + i, cprm))
goto end_coredump;
/* write out the thread status notes section */
@@ -1805,25 +1751,21 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
list_entry(t, struct elf_thread_status, list);
for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], cprm->file, &foffset))
+ if (!writenote(&tmp->notes[i], cprm))
goto end_coredump;
}
- if (!dump_seek(cprm->file, dataoff - foffset))
+ if (!dump_skip(cprm, dataoff - cprm->written))
goto end_coredump;
- if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
- cprm->mm_flags) < 0)
+ if (!elf_fdpic_dump_segments(cprm))
goto end_coredump;
- if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+ if (!elf_core_write_extra_data(cprm))
goto end_coredump;
if (e_phnum == PN_XNUM) {
- size += sizeof(*shdr4extnum);
- if (size > cprm->limit
- || !dump_write(cprm->file, shdr4extnum,
- sizeof(*shdr4extnum)))
+ if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
goto end_coredump;
}
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 037a3e2b045b..f37b08cea1f7 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm)
/* First of all, some simple consistency checks */
if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
(!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
- (!bprm->file->f_op || !bprm->file->f_op->mmap)) {
+ !bprm->file->f_op->mmap) {
return -ENOEXEC;
}
diff --git a/fs/bio.c b/fs/bio.c
index ea5035da4d9a..33d79a4eb92d 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
*page, unsigned int len, unsigned int offset,
- unsigned short max_sectors)
+ unsigned int max_sectors)
{
int retried_segments = 0;
struct bio_vec *bvec;
@@ -1805,6 +1805,52 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
EXPORT_SYMBOL(bio_split);
/**
+ * bio_trim - trim a bio
+ * @bio: bio to trim
+ * @offset: number of sectors to trim from the front of @bio
+ * @size: size we want to trim @bio to, in sectors
+ */
+void bio_trim(struct bio *bio, int offset, int size)
+{
+ /* 'bio' is a cloned bio which we need to trim to match
+ * the given offset and size.
+ * This requires adjusting bi_sector, bi_size, and bi_io_vec
+ */
+ int i;
+ struct bio_vec *bvec;
+ int sofar = 0;
+
+ size <<= 9;
+ if (offset == 0 && size == bio->bi_size)
+ return;
+
+ clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+ bio_advance(bio, offset << 9);
+
+ bio->bi_size = size;
+
+ /* avoid any complications with bi_idx being non-zero*/
+ if (bio->bi_idx) {
+ memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+ (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+ bio->bi_vcnt -= bio->bi_idx;
+ bio->bi_idx = 0;
+ }
+ /* Make sure vcnt and last bv are not too big */
+ bio_for_each_segment(bvec, bio, i) {
+ if (sofar + bvec->bv_len > size)
+ bvec->bv_len = size - sofar;
+ if (bvec->bv_len == 0) {
+ bio->bi_vcnt = i;
+ break;
+ }
+ sofar += bvec->bv_len;
+ }
+}
+EXPORT_SYMBOL_GPL(bio_trim);
+
+/**
* bio_sector_offset - Find hardware sector offset in bio
* @bio: bio to inspect
* @index: bio_vec index
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 398cbd517be2..aa976eced2d2 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,12 +9,17 @@ config BTRFS_FS
select XOR_BLOCKS
help
- Btrfs is a new filesystem with extents, writable snapshotting,
- support for multiple devices and many more features.
+ Btrfs is a general purpose copy-on-write filesystem with extents,
+ writable snapshotting, support for multiple devices and many more
+ features focused on fault tolerance, repair and easy administration.
- Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
- FINALIZED. You should say N here unless you are interested in
- testing Btrfs with non-critical data.
+ The filesystem disk format is no longer unstable, and it's not
+ expected to change unless there are strong reasons to do so. If there
+ is a format change, file systems with a unchanged format will
+ continue to be mountable and usable by newer kernels.
+
+ For more information, please see the web pages at
+ http://btrfs.wiki.kernel.org.
To compile this file system support as a module, choose M here. The
module will be called btrfs.
@@ -59,7 +64,8 @@ config BTRFS_FS_RUN_SANITY_TESTS
help
This will run some basic sanity tests on the free space cache
code to make sure it is acting as it should. These are mostly
- regression tests and are only really interesting to btrfs devlopers.
+ regression tests and are only really interesting to btrfs
+ developers.
If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a91a6a355cc5..1a44e42d602a 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -14,4 +14,6 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
-btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o
+btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
+ tests/extent-buffer-tests.o tests/btrfs-tests.o \
+ tests/extent-io-tests.o tests/inode-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0d8d3b..0890c83643e9 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else {
+ } else if (ret < 0) {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 08cc08f037a6..c1e0b0caf9cc 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -262,7 +262,7 @@ static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
struct btrfs_work *work = NULL;
struct list_head *cur = NULL;
- if(!list_empty(prio_head))
+ if (!list_empty(prio_head))
cur = prio_head->next;
smp_mb();
@@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
spin_lock_irq(&workers->lock);
if (workers->stopping) {
spin_unlock_irq(&workers->lock);
+ ret = -EINVAL;
goto fail_kthread;
}
list_add_tail(&worker->worker_list, &workers->idle_list);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0552a599b28f..3775947429b2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -185,6 +185,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
{
struct __prelim_ref *ref;
+ if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return 0;
+
ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
if (!ref)
return -ENOMEM;
@@ -323,8 +326,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
eb = path->nodes[level];
while (!eb) {
- if (!level) {
- WARN_ON(1);
+ if (WARN_ON(!level)) {
ret = 1;
goto out;
}
@@ -1619,7 +1621,7 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
- item = btrfs_item_nr(eb, slot);
+ item = btrfs_item_nr(slot);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 71f074e1870b..ac0b39db27d1 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,6 +19,7 @@
#ifndef __BTRFS_I__
#define __BTRFS_I__
+#include <linux/hash.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
@@ -179,6 +180,25 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
return container_of(inode, struct btrfs_inode, vfs_inode);
}
+static inline unsigned long btrfs_inode_hash(u64 objectid,
+ const struct btrfs_root *root)
+{
+ u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
+
+#if BITS_PER_LONG == 32
+ h = (h >> 32) ^ (h & 0xffffffff);
+#endif
+
+ return (unsigned long)h;
+}
+
+static inline void btrfs_insert_inode_hash(struct inode *inode)
+{
+ unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
+
+ __insert_inode_hash(inode, h);
+}
+
static inline u64 btrfs_ino(struct inode *inode)
{
u64 ino = BTRFS_I(inode)->location.objectid;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 1c47be187240..131d82800b3a 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -77,6 +77,15 @@
* the integrity of (super)-block write requests, do not
* enable the config option BTRFS_FS_CHECK_INTEGRITY to
* include and compile the integrity check tool.
+ *
+ * Expect millions of lines of information in the kernel log with an
+ * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
+ * kernel config to at least 26 (which is 64MB). Usually the value is
+ * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
+ * changed like this before LOG_BUF_SHIFT can be set to a high value:
+ * config LOG_BUF_SHIFT
+ * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
+ * range 12 30
*/
#include <linux/sched.h>
@@ -124,6 +133,7 @@
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
+#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000
struct btrfsic_dev_state;
struct btrfsic_state;
@@ -323,7 +333,6 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
-static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
@@ -1038,7 +1047,7 @@ leaf_item_out_of_bounce_error:
disk_item_offset,
sizeof(struct btrfs_item));
item_offset = btrfs_stack_item_offset(&disk_item);
- item_size = btrfs_stack_item_offset(&disk_item);
+ item_size = btrfs_stack_item_size(&disk_item);
disk_key = &disk_item.key;
type = btrfs_disk_key_type(disk_key);
@@ -1677,7 +1686,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
for (i = 0; i < num_pages;) {
struct bio *bio;
unsigned int j;
- DECLARE_COMPLETION_ONSTACK(complete);
bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
@@ -1688,8 +1696,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
}
bio->bi_bdev = block_ctx->dev->bdev;
bio->bi_sector = dev_bytenr >> 9;
- bio->bi_end_io = btrfsic_complete_bio_end_io;
- bio->bi_private = &complete;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1702,12 +1708,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
"btrfsic: error, failed to add a single page!\n");
return -1;
}
- submit_bio(READ, bio);
-
- /* this will also unplug the queue */
- wait_for_completion(&complete);
-
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (submit_bio_wait(READ, bio)) {
printk(KERN_INFO
"btrfsic: read error at logical %llu dev %s!\n",
block_ctx->start, block_ctx->dev->name);
@@ -1730,11 +1731,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
return block_ctx->len;
}
-static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
-
static void btrfsic_dump_database(struct btrfsic_state *state)
{
struct list_head *elem_all;
@@ -1900,7 +1896,9 @@ again:
dev_state,
dev_bytenr);
}
- if (block->logical_bytenr != bytenr) {
+ if (block->logical_bytenr != bytenr &&
+ !(!block->is_metadata &&
+ block->logical_bytenr == 0))
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c,"
@@ -1910,15 +1908,14 @@ again:
block->mirror_num,
btrfsic_get_block_type(state, block),
block->logical_bytenr);
- block->logical_bytenr = bytenr;
- } else if (state->print_mask &
- BTRFSIC_PRINT_MASK_VERBOSE)
+ else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c.\n",
bytenr, dev_state->name, dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state, block));
+ block->logical_bytenr = bytenr;
} else {
if (num_pages * PAGE_CACHE_SIZE <
state->datablock_size) {
@@ -2463,10 +2460,8 @@ static int btrfsic_process_written_superblock(
}
}
- if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
- WARN_ON(1);
+ if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
btrfsic_dump_tree(state);
- }
return 0;
}
@@ -2906,7 +2901,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
btrfsic_release_block_ctx(&block_ctx);
}
- if (!match) {
+ if (WARN_ON(!match)) {
printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
" buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
" phys_bytenr=%llu)!\n",
@@ -2923,7 +2918,6 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
bytenr, block_ctx.dev->name,
block_ctx.dev_bytenr, mirror_num);
}
- WARN_ON(1);
}
}
@@ -3000,14 +2994,12 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
return submit_bh(rw, bh);
}
-void btrfsic_submit_bio(int rw, struct bio *bio)
+static void __btrfsic_submit_bio(int rw, struct bio *bio)
{
struct btrfsic_dev_state *dev_state;
- if (!btrfsic_is_initialized) {
- submit_bio(rw, bio);
+ if (!btrfsic_is_initialized)
return;
- }
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
@@ -3017,6 +3009,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
(rw & WRITE) && NULL != bio->bi_io_vec) {
unsigned int i;
u64 dev_bytenr;
+ u64 cur_bytenr;
int bio_is_patched;
char **mapped_datav;
@@ -3035,6 +3028,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
GFP_NOFS);
if (!mapped_datav)
goto leave;
+ cur_bytenr = dev_bytenr;
for (i = 0; i < bio->bi_vcnt; i++) {
BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
@@ -3046,16 +3040,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
kfree(mapped_datav);
goto leave;
}
- if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE) ==
- (dev_state->state->print_mask &
- (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE)))
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
printk(KERN_INFO
- "#%u: page=%p, len=%u, offset=%u\n",
- i, bio->bi_io_vec[i].bv_page,
- bio->bi_io_vec[i].bv_len,
+ "#%u: bytenr=%llu, len=%u, offset=%u\n",
+ i, cur_bytenr, bio->bi_io_vec[i].bv_len,
bio->bi_io_vec[i].bv_offset);
+ cur_bytenr += bio->bi_io_vec[i].bv_len;
}
btrfsic_process_written_block(dev_state, dev_bytenr,
mapped_datav, bio->bi_vcnt,
@@ -3099,10 +3090,20 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
}
leave:
mutex_unlock(&btrfsic_mutex);
+}
+void btrfsic_submit_bio(int rw, struct bio *bio)
+{
+ __btrfsic_submit_bio(rw, bio);
submit_bio(rw, bio);
}
+int btrfsic_submit_bio_wait(int rw, struct bio *bio)
+{
+ __btrfsic_submit_bio(rw, bio);
+ return submit_bio_wait(rw, bio);
+}
+
int btrfsic_mount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices,
int including_extent_data, u32 print_mask)
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 8b59175cc502..13b8566c97ab 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -22,9 +22,11 @@
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
int btrfsic_submit_bh(int rw, struct buffer_head *bh);
void btrfsic_submit_bio(int rw, struct bio *bio);
+int btrfsic_submit_bio_wait(int rw, struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
+#define btrfsic_submit_bio_wait submit_bio_wait
#endif
int btrfsic_mount(struct btrfs_root *root,
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
deleted file mode 100644
index 7c4503ef6efd..000000000000
--- a/fs/btrfs/compat.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _COMPAT_H_
-#define _COMPAT_H_
-
-#define btrfs_drop_nlink(inode) drop_nlink(inode)
-#define btrfs_inc_nlink(inode) inc_nlink(inode)
-
-#endif /* _COMPAT_H_ */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 6aad98cb343f..1499b27b4186 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,7 +32,6 @@
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -360,7 +359,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- if(!bio) {
+ if (!bio) {
kfree(cb);
return -ENOMEM;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 61b5bcd57b7e..316136bd6dd7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -274,7 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, new_root_objectid);
- write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -996,7 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, root->root_key.objectid);
- write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1285,11 +1285,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(eb_root);
blocksize = btrfs_level_size(root, old_root->level);
old = read_tree_block(root, logical, blocksize, 0);
- if (!old || !extent_buffer_uptodate(old)) {
+ if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
free_extent_buffer(old);
pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
logical);
- WARN_ON(1);
} else {
eb = btrfs_clone_extent_buffer(old);
free_extent_buffer(old);
@@ -2758,7 +2757,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
- int prev_cmp;
+ int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2769,7 +2768,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
}
again:
- prev_cmp = -1;
b = get_old_root(root, time_seq);
level = btrfs_header_level(b);
p->locks[level] = BTRFS_READ_LOCK;
@@ -2787,6 +2785,11 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
+ /*
+ * Since we can unwind eb's we want to do a real search every
+ * time.
+ */
+ prev_cmp = -1;
ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
@@ -3148,7 +3151,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(c, root->root_key.objectid);
- write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c),
+ write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
@@ -3287,7 +3290,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(split, root->root_key.objectid);
write_extent_buffer(split, root->fs_info->fsid,
- btrfs_header_fsid(split), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
@@ -3337,8 +3340,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
if (!nr)
return 0;
btrfs_init_map_token(&token);
- start_item = btrfs_item_nr(l, start);
- end_item = btrfs_item_nr(l, end);
+ start_item = btrfs_item_nr(start);
+ end_item = btrfs_item_nr(end);
data_len = btrfs_token_item_offset(l, start_item, &token) +
btrfs_token_item_size(l, start_item, &token);
data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
@@ -3406,7 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
slot = path->slots[1];
i = left_nritems - 1;
while (i >= nr) {
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] > i)
@@ -3470,7 +3473,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space -= btrfs_token_item_size(right, item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
}
@@ -3612,7 +3615,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -3639,8 +3642,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
ret = 1;
goto out;
}
- if (!empty && push_items == btrfs_header_nritems(right))
- WARN_ON(1);
+ WARN_ON(!empty && push_items == btrfs_header_nritems(right));
/* push data from right to left */
copy_extent_buffer(left, right,
@@ -3663,7 +3665,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(left, item, &token);
btrfs_set_token_item_offset(left, item,
@@ -3694,7 +3696,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space = push_space - btrfs_token_item_size(right,
item, &token);
@@ -3835,7 +3837,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_item_end_nr(l, mid);
for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(right, i);
+ struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
ioff = btrfs_token_item_offset(right, item, &token);
@@ -4016,7 +4018,7 @@ again:
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
if (data_size && !tried_avoid_double)
goto push_for_double;
- split = 2 ;
+ split = 2;
}
}
}
@@ -4042,7 +4044,7 @@ again:
btrfs_set_header_owner(right, root->root_key.objectid);
btrfs_set_header_level(right, 0);
write_extent_buffer(right, root->fs_info->fsid,
- btrfs_header_fsid(right), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(right),
@@ -4177,7 +4179,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);
@@ -4200,7 +4202,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(leaf, &disk_key, slot);
- new_item = btrfs_item_nr(leaf, slot);
+ new_item = btrfs_item_nr(slot);
btrfs_set_item_offset(leaf, new_item, orig_offset);
btrfs_set_item_size(leaf, new_item, item_size - split_offset);
@@ -4339,7 +4341,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4387,7 +4389,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
fixup_low_keys(root, path, &disk_key, 1);
}
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, new_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4441,7 +4443,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4455,7 +4457,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
data_end = old_data;
old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4514,7 +4516,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr( i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - total_data, &token);
@@ -4535,7 +4537,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(leaf, slot + i);
+ item = btrfs_item_nr(slot + i);
btrfs_set_token_item_offset(leaf, item,
data_end - data_size[i], &token);
data_end -= data_size[i];
@@ -4730,7 +4732,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + dsize, &token);
@@ -4823,14 +4825,18 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
- if (key.offset > 0)
+ if (key.offset > 0) {
key.offset--;
- else if (key.type > 0)
+ } else if (key.type > 0) {
key.type--;
- else if (key.objectid > 0)
+ key.offset = (u64)-1;
+ } else if (key.objectid > 0) {
key.objectid--;
- else
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+ } else {
return 1;
+ }
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -4866,7 +4872,6 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* was nothing in the tree that matched the search criteria.
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans)
{
@@ -4911,10 +4916,8 @@ again:
* If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
- u64 blockptr;
u64 gen;
- blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
if (gen < min_trans) {
slot++;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0506f40ede83..54ab86127f7a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -47,6 +47,12 @@ extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+#define STATIC noinline
+#else
+#define STATIC static noinline
+#endif
+
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
#define BTRFS_MAX_MIRRORS 3
@@ -1580,7 +1586,6 @@ struct btrfs_fs_info {
atomic_t scrubs_paused;
atomic_t scrub_cancel_req;
wait_queue_head_t scrub_pause_wait;
- struct rw_semaphore scrub_super_lock;
int scrub_workers_refcnt;
struct btrfs_workers scrub_workers;
struct btrfs_workers scrub_wr_completion_workers;
@@ -1724,7 +1729,9 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
-
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ int dummy_root;
+#endif
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
@@ -2461,8 +2468,7 @@ static inline unsigned long btrfs_item_nr_offset(int nr)
sizeof(struct btrfs_item) * nr;
}
-static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
- int nr)
+static inline struct btrfs_item *btrfs_item_nr(int nr)
{
return (struct btrfs_item *)btrfs_item_nr_offset(nr);
}
@@ -2475,30 +2481,30 @@ static inline u32 btrfs_item_end(struct extent_buffer *eb,
static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
+ return btrfs_item_end(eb, btrfs_item_nr(nr));
}
static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
+ return btrfs_item_offset(eb, btrfs_item_nr(nr));
}
static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
+ return btrfs_item_size(eb, btrfs_item_nr(nr));
}
static inline void btrfs_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
- struct btrfs_item *item = btrfs_item_nr(eb, nr);
+ struct btrfs_item *item = btrfs_item_nr(nr);
read_eb_member(eb, item, struct btrfs_item, key, disk_key);
}
static inline void btrfs_set_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
- struct btrfs_item *item = btrfs_item_nr(eb, nr);
+ struct btrfs_item *item = btrfs_item_nr(nr);
write_eb_member(eb, item, struct btrfs_item, key, disk_key);
}
@@ -2666,7 +2672,7 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
btrfs_set_header_flags(eb, flags);
}
-static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_fsid(void)
{
return offsetof(struct btrfs_header, fsid);
}
@@ -3105,11 +3111,6 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
((unsigned long)(btrfs_leaf_data(leaf) + \
btrfs_item_offset_nr(leaf, slot)))
-static inline struct dentry *fdentry(struct file *file)
-{
- return file->f_path.dentry;
-}
-
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
{
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
@@ -3308,7 +3309,6 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
u64 min_trans);
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans);
enum btrfs_compare_tree_result {
@@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig);
-int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u64 isize);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
/* inode.c */
@@ -3675,8 +3672,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
- int delay_iput);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -3745,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
-int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
- u64 start, u64 end, int skip_pinned,
- int modified);
extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
@@ -3944,9 +3937,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
void btrfs_scrub_pause(struct btrfs_root *root);
-void btrfs_scrub_pause_super(struct btrfs_root *root);
void btrfs_scrub_continue(struct btrfs_root *root);
-void btrfs_scrub_continue_super(struct btrfs_root *root);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
@@ -4028,5 +4019,9 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
+/* Sanity test specific functions */
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_destroy_inode(struct inode *inode);
+#endif
#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cbd9523ad09c..8d292fbae659 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -108,8 +108,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
return node;
}
btrfs_inode->delayed_node = node;
- atomic_inc(&node->refs); /* can be accessed */
- atomic_inc(&node->refs); /* cached in the inode */
+ /* can be accessed and cached in the inode */
+ atomic_add(2, &node->refs);
spin_unlock(&root->inode_lock);
return node;
}
@@ -138,8 +138,8 @@ again:
return ERR_PTR(-ENOMEM);
btrfs_init_delayed_node(node, root, ino);
- atomic_inc(&node->refs); /* cached in the btrfs inode */
- atomic_inc(&node->refs); /* can be accessed */
+ /* cached in the btrfs inode and can be accessed */
+ atomic_add(2, &node->refs);
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret) {
@@ -649,14 +649,13 @@ static int btrfs_delayed_inode_reserve_metadata(
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret)
+ if (!WARN_ON(ret))
goto out;
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
*/
- WARN_ON(1);
ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
dst_rsv, num_bytes);
goto out;
@@ -771,13 +770,13 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
*/
btrfs_set_path_blocking(path);
- keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
+ keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
if (!keys) {
ret = -ENOMEM;
goto out;
}
- data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
+ data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
if (!data_size) {
ret = -ENOMEM;
goto error;
@@ -1174,8 +1173,10 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
mutex_unlock(&delayed_node->mutex);
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ btrfs_release_delayed_node(delayed_node);
return -ENOMEM;
+ }
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 9efb94e95858..2cfc3dfff64f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -26,7 +26,6 @@
#include <linux/kthread.h>
#include <linux/math64.h>
#include <asm/div64.h>
-#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -38,7 +37,6 @@
#include "rcu-string.h"
#include "dev-replace.h"
-static u64 btrfs_get_seconds_since_1970(void);
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
static void btrfs_dev_replace_update_device_in_mapping_tree(
@@ -296,13 +294,6 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item;
}
-static u64 btrfs_get_seconds_since_1970(void)
-{
- struct timespec t = CURRENT_TIME_SEC;
-
- return t.tv_sec;
-}
-
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -375,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->tgtdev = tgt_device;
printk_in_rcu(KERN_INFO
- "btrfs: dev_replace from %s (devid %llu) to %s) started\n",
+ "btrfs: dev_replace from %s (devid %llu) to %s started\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
@@ -390,7 +381,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
* go to the tgtdev as well (refer to btrfs_map_block()).
*/
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
- dev_replace->time_started = btrfs_get_seconds_since_1970();
+ dev_replace->time_started = get_seconds();
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
@@ -400,7 +391,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
- btrfs_wait_all_ordered_extents(root->fs_info);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
@@ -470,12 +461,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
- ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0);
+ ret = btrfs_start_delalloc_roots(root->fs_info, 0);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
- btrfs_wait_all_ordered_extents(root->fs_info);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -493,7 +484,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
- dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+ dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
if (scrub_ret) {
@@ -650,6 +641,9 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
u64 result;
int ret;
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
@@ -668,7 +662,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
break;
}
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
- dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+ dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
@@ -703,7 +697,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
- dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+ dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
pr_info("btrfs: suspending dev_replace for unmount\n");
break;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 79e594e341c7..c031ea3fd70f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -58,7 +58,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
return ERR_PTR(ret);
WARN_ON(ret > 0);
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
BUG_ON(data_size > btrfs_item_size(leaf, item));
ptr += btrfs_item_size(leaf, item) - data_size;
@@ -474,8 +474,10 @@ int verify_dir_item(struct btrfs_root *root,
}
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
- if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
- printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
+ if ((btrfs_dir_data_len(leaf, dir_item) +
+ btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
+ printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n",
+ (unsigned)btrfs_dir_name_len(leaf, dir_item),
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62176ad89846..8072cfa8a3b1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -33,7 +33,6 @@
#include <linux/uuid.h>
#include <linux/semaphore.h>
#include <asm/unaligned.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -64,7 +63,6 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
-static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
@@ -477,14 +475,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
if (page != eb->pages[0])
return 0;
found_start = btrfs_header_bytenr(eb);
- if (found_start != start) {
- WARN_ON(1);
+ if (WARN_ON(found_start != start || !PageUptodate(page)))
return 0;
- }
- if (!PageUptodate(page)) {
- WARN_ON(1);
- return 0;
- }
csum_tree_block(root, eb, 0);
return 0;
}
@@ -496,7 +488,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
u8 fsid[BTRFS_UUID_SIZE];
int ret = 1;
- read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE);
+ read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
while (fs_devices) {
if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
ret = 0;
@@ -1105,8 +1097,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
{
struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
- eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
- bytenr, blocksize);
+ eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
return eb;
}
@@ -1229,14 +1220,18 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->refs, 1);
root->log_transid = 0;
root->last_log_commit = 0;
- extent_io_tree_init(&root->dirty_log_pages,
- fs_info->btree_inode->i_mapping);
+ if (fs_info)
+ extent_io_tree_init(&root->dirty_log_pages,
+ fs_info->btree_inode->i_mapping);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
- root->defrag_trans_start = fs_info->generation;
+ if (fs_info)
+ root->defrag_trans_start = fs_info->generation;
+ else
+ root->defrag_trans_start = 0;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
@@ -1253,6 +1248,22 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+/* Should only be used by the testing infrastructure */
+struct btrfs_root *btrfs_alloc_dummy_root(void)
+{
+ struct btrfs_root *root;
+
+ root = btrfs_alloc_root(NULL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+ __setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
+ root->dummy_root = 1;
+
+ return root;
+}
+#endif
+
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid)
@@ -1292,7 +1303,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
- write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf),
+ write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(leaf),
@@ -1379,7 +1390,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->node = leaf;
write_extent_buffer(root->node, root->fs_info->fsid,
- btrfs_header_fsid(root->node), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
return root;
@@ -1780,6 +1791,9 @@ sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+ if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
+ &root->fs_info->fs_state)))
+ btrfs_cleanup_transaction(root);
if (!try_to_freeze()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
@@ -2013,50 +2027,28 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
}
+static void free_root_extent_buffers(struct btrfs_root *root)
+{
+ if (root) {
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ root->node = NULL;
+ root->commit_root = NULL;
+ }
+}
+
/* helper to cleanup tree roots */
static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
{
- free_extent_buffer(info->tree_root->node);
- free_extent_buffer(info->tree_root->commit_root);
- info->tree_root->node = NULL;
- info->tree_root->commit_root = NULL;
-
- if (info->dev_root) {
- free_extent_buffer(info->dev_root->node);
- free_extent_buffer(info->dev_root->commit_root);
- info->dev_root->node = NULL;
- info->dev_root->commit_root = NULL;
- }
- if (info->extent_root) {
- free_extent_buffer(info->extent_root->node);
- free_extent_buffer(info->extent_root->commit_root);
- info->extent_root->node = NULL;
- info->extent_root->commit_root = NULL;
- }
- if (info->csum_root) {
- free_extent_buffer(info->csum_root->node);
- free_extent_buffer(info->csum_root->commit_root);
- info->csum_root->node = NULL;
- info->csum_root->commit_root = NULL;
- }
- if (info->quota_root) {
- free_extent_buffer(info->quota_root->node);
- free_extent_buffer(info->quota_root->commit_root);
- info->quota_root->node = NULL;
- info->quota_root->commit_root = NULL;
- }
- if (info->uuid_root) {
- free_extent_buffer(info->uuid_root->node);
- free_extent_buffer(info->uuid_root->commit_root);
- info->uuid_root->node = NULL;
- info->uuid_root->commit_root = NULL;
- }
- if (chunk_root) {
- free_extent_buffer(info->chunk_root->node);
- free_extent_buffer(info->chunk_root->commit_root);
- info->chunk_root->node = NULL;
- info->chunk_root->commit_root = NULL;
- }
+ free_root_extent_buffers(info->tree_root);
+
+ free_root_extent_buffers(info->dev_root);
+ free_root_extent_buffers(info->extent_root);
+ free_root_extent_buffers(info->csum_root);
+ free_root_extent_buffers(info->quota_root);
+ free_root_extent_buffers(info->uuid_root);
+ if (chunk_root)
+ free_root_extent_buffers(info->chunk_root);
}
static void del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2230,7 +2222,6 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->scrubs_paused, 0);
atomic_set(&fs_info->scrub_cancel_req, 0);
init_waitqueue_head(&fs_info->scrub_pause_wait);
- init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
fs_info->check_integrity_print_mask = 0;
@@ -2272,7 +2263,7 @@ int open_ctree(struct super_block *sb,
sizeof(struct btrfs_key));
set_bit(BTRFS_INODE_DUMMY,
&BTRFS_I(fs_info->btree_inode)->runtime_flags);
- insert_inode_hash(fs_info->btree_inode);
+ btrfs_insert_inode_hash(fs_info->btree_inode);
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
@@ -2670,6 +2661,7 @@ retry_root_backup:
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
+ btrfs_set_root_refs(&tree_root->root_item, 1);
location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3448,10 +3440,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors)
{
- int ret;
-
- ret = write_all_supers(root, max_mirrors);
- return ret;
+ return write_all_supers(root, max_mirrors);
}
/* Drop a fs root from the radix tree and free it. */
@@ -3528,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
int btrfs_commit_super(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
- int ret;
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
@@ -3542,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root)
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- /* run commit again to drop the original snapshot */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- ret = btrfs_write_and_wait_transaction(NULL, root);
- if (ret) {
- btrfs_error(root->fs_info, ret,
- "Failed to sync btree inode to disk.");
- return ret;
- }
-
- ret = write_ctree_super(NULL, root, 0);
- return ret;
+ return btrfs_commit_transaction(trans, root);
}
int close_ctree(struct btrfs_root *root)
@@ -3614,12 +3584,12 @@ int close_ctree(struct btrfs_root *root)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
+ del_fs_roots(fs_info);
+
btrfs_free_block_groups(fs_info);
btrfs_stop_all_workers(fs_info);
- del_fs_roots(fs_info);
-
free_root_pointers(fs_info, 1);
iput(fs_info->btree_inode);
@@ -3669,10 +3639,20 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
- struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+ struct btrfs_root *root;
u64 transid = btrfs_header_generation(buf);
int was_dirty;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ /*
+ * This is a fast path so only do this check if we have sanity tests
+ * enabled. Normal people shouldn't be marking dummy buffers as dirty
+ * outside of the sanity tests.
+ */
+ if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
+ return;
+#endif
+ root = BTRFS_I(buf->pages[0]->mapping->host)->root;
btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
@@ -3802,7 +3782,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
- list_del_init(&root->ordered_root);
+ list_move_tail(&root->ordered_root,
+ &fs_info->ordered_roots);
btrfs_destroy_ordered_extents(root);
@@ -3880,24 +3861,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
-static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
-{
- struct btrfs_pending_snapshot *snapshot;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- list_splice_init(&t->pending_snapshots, &splice);
-
- while (!list_empty(&splice)) {
- snapshot = list_entry(splice.next,
- struct btrfs_pending_snapshot,
- list);
- snapshot->error = -ECANCELED;
- list_del_init(&snapshot->list);
- }
-}
-
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
@@ -4027,15 +3990,13 @@ again:
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
+ btrfs_destroy_ordered_operations(cur_trans, root);
+
btrfs_destroy_delayed_refs(cur_trans, root);
- btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
- cur_trans->dirty_pages.dirty_bytes);
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&root->fs_info->transaction_blocked_wait);
- btrfs_evict_pending_snapshots(cur_trans);
-
cur_trans->state = TRANS_STATE_UNBLOCKED;
wake_up(&root->fs_info->transaction_wait);
@@ -4059,63 +4020,51 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
static int btrfs_cleanup_transaction(struct btrfs_root *root)
{
struct btrfs_transaction *t;
- LIST_HEAD(list);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
- list_splice_init(&root->fs_info->trans_list, &list);
- root->fs_info->running_transaction = NULL;
- spin_unlock(&root->fs_info->trans_lock);
-
- while (!list_empty(&list)) {
- t = list_entry(list.next, struct btrfs_transaction, list);
-
- btrfs_destroy_ordered_operations(t, root);
-
- btrfs_destroy_all_ordered_extents(root->fs_info);
-
- btrfs_destroy_delayed_refs(t, root);
-
- /*
- * FIXME: cleanup wait for commit
- * We needn't acquire the lock here, because we are during
- * the umount, there is no other task which will change it.
- */
- t->state = TRANS_STATE_COMMIT_START;
- smp_mb();
- if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
- wake_up(&root->fs_info->transaction_blocked_wait);
-
- btrfs_evict_pending_snapshots(t);
-
- t->state = TRANS_STATE_UNBLOCKED;
- smp_mb();
- if (waitqueue_active(&root->fs_info->transaction_wait))
- wake_up(&root->fs_info->transaction_wait);
-
- btrfs_destroy_delayed_inodes(root);
- btrfs_assert_delayed_root_empty(root);
-
- btrfs_destroy_all_delalloc_inodes(root->fs_info);
-
- btrfs_destroy_marked_extents(root, &t->dirty_pages,
- EXTENT_DIRTY);
-
- btrfs_destroy_pinned_extent(root,
- root->fs_info->pinned_extents);
-
- t->state = TRANS_STATE_COMPLETED;
- smp_mb();
- if (waitqueue_active(&t->commit_wait))
- wake_up(&t->commit_wait);
+ while (!list_empty(&root->fs_info->trans_list)) {
+ t = list_first_entry(&root->fs_info->trans_list,
+ struct btrfs_transaction, list);
+ if (t->state >= TRANS_STATE_COMMIT_START) {
+ atomic_inc(&t->use_count);
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_wait_for_commit(root, t->transid);
+ btrfs_put_transaction(t);
+ spin_lock(&root->fs_info->trans_lock);
+ continue;
+ }
+ if (t == root->fs_info->running_transaction) {
+ t->state = TRANS_STATE_COMMIT_DOING;
+ spin_unlock(&root->fs_info->trans_lock);
+ /*
+ * We wait for 0 num_writers since we don't hold a trans
+ * handle open currently for this transaction.
+ */
+ wait_event(t->writer_wait,
+ atomic_read(&t->num_writers) == 0);
+ } else {
+ spin_unlock(&root->fs_info->trans_lock);
+ }
+ btrfs_cleanup_one_transaction(t, root);
- atomic_set(&t->use_count, 0);
+ spin_lock(&root->fs_info->trans_lock);
+ if (t == root->fs_info->running_transaction)
+ root->fs_info->running_transaction = NULL;
list_del_init(&t->list);
- memset(t, 0, sizeof(*t));
- kmem_cache_free(btrfs_transaction_cachep, t);
- }
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_put_transaction(t);
+ trace_btrfs_transaction_commit(root);
+ spin_lock(&root->fs_info->trans_lock);
+ }
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_destroy_all_ordered_extents(root->fs_info);
+ btrfs_destroy_delayed_inodes(root);
+ btrfs_assert_delayed_root_empty(root);
+ btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents);
+ btrfs_destroy_all_delalloc_inodes(root->fs_info);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
return 0;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 5ce2a7da8b11..53059df350f8 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -86,6 +86,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_root *root);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct btrfs_root *btrfs_alloc_dummy_root(void);
+#endif
+
/*
* This function is used to grab the root, and avoid it is freed when we
* access it. But it doesn't ensure that the tree is not dropped.
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 4b8691607373..41422a3de8ed 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -5,7 +5,6 @@
#include "btrfs_inode.h"
#include "print-tree.h"
#include "export.h"
-#include "compat.h"
#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
parent_objectid) / 4)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d58bef130a41..45d98d01028f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,7 +25,6 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
-#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
@@ -1551,9 +1550,8 @@ again:
if (ret && !insert) {
err = -ENOENT;
goto out;
- } else if (ret) {
+ } else if (WARN_ON(ret)) {
err = -EIO;
- WARN_ON(1);
goto out;
}
@@ -1979,7 +1977,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_extent_item *item;
u64 refs;
int ret;
- int err = 0;
path = btrfs_alloc_path();
if (!path)
@@ -1992,14 +1989,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path, bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if (ret == 0)
+ if (ret != -EAGAIN)
goto out;
- if (ret != -EAGAIN) {
- err = ret;
- goto out;
- }
-
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
@@ -2021,7 +2013,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
out:
btrfs_free_path(path);
- return err;
+ return ret;
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
@@ -2137,15 +2129,28 @@ again:
}
if (ret > 0) {
if (metadata) {
- btrfs_release_path(path);
- metadata = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == node->bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == node->num_bytes)
+ ret = 0;
+ }
+ if (ret > 0) {
+ btrfs_release_path(path);
+ metadata = 0;
- key.offset = node->num_bytes;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- goto again;
+ key.objectid = node->bytenr;
+ key.offset = node->num_bytes;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ goto again;
+ }
+ } else {
+ err = -EIO;
+ goto out;
}
- err = -EIO;
- goto out;
}
leaf = path->nodes[0];
@@ -2234,8 +2239,12 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
- if (trans->aborted)
+ if (trans->aborted) {
+ if (insert_reserved)
+ btrfs_pin_extent(root, node->bytenr,
+ node->num_bytes, 1);
return 0;
+ }
if (btrfs_delayed_ref_is_head(node)) {
struct btrfs_delayed_ref_head *head;
@@ -2411,6 +2420,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
+ /*
+ * Need to reset must_insert_reserved if
+ * there was an error so the abort stuff
+ * can cleanup the reserved space
+ * properly.
+ */
+ if (must_insert_reserved)
+ locked_ref->must_insert_reserved = 1;
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
@@ -3197,8 +3214,7 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, path,
- inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
if (ret)
goto out_put;
}
@@ -3318,10 +3334,9 @@ again:
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
+ btrfs_put_block_group(cache);
if (err) /* File system offline */
goto out;
-
- btrfs_put_block_group(cache);
}
while (1) {
@@ -3605,10 +3620,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
- alloc_chunk = 0;
+ if (btrfs_is_free_space_inode(inode)) {
committed = 1;
+ ASSERT(current->journal_info);
}
data_sinfo = fs_info->data_sinfo;
@@ -3636,6 +3650,16 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
+ /*
+ * It is ugly that we don't call nolock join
+ * transaction for the free space inode case here.
+ * But it is safe because we only do the data space
+ * reservation for the free space cache in the
+ * transaction context, the common join transaction
+ * just increase the counter of the current transaction
+ * handler, doesn't try to acquire the trans_lock of
+ * the fs.
+ */
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -3681,6 +3705,9 @@ commit_trans:
goto again;
}
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info:enospc",
+ data_sinfo->flags, bytes, 1);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
@@ -3989,12 +4016,26 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
- btrfs_start_all_delalloc_inodes(root->fs_info, 0);
+ btrfs_start_delalloc_roots(root->fs_info, 0);
if (!current->journal_info)
- btrfs_wait_all_ordered_extents(root->fs_info);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
}
}
+static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
+{
+ u64 bytes;
+ int nr;
+
+ bytes = btrfs_calc_trans_metadata_size(root, 1);
+ nr = (int)div64_u64(to_reclaim, bytes);
+ if (!nr)
+ nr = 1;
+ return nr;
+}
+
+#define EXTENT_SIZE_PER_ITEM (256 * 1024)
+
/*
* shrink metadata reservation for delalloc
*/
@@ -4007,24 +4048,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
u64 delalloc_bytes;
u64 max_reclaim;
long time_left;
- unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
- int loops = 0;
+ unsigned long nr_pages;
+ int loops;
+ int items;
enum btrfs_reserve_flush_enum flush;
+ /* Calc the number of the pages we need flush for space reservation */
+ items = calc_reclaim_items_nr(root, to_reclaim);
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
+
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
- smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
if (delalloc_bytes == 0) {
if (trans)
return;
- btrfs_wait_all_ordered_extents(root->fs_info);
+ if (wait_ordered)
+ btrfs_wait_ordered_roots(root->fs_info, items);
return;
}
+ loops = 0;
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
@@ -4033,9 +4080,19 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
* We need to wait for the async pages to actually start before
* we do anything.
*/
- wait_event(root->fs_info->async_submit_wait,
- !atomic_read(&root->fs_info->async_delalloc_pages));
+ max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
+ if (!max_reclaim)
+ goto skip_async;
+
+ if (max_reclaim <= nr_pages)
+ max_reclaim = 0;
+ else
+ max_reclaim -= nr_pages;
+ wait_event(root->fs_info->async_submit_wait,
+ atomic_read(&root->fs_info->async_delalloc_pages) <=
+ (int)max_reclaim);
+skip_async:
if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
@@ -4049,13 +4106,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops++;
if (wait_ordered && !trans) {
- btrfs_wait_all_ordered_extents(root->fs_info);
+ btrfs_wait_ordered_roots(root->fs_info, items);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
- smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
}
@@ -4140,16 +4196,11 @@ static int flush_space(struct btrfs_root *root,
switch (state) {
case FLUSH_DELAYED_ITEMS_NR:
case FLUSH_DELAYED_ITEMS:
- if (state == FLUSH_DELAYED_ITEMS_NR) {
- u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
-
- nr = (int)div64_u64(num_bytes, bytes);
- if (!nr)
- nr = 1;
- nr *= 2;
- } else {
+ if (state == FLUSH_DELAYED_ITEMS_NR)
+ nr = calc_reclaim_items_nr(root, num_bytes) * 2;
+ else
nr = -1;
- }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -4332,6 +4383,10 @@ out:
!block_rsv_use_bytes(global_rsv, orig_bytes))
ret = 0;
}
+ if (ret == -ENOSPC)
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info:enospc",
+ space_info->flags, orig_bytes, 1);
if (flushing) {
spin_lock(&space_info->lock);
space_info->flush = 0;
@@ -4986,7 +5041,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
if (to_reserve)
- trace_btrfs_space_reservation(root->fs_info,"delalloc",
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
@@ -5264,6 +5319,8 @@ static int pin_down_extent(struct btrfs_root *root,
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
+ if (reserved)
+ trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0;
}
@@ -5718,9 +5775,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
extent_slot = path->slots[0];
}
- } else if (ret == -ENOENT) {
+ } else if (WARN_ON(ret == -ENOENT)) {
btrfs_print_leaf(extent_root, path->nodes[0]);
- WARN_ON(1);
btrfs_err(info,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
@@ -5967,6 +6023,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_add_free_space(cache, buf->start, buf->len);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+ trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
out:
@@ -6594,8 +6651,6 @@ again:
}
}
- trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
-
return ret;
}
@@ -6707,6 +6762,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ins->objectid, ins->offset);
BUG();
}
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
return ret;
}
@@ -6731,13 +6787,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
size += sizeof(*block_info);
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ btrfs_free_and_pin_reserved_extent(root, ins->objectid,
+ root->leafsize);
return -ENOMEM;
+ }
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
if (ret) {
+ btrfs_free_and_pin_reserved_extent(root, ins->objectid,
+ root->leafsize);
btrfs_free_path(path);
return ret;
}
@@ -6779,6 +6840,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
ins->objectid, ins->offset);
BUG();
}
+
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
return ret;
}
@@ -7983,7 +8046,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
spin_lock(&sinfo->lock);
- for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
if (!list_empty(&sinfo->block_groups[i]))
free_bytes += __btrfs_get_ro_block_group_free_space(
&sinfo->block_groups[i]);
@@ -8271,15 +8334,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
release_global_block_rsv(info);
- while(!list_empty(&info->space_info)) {
+ while (!list_empty(&info->space_info)) {
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
- if (space_info->bytes_pinned > 0 ||
+ if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
- space_info->bytes_may_use > 0) {
- WARN_ON(1);
+ space_info->bytes_may_use > 0)) {
dump_space_info(space_info, 0, 0);
}
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 51731b76900d..ff43802a7c88 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,13 +13,13 @@
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
-#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
+#include "backref.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -1597,11 +1597,10 @@ done:
*
* 1 is returned if we find something, 0 if nothing was in the tree
*/
-static noinline u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
- struct page *locked_page,
- u64 *start, u64 *end,
- u64 max_bytes)
+STATIC u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page, u64 *start,
+ u64 *end, u64 max_bytes)
{
u64 delalloc_start;
u64 delalloc_end;
@@ -1740,10 +1739,8 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 last = 0;
int found = 0;
- if (search_end <= cur_start) {
- WARN_ON(1);
+ if (WARN_ON(search_end <= cur_start))
return 0;
- }
spin_lock(&tree->lock);
if (cur_start == 0 && bits == EXTENT_DIRTY) {
@@ -1955,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
return err;
}
-static void repair_io_failure_callback(struct bio *bio, int err)
-{
- complete(bio->bi_private);
-}
-
/*
* this bypasses the standard btrfs submit functions deliberately, as
* the standard behavior is to write all copies in a raid setup. here we only
@@ -1976,13 +1968,13 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
{
struct bio *bio;
struct btrfs_device *dev;
- DECLARE_COMPLETION_ONSTACK(compl);
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
+ ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
/* we can't repair anything in raid56 yet */
@@ -1992,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
- bio->bi_private = &compl;
- bio->bi_end_io = repair_io_failure_callback;
bio->bi_size = 0;
map_length = length;
@@ -2014,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start - page_offset(page));
- btrfsic_submit_bio(WRITE_SYNC, bio);
- wait_for_completion(&compl);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
@@ -2039,6 +2027,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2060,12 +2051,12 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info;
+ struct inode *inode = page->mapping->host;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
int ret;
- struct inode *inode = page->mapping->host;
private = 0;
ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2088,6 +2079,8 @@ static int clean_io_failure(u64 start, struct page *page)
did_repair = 1;
goto out;
}
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ goto out;
spin_lock(&BTRFS_I(inode)->io_tree.lock);
state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2097,7 +2090,6 @@ static int clean_io_failure(u64 start, struct page *page)
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
- fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
@@ -3569,9 +3561,8 @@ retry:
* but no sense in crashing the users box for something
* we can survive anyway.
*/
- if (!eb) {
+ if (WARN_ON(!eb)) {
spin_unlock(&mapping->private_lock);
- WARN_ON(1);
continue;
}
@@ -4038,7 +4029,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (offset >= last)
return NULL;
- while(1) {
+ while (1) {
len = last - offset;
if (len == 0)
break;
@@ -4062,6 +4053,19 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
+static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
+{
+ unsigned long cnt = *((unsigned long *)ctx);
+
+ cnt++;
+ *((unsigned long *)ctx) = cnt;
+
+ /* Now we're sure that the extent is shared. */
+ if (cnt > 1)
+ return 1;
+ return 0;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4128,7 +4132,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last = found_key.offset;
last_for_get_extent = last + 1;
}
- btrfs_free_path(path);
+ btrfs_release_path(path);
/*
* we might have some extents allocated but more delalloc past those
@@ -4198,7 +4202,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
+ unsigned long ref_cnt = 0;
+
disko = em->block_start + offset_in_extent;
+
+ /*
+ * As btrfs supports shared space, this information
+ * can be exported to userspace tools via
+ * flag FIEMAP_EXTENT_SHARED.
+ */
+ ret = iterate_inodes_from_logical(
+ em->block_start,
+ BTRFS_I(inode)->root->fs_info,
+ path, count_ext_ref, &ref_cnt);
+ if (ret < 0 && ret != -ENOENT)
+ goto out_free;
+
+ if (ref_cnt > 1)
+ flags |= FIEMAP_EXTENT_SHARED;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
@@ -4230,6 +4251,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
+ btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
@@ -4455,6 +4477,23 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
}
}
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 start)
+{
+ struct extent_buffer *eb;
+
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ mark_extent_buffer_accessed(eb);
+ return eb;
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len)
{
@@ -4468,14 +4507,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
int uptodate = 1;
int ret;
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
+
+ eb = find_extent_buffer(tree, start);
+ if (eb)
return eb;
- }
- rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
if (!eb)
@@ -4534,24 +4569,17 @@ again:
spin_lock(&tree->buffer_lock);
ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
if (ret == -EEXIST) {
- exists = radix_tree_lookup(&tree->buffer,
- start >> PAGE_CACHE_SHIFT);
- if (!atomic_inc_not_zero(&exists->refs)) {
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- exists = NULL;
+ exists = find_extent_buffer(tree, start);
+ if (exists)
+ goto free_eb;
+ else
goto again;
- }
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- mark_extent_buffer_accessed(exists);
- goto free_eb;
}
/* add one reference for the tree */
check_buffer_tree_ref(eb);
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
/*
* there is a race where release page may have
@@ -4582,23 +4610,6 @@ free_eb:
return exists;
}
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- struct extent_buffer *eb;
-
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
- return eb;
- }
- rcu_read_unlock();
-
- return NULL;
-}
-
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
struct extent_buffer *eb =
@@ -5062,23 +5073,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
-static void move_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- if (dst_page == src_page) {
- memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
- } else {
- char *src_kaddr = page_address(src_page);
- char *p = dst_kaddr + dst_off + len;
- char *s = src_kaddr + src_off + len;
-
- while (len--)
- *--p = *--s;
- }
-}
-
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long distance = (src > dst) ? src - dst : dst - src;
@@ -5189,7 +5183,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
- move_pages(extent_buffer_page(dst, dst_i),
+ copy_pages(extent_buffer_page(dst, dst_i),
extent_buffer_page(dst, src_i),
dst_off_in_page - cur + 1,
src_off_in_page - cur + 1, cur);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6dbc645f1f3d..19620c58f096 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -271,7 +271,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len);
+ u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
@@ -345,4 +345,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+noinline u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page, u64 *start,
+ u64 *end, u64 max_bytes);
+#endif
#endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 61adc44b7805..93fba716d7f8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,10 +3,10 @@
#include <linux/rbtree.h>
-#define EXTENT_MAP_LAST_BYTE (u64)-4
-#define EXTENT_MAP_HOLE (u64)-3
-#define EXTENT_MAP_INLINE (u64)-2
-#define EXTENT_MAP_DELALLOC (u64)-1
+#define EXTENT_MAP_LAST_BYTE ((u64)-4)
+#define EXTENT_MAP_HOLE ((u64)-3)
+#define EXTENT_MAP_INLINE ((u64)-2)
+#define EXTENT_MAP_DELALLOC ((u64)-1)
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 4f53159bdb9d..6f3848860283 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -329,6 +329,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
u64 csum_end;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ ASSERT(start == ALIGN(start, root->sectorsize) &&
+ (end + 1) == ALIGN(end + 1, root->sectorsize));
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -846,10 +849,8 @@ insert:
path->leave_spinning = 0;
if (ret < 0)
goto fail_unlock;
- if (ret != 0) {
- WARN_ON(1);
+ if (WARN_ON(ret != 0))
goto fail_unlock;
- }
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 72da4df53c9a..82d0342763c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,7 +39,6 @@
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
-#include "compat.h"
#include "volumes.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
@@ -370,7 +369,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
u64 root_objectid = 0;
atomic_inc(&fs_info->defrag_running);
- while(1) {
+ while (1) {
/* Pause the auto defragger. */
if (test_bit(BTRFS_FS_STATE_REMOUNTING,
&fs_info->fs_state))
@@ -1281,6 +1280,7 @@ again:
}
wait_on_page_writeback(pages[i]);
}
+ faili = num_pages - 1;
err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
@@ -1299,8 +1299,10 @@ again:
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- btrfs_wait_ordered_range(inode, start_pos,
- last_pos - start_pos);
+ err = btrfs_wait_ordered_range(inode, start_pos,
+ last_pos - start_pos);
+ if (err)
+ goto fail;
goto again;
}
if (ordered)
@@ -1809,8 +1811,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- if (full_sync)
- btrfs_wait_ordered_range(inode, start, end - start + 1);
+ if (full_sync) {
+ ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+ }
atomic_inc(&root->log_batch);
/*
@@ -1876,27 +1883,20 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
mutex_unlock(&inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
- if (ret > 0) {
- /*
- * If we didn't already wait for ordered extents we need
- * to do that now.
- */
- if (!full_sync)
- btrfs_wait_ordered_range(inode, start,
- end - start + 1);
- ret = btrfs_commit_transaction(trans, root);
- } else {
+ if (!ret) {
ret = btrfs_sync_log(trans, root);
- if (ret == 0) {
+ if (!ret) {
ret = btrfs_end_transaction(trans, root);
- } else {
- if (!full_sync)
- btrfs_wait_ordered_range(inode, start,
- end -
- start + 1);
- ret = btrfs_commit_transaction(trans, root);
+ goto out;
}
}
+ if (!full_sync) {
+ ret = btrfs_wait_ordered_range(inode, start,
+ end - start + 1);
+ if (ret)
+ goto out;
+ }
+ ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_end_transaction(trans, root);
}
@@ -2067,7 +2067,9 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
((offset + len - 1) >> PAGE_CACHE_SHIFT));
- btrfs_wait_ordered_range(inode, offset, len);
+ ret = btrfs_wait_ordered_range(inode, offset, len);
+ if (ret)
+ return ret;
mutex_lock(&inode->i_mutex);
/*
@@ -2136,8 +2138,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state, GFP_NOFS);
- btrfs_wait_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
+ ret = btrfs_wait_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
}
path = btrfs_alloc_path();
@@ -2308,7 +2314,10 @@ static long btrfs_fallocate(struct file *file, int mode,
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
- btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+ ret = btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ if (ret)
+ goto out;
locked_end = alloc_end - 1;
while (1) {
@@ -2332,8 +2341,10 @@ static long btrfs_fallocate(struct file *file, int mode,
* we can't wait on the range with the transaction
* running or with the extent lock held
*/
- btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
+ ret = btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ if (ret)
+ goto out;
} else {
if (ordered)
btrfs_put_ordered_extent(ordered);
@@ -2405,14 +2416,12 @@ out_reserve_fail:
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map *em;
+ struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
u64 lockstart = *offset;
u64 lockend = i_size_read(inode);
u64 start = *offset;
- u64 orig_start = *offset;
u64 len = i_size_read(inode);
- u64 last_end = 0;
int ret = 0;
lockend = max_t(u64, root->sectorsize, lockend);
@@ -2429,89 +2438,35 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
&cached_state);
- /*
- * Delalloc is such a pain. If we have a hole and we have pending
- * delalloc for a portion of the hole we will get back a hole that
- * exists for the entire range since it hasn't been actually written
- * yet. So to take care of this case we need to look for an extent just
- * before the position we want in case there is outstanding delalloc
- * going on here.
- */
- if (whence == SEEK_HOLE && start != 0) {
- if (start <= root->sectorsize)
- em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
- root->sectorsize, 0);
- else
- em = btrfs_get_extent_fiemap(inode, NULL, 0,
- start - root->sectorsize,
- root->sectorsize, 0);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- last_end = em->start + em->len;
- if (em->block_start == EXTENT_MAP_DELALLOC)
- last_end = min_t(u64, last_end, inode->i_size);
- free_extent_map(em);
- }
-
- while (1) {
+ while (start < inode->i_size) {
em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
+ em = NULL;
break;
}
- if (em->block_start == EXTENT_MAP_HOLE) {
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- if (last_end <= orig_start) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- }
-
- if (whence == SEEK_HOLE) {
- *offset = start;
- free_extent_map(em);
- break;
- }
- } else {
- if (whence == SEEK_DATA) {
- if (em->block_start == EXTENT_MAP_DELALLOC) {
- if (start >= inode->i_size) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- }
-
- if (!test_bit(EXTENT_FLAG_PREALLOC,
- &em->flags)) {
- *offset = start;
- free_extent_map(em);
- break;
- }
- }
- }
+ if (whence == SEEK_HOLE &&
+ (em->block_start == EXTENT_MAP_HOLE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
+ break;
+ else if (whence == SEEK_DATA &&
+ (em->block_start != EXTENT_MAP_HOLE &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
+ break;
start = em->start + em->len;
- last_end = em->start + em->len;
-
- if (em->block_start == EXTENT_MAP_DELALLOC)
- last_end = min_t(u64, last_end, inode->i_size);
-
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
free_extent_map(em);
+ em = NULL;
cond_resched();
}
- if (!ret)
- *offset = min(*offset, inode->i_size);
-out:
+ free_extent_map(em);
+ if (!ret) {
+ if (whence == SEEK_DATA && start >= inode->i_size)
+ ret = -ENXIO;
+ else
+ *offset = min_t(loff_t, start, inode->i_size);
+ }
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index b4f9904c4c6b..057be95b1e1e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -218,7 +218,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
struct inode *inode)
{
int ret = 0;
@@ -1009,8 +1008,13 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (ret)
goto out;
-
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (ret) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
+ GFP_NOFS);
+ goto out;
+ }
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
@@ -2276,7 +2280,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
goto out;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- while(1) {
+ while (1) {
if (entry->bytes < bytes && entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
@@ -2967,19 +2971,15 @@ out:
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ struct inode *inode)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct inode *inode;
int ret;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
- inode = lookup_free_ino_inode(root, path);
- if (IS_ERR(inode))
- return 0;
-
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
if (ret) {
btrfs_delalloc_release_metadata(inode, inode->i_size);
@@ -2990,7 +2990,6 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
#endif
}
- iput(inode);
return ret;
}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index e737f92cf6d0..0cf4977ef70d 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -58,7 +58,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
@@ -76,7 +75,8 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path);
+ struct btrfs_path *path,
+ struct inode *inode);
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index e0b7034d6343..ec82fae07097 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -369,7 +369,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
goto out;
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
ptr += btrfs_item_size(leaf, item) - ins_len;
extref = (struct btrfs_inode_extref *)ptr;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 2c66ddbbe670..ab485e57b6fe 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -78,10 +78,8 @@ again:
btrfs_transaction_in_commit(fs_info)) {
leaf = path->nodes[0];
- if (btrfs_header_nritems(leaf) == 0) {
- WARN_ON(1);
+ if (WARN_ON(btrfs_header_nritems(leaf) == 0))
break;
- }
/*
* Save the key so we can advances forward
@@ -237,7 +235,7 @@ again:
start_caching(root);
if (objectid <= root->cache_progress ||
- objectid > root->highest_objectid)
+ objectid >= root->highest_objectid)
__btrfs_add_free_space(ctl, objectid, 1);
else
__btrfs_add_free_space(pinned, objectid, 1);
@@ -412,8 +410,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
return 0;
/* Don't save inode cache if we are deleting this root */
- if (btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root)
+ if (btrfs_root_refs(&root->root_item) == 0)
return 0;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
@@ -467,7 +464,7 @@ again:
}
if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, root, ret);
@@ -504,7 +501,7 @@ again:
}
btrfs_free_reserved_data_space(inode, prealloc);
- ret = btrfs_write_out_ino_cache(root, trans, path);
+ ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
iput(inode);
out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51e3afa78354..f1a77449d032 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,7 +43,6 @@
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -844,7 +843,10 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(btrfs_is_free_space_inode(inode));
+ if (btrfs_is_free_space_inode(inode)) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
@@ -1178,10 +1180,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
while (1) {
ret = btrfs_lookup_file_extent(trans, root, path, ino,
cur_offset, 0);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret < 0)
goto error;
- }
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1195,10 +1195,8 @@ next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret < 0)
goto error;
- }
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -1289,10 +1287,8 @@ out_check:
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
page_started, nr_written, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
@@ -1339,10 +1335,8 @@ out_check:
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret)
goto error;
- }
}
extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1364,10 +1358,8 @@ out_check:
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page, cow_start, end,
page_started, nr_written, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret)
goto error;
- }
}
error:
@@ -1551,7 +1543,13 @@ static void btrfs_clear_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
- if (*bits & EXTENT_DO_ACCOUNTING)
+ /*
+ * We don't reserve metadata space for space cache inodes so we
+ * don't need to call dellalloc_release_metadata if there is an
+ * error.
+ */
+ if (*bits & EXTENT_DO_ACCOUNTING &&
+ root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
@@ -2041,10 +2039,8 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
key.offset = offset;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- WARN_ON(1);
+ if (WARN_ON(ret < 0))
return ret;
- }
ret = 0;
while (1) {
@@ -2133,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
old->extent_offset, fs_info,
path, record_one_backref,
old);
- BUG_ON(ret < 0 && ret != -ENOENT);
+ if (ret < 0 && ret != -ENOENT)
+ return false;
/* no backref to be processed for this extent */
if (!old->count) {
@@ -2367,10 +2364,23 @@ out_unlock:
return ret;
}
+static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
+{
+ struct old_sa_defrag_extent *old, *tmp;
+
+ if (!new)
+ return;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ kfree(new);
+}
+
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
- struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2413,16 +2423,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out:
+ free_sa_defrag_extent(new);
+
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
-
- kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2432,7 +2437,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old, *tmp;
+ struct old_sa_defrag_extent *old;
struct new_sa_defrag_extent *new;
int ret;
@@ -2480,7 +2485,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_list;
+ goto out_free_path;
else if (ret > 0)
break;
continue;
@@ -2509,7 +2514,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_list;
+ goto out_free_path;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2531,15 +2536,10 @@ next:
return new;
-out_free_list:
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out_free_path:
btrfs_free_path(path);
out_kfree:
- kfree(new);
+ free_sa_defrag_extent(new);
return NULL;
}
@@ -2710,8 +2710,14 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new)
- relink_file_extents(new);
+ if (new) {
+ if (ret) {
+ free_sa_defrag_extent(new);
+ atomic_dec(&root->fs_info->defrag_running);
+ } else {
+ relink_file_extents(new);
+ }
+ }
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -2969,6 +2975,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
+ atomic_dec(&root->orphan_inodes);
if (reserve) {
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&BTRFS_I(inode)->runtime_flags);
@@ -3018,14 +3025,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
release_rsv = 1;
spin_unlock(&root->orphan_lock);
- if (trans && delete_item)
- ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-
- if (release_rsv) {
- btrfs_orphan_release_metadata(inode);
+ if (delete_item) {
atomic_dec(&root->orphan_inodes);
+ if (trans)
+ ret = btrfs_del_orphan_item(trans, root,
+ btrfs_ino(inode));
}
+ if (release_rsv)
+ btrfs_orphan_release_metadata(inode);
+
return ret;
}
@@ -3172,8 +3181,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
- if (!S_ISREG(inode->i_mode)) {
- WARN_ON(1);
+ if (WARN_ON(!S_ISREG(inode->i_mode))) {
iput(inode);
continue;
}
@@ -3636,7 +3644,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
- btrfs_drop_nlink(inode);
+ drop_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
}
return ret;
@@ -4230,15 +4238,16 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
struct btrfs_ordered_extent *ordered;
- btrfs_wait_ordered_range(inode, hole_start,
- block_end - hole_start);
+
lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
&cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, hole_start);
+ ordered = btrfs_lookup_ordered_range(inode, hole_start,
+ block_end - hole_start);
if (!ordered)
break;
unlock_extent_cached(io_tree, hole_start, block_end - 1,
&cached_state, GFP_NOFS);
+ btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
@@ -4472,8 +4481,10 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- btrfs_is_free_space_inode(inode)))
+ if (inode->i_nlink &&
+ ((btrfs_root_refs(&root->root_item) != 0 &&
+ root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
+ btrfs_is_free_space_inode(inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -4490,7 +4501,8 @@ void btrfs_evict_inode(struct inode *inode)
}
if (inode->i_nlink > 0) {
- BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+ BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
+ root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
goto no_delete;
}
@@ -4731,14 +4743,7 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
- /*
- * Free space cache has inodes in the tree root, but the tree root has a
- * root_refs of 0, so this could end up dropping the tree root as a
- * snapshot, so we need the extra !root->fs_info->tree_root check to
- * make sure we don't drop it.
- */
- if (empty && btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root) {
+ if (empty && btrfs_root_refs(&root->root_item) == 0) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4831,10 +4836,12 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
{
struct inode *inode;
struct btrfs_iget_args args;
+ unsigned long hashval = btrfs_inode_hash(objectid, root);
+
args.ino = objectid;
args.root = root;
- inode = iget5_locked(s, objectid, btrfs_find_actor,
+ inode = iget5_locked(s, hashval, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
@@ -5048,7 +5055,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
continue;
}
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != key.objectid)
@@ -5454,7 +5461,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_INODE_NODATASUM;
}
- insert_inode_hash(inode);
+ btrfs_insert_inode_hash(inode);
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
@@ -5730,7 +5737,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
@@ -5860,7 +5867,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
compress_type = btrfs_file_extent_compression(leaf, item);
max_size = btrfs_file_extent_ram_bytes(leaf, item);
inline_size = btrfs_file_extent_inline_item_len(leaf,
- btrfs_item_nr(leaf, path->slots[0]));
+ btrfs_item_nr(path->slots[0]));
tmp = kmalloc(inline_size, GFP_NOFS);
if (!tmp)
return -ENOMEM;
@@ -5974,7 +5981,14 @@ again:
found_type = btrfs_key_type(&found_key);
if (found_key.objectid != objectid ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- goto not_found;
+ /*
+ * If we backup past the first extent we want to move forward
+ * and see if there is an extent in front of us, otherwise we'll
+ * say there is a hole for our whole search range which can
+ * cause problems.
+ */
+ extent_end = start;
+ goto next;
}
found_type = btrfs_file_extent_type(leaf, item);
@@ -5989,7 +6003,7 @@ again:
size = btrfs_file_extent_inline_len(leaf, item);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
-
+next:
if (start >= extent_end) {
path->slots[0]++;
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -6173,8 +6187,7 @@ insert:
write_unlock(&em_tree->lock);
out:
- if (em)
- trace_btrfs_get_extent(root, em);
+ trace_btrfs_get_extent(root, em);
if (path)
btrfs_free_path(path);
@@ -6249,7 +6262,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
/* adjust the range_start to make sure it doesn't
* go backwards from the start they passed in
*/
- range_start = max(start,range_start);
+ range_start = max(start, range_start);
found = found_end - range_start;
if (found > 0) {
@@ -7053,7 +7066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
}
} else {
submit_len += bvec->bv_len;
- nr_pages ++;
+ nr_pages++;
bvec++;
}
}
@@ -7222,7 +7235,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
* outstanding dirty pages are on disk.
*/
count = iov_length(iov, nr_segs);
- btrfs_wait_ordered_range(inode, offset, count);
+ ret = btrfs_wait_ordered_range(inode, offset, count);
+ if (ret)
+ return ret;
if (rw & WRITE) {
/*
@@ -7563,7 +7578,10 @@ static int btrfs_truncate(struct inode *inode)
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
+ ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
+ (u64)-1);
+ if (ret)
+ return ret;
/*
* Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7787,6 +7805,14 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_destroy_inode(struct inode *inode)
+{
+ btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+#endif
+
static void btrfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -7857,8 +7883,7 @@ int btrfs_drop_inode(struct inode *inode)
return 1;
/* the snap/subvol tree is on deleting */
- if (btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root)
+ if (btrfs_root_refs(&root->root_item) == 0)
return 1;
else
return generic_drop_inode(inode);
@@ -7995,8 +8020,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret == -EEXIST) {
/* we shouldn't get
* eexist without a new_inode */
- if (!new_inode) {
- WARN_ON(1);
+ if (WARN_ON(!new_inode)) {
return ret;
}
} else {
@@ -8144,18 +8168,24 @@ out_notrans:
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
+ struct inode *inode;
delalloc_work = container_of(work, struct btrfs_delalloc_work,
work);
- if (delalloc_work->wait)
- btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
- else
- filemap_flush(delalloc_work->inode->i_mapping);
+ inode = delalloc_work->inode;
+ if (delalloc_work->wait) {
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ } else {
+ filemap_flush(inode->i_mapping);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_flush(inode->i_mapping);
+ }
if (delalloc_work->delay_iput)
- btrfs_add_delayed_iput(delalloc_work->inode);
+ btrfs_add_delayed_iput(inode);
else
- iput(delalloc_work->inode);
+ iput(inode);
complete(&delalloc_work->completion);
}
@@ -8276,8 +8306,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return ret;
}
-int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
- int delay_iput)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
{
struct btrfs_root *root;
struct list_head splice;
@@ -8337,14 +8366,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
int err;
int drop_inode = 0;
u64 objectid;
- u64 index = 0 ;
+ u64 index = 0;
int name_len;
int datasize;
unsigned long ptr;
struct btrfs_file_extent_item *ei;
struct extent_buffer *leaf;
- name_len = strlen(symname) + 1;
+ name_len = strlen(symname);
if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
return -ENAMETOOLONG;
@@ -8432,7 +8461,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
inode_set_bytes(inode, name_len);
- btrfs_i_size_write(inode, name_len - 1);
+ btrfs_i_size_write(inode, name_len);
err = btrfs_update_inode(trans, root, inode);
if (err)
drop_inode = 1;
@@ -8491,6 +8520,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
ins.offset, 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
if (ret) {
+ btrfs_free_reserved_extent(root, ins.objectid,
+ ins.offset);
btrfs_abort_transaction(trans, root, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9d46f60cb943..a111622598b0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -44,7 +44,6 @@
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include <linux/uaccess.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -321,7 +320,7 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
struct btrfs_device *device;
struct request_queue *q;
struct fstrim_range range;
@@ -369,9 +368,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
int btrfs_is_empty_uuid(u8 *uuid)
{
- static char empty_uuid[BTRFS_UUID_SIZE] = {0};
+ int i;
- return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE);
+ for (i = 0; i < BTRFS_UUID_SIZE; i++) {
+ if (uuid[i])
+ return 0;
+ }
+ return 1;
}
static noinline int create_subvol(struct inode *dir,
@@ -436,7 +439,7 @@ static noinline int create_subvol(struct inode *dir,
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
- write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf),
+ write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(leaf),
@@ -574,7 +577,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
return ret;
- btrfs_wait_ordered_extents(root);
+ btrfs_wait_ordered_extents(root, -1);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
@@ -688,7 +691,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
* nfs_async_unlink().
*/
-static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
{
int error;
@@ -842,7 +845,6 @@ static int find_new_extents(struct btrfs_root *root,
{
struct btrfs_path *path;
struct btrfs_key min_key;
- struct btrfs_key max_key;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *extent;
int type;
@@ -857,15 +859,10 @@ static int find_new_extents(struct btrfs_root *root,
min_key.type = BTRFS_EXTENT_DATA_KEY;
min_key.offset = *off;
- max_key.objectid = ino;
- max_key.type = (u8)-1;
- max_key.offset = (u64)-1;
-
path->keep_locks = 1;
- while(1) {
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, newer_than);
+ while (1) {
+ ret = btrfs_search_forward(root, &min_key, path, newer_than);
if (ret != 0)
goto none;
if (min_key.objectid != ino)
@@ -1206,7 +1203,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra = &file->f_ra;
}
- pages = kmalloc(sizeof(struct page *) * max_cluster,
+ pages = kmalloc_array(max_cluster, sizeof(struct page *),
GFP_NOFS);
if (!pages) {
ret = -ENOMEM;
@@ -1893,7 +1890,6 @@ static noinline int search_ioctl(struct inode *inode,
{
struct btrfs_root *root;
struct btrfs_key key;
- struct btrfs_key max_key;
struct btrfs_path *path;
struct btrfs_ioctl_search_key *sk = &args->key;
struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
@@ -1925,15 +1921,10 @@ static noinline int search_ioctl(struct inode *inode,
key.type = sk->min_type;
key.offset = sk->min_offset;
- max_key.objectid = sk->max_objectid;
- max_key.type = sk->max_type;
- max_key.offset = sk->max_offset;
-
path->keep_locks = 1;
- while(1) {
- ret = btrfs_search_forward(root, &key, &max_key, path,
- sk->min_transid);
+ while (1) {
+ ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
ret = 0;
@@ -2018,7 +2009,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- while(1) {
+ while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
@@ -2047,7 +2038,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
}
*(ptr + len) = '/';
- read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
+ read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
break;
@@ -2058,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
dirid = key.objectid;
}
memmove(name, ptr, total_len);
- name[total_len]='\0';
+ name[total_len] = '\0';
ret = 0;
out:
btrfs_free_path(path);
@@ -2098,7 +2089,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
void __user *arg)
{
- struct dentry *parent = fdentry(file);
+ struct dentry *parent = file->f_path.dentry;
struct dentry *dentry;
struct inode *dir = parent->d_inode;
struct inode *inode;
@@ -2144,7 +2135,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
inode = dentry->d_inode;
dest = BTRFS_I(inode)->root;
- if (!capable(CAP_SYS_ADMIN)){
+ if (!capable(CAP_SYS_ADMIN)) {
/*
* Regular user. Only allow this with a special mount
* option, when the user has write+exec access to the
@@ -2727,15 +2718,10 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
size = sizeof(tmp) +
tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
- same = kmalloc(size, GFP_NOFS);
- if (!same) {
- ret = -EFAULT;
- goto out;
- }
+ same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size);
- if (copy_from_user(same,
- (struct btrfs_ioctl_same_args __user *)argp, size)) {
- ret = -EFAULT;
+ if (IS_ERR(same)) {
+ ret = PTR_ERR(same);
goto out;
}
@@ -3119,7 +3105,7 @@ out:
static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
{
- struct inode *inode = fdentry(file)->d_inode;
+ struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fd src_file;
struct inode *src;
@@ -3679,9 +3665,10 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
switch (p->cmd) {
case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
+ if (root->fs_info->sb->s_flags & MS_RDONLY) {
+ ret = -EROFS;
+ goto out;
+ }
if (atomic_xchg(
&root->fs_info->mutually_exclusive_operation_running,
1)) {
@@ -3707,7 +3694,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
if (copy_to_user(arg, p, sizeof(*p)))
ret = -EFAULT;
-
+out:
kfree(p);
return ret;
}
@@ -4317,7 +4304,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -4557,9 +4544,15 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_logical_to_ino(root, argp);
case BTRFS_IOC_SPACE_INFO:
return btrfs_ioctl_space_info(root, argp);
- case BTRFS_IOC_SYNC:
- btrfs_sync_fs(file->f_dentry->d_sb, 1);
- return 0;
+ case BTRFS_IOC_SYNC: {
+ int ret;
+
+ ret = btrfs_start_delalloc_roots(root->fs_info, 0);
+ if (ret)
+ return ret;
+ ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
+ return ret;
+ }
case BTRFS_IOC_START_SYNC:
return btrfs_ioctl_start_sync(root, argp);
case BTRFS_IOC_WAIT_SYNC:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index c702cb62f78a..69582d5b69d1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -537,7 +537,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
*/
if (RB_EMPTY_ROOT(&tree->tree) &&
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+ spin_lock(&root->fs_info->ordered_root_lock);
list_del_init(&BTRFS_I(inode)->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_root_lock);
}
if (!root->nr_ordered_extents) {
@@ -563,10 +565,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-void btrfs_wait_ordered_extents(struct btrfs_root *root)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
{
struct list_head splice, works;
struct btrfs_ordered_extent *ordered, *next;
+ int count = 0;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
@@ -574,7 +577,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root)
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->ordered_extent_lock);
list_splice_init(&root->ordered_extents, &splice);
- while (!list_empty(&splice)) {
+ while (!list_empty(&splice) && nr) {
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
root_extent_list);
list_move_tail(&ordered->root_extent_list,
@@ -589,7 +592,11 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root)
cond_resched();
spin_lock(&root->ordered_extent_lock);
+ if (nr != -1)
+ nr--;
+ count++;
}
+ list_splice_tail(&splice, &root->ordered_extents);
spin_unlock(&root->ordered_extent_lock);
list_for_each_entry_safe(ordered, next, &works, work_list) {
@@ -599,18 +606,21 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root)
cond_resched();
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
+
+ return count;
}
-void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
{
struct btrfs_root *root;
struct list_head splice;
+ int done;
INIT_LIST_HEAD(&splice);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
- while (!list_empty(&splice)) {
+ while (!list_empty(&splice) && nr) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
root = btrfs_grab_fs_root(root);
@@ -619,11 +629,16 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
&fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
- btrfs_wait_ordered_extents(root);
+ done = btrfs_wait_ordered_extents(root, nr);
btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock);
+ if (nr != -1) {
+ nr -= done;
+ WARN_ON(nr < 0);
+ }
}
+ list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
}
@@ -734,8 +749,9 @@ void btrfs_start_ordered_extent(struct inode *inode,
/*
* Used to wait on ordered extents across a large range of bytes.
*/
-void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
+ int ret = 0;
u64 end;
u64 orig_end;
struct btrfs_ordered_extent *ordered;
@@ -751,8 +767,9 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
/* start IO across the range first to instantiate any delalloc
* extents
*/
- filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
-
+ ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+ if (ret)
+ return ret;
/*
* So with compression we will find and lock a dirty page and clear the
* first one as dirty, setup an async extent, and immediately return
@@ -768,10 +785,15 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
* right and you are wrong.
*/
if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
-
- filemap_fdatawait_range(inode->i_mapping, start, orig_end);
+ &BTRFS_I(inode)->runtime_flags)) {
+ ret = filemap_fdatawrite_range(inode->i_mapping, start,
+ orig_end);
+ if (ret)
+ return ret;
+ }
+ ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
+ if (ret)
+ return ret;
end = orig_end;
while (1) {
@@ -782,17 +804,20 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
btrfs_put_ordered_extent(ordered);
break;
}
- if (ordered->file_offset + ordered->len < start) {
+ if (ordered->file_offset + ordered->len <= start) {
btrfs_put_ordered_extent(ordered);
break;
}
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
+ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
+ ret = -EIO;
btrfs_put_ordered_extent(ordered);
- if (end == 0 || end == start)
+ if (ret || end == 0 || end == start)
break;
end--;
}
+ return ret;
}
/*
@@ -1076,7 +1101,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
* if this file hasn't been changed since the last transaction
* commit, we can safely return without doing anything
*/
- if (last_mod < root->fs_info->last_trans_committed)
+ if (last_mod <= root->fs_info->last_trans_committed)
return;
spin_lock(&root->fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 0c0b35612d7a..9b0450f7ac20 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -180,7 +180,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);
-void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -195,8 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
-void btrfs_wait_ordered_extents(struct btrfs_root *root);
-void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0088bedc8631..417053b17181 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -193,7 +193,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
for (i = 0 ; i < nr ; i++) {
- item = btrfs_item_nr(l, i);
+ item = btrfs_item_nr(i);
btrfs_item_key_to_cpu(l, &key, i);
type = btrfs_key_type(&key);
printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d0ecfbd9cc9f..24ac21840a9a 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -33,7 +33,6 @@
#include <linux/raid/xor.h>
#include <linux/vmalloc.h>
#include <asm/div64.h>
-#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4a355726151e..ce459a7cb16d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1383,6 +1383,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
{
struct btrfs_root *reloc_root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
+ struct btrfs_block_rsv *rsv;
int clear_rsv = 0;
int ret;
@@ -1396,13 +1397,14 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
return 0;
- if (!trans->block_rsv) {
+ if (!trans->reloc_reserved) {
+ rsv = trans->block_rsv;
trans->block_rsv = rc->block_rsv;
clear_rsv = 1;
}
reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
if (clear_rsv)
- trans->block_rsv = NULL;
+ trans->block_rsv = rsv;
ret = __add_reloc_root(reloc_root);
BUG_ON(ret < 0);
@@ -1775,8 +1777,7 @@ again:
new_ptr_gen = 0;
}
- if (new_bytenr > 0 && new_bytenr == old_bytenr) {
- WARN_ON(1);
+ if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
ret = level;
break;
}
@@ -2058,7 +2059,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
LIST_HEAD(inode_list);
struct btrfs_key key;
struct btrfs_key next_key;
- struct btrfs_trans_handle *trans;
+ struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *reloc_root;
struct btrfs_root_item *root_item;
struct btrfs_path *path;
@@ -2107,18 +2108,19 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
memset(&next_key, 0, sizeof(next_key));
while (1) {
- trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
- trans->block_rsv = rc->block_rsv;
-
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_ALL);
if (ret) {
- BUG_ON(ret != -EAGAIN);
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
- continue;
+ err = ret;
+ goto out;
}
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
+ trans->block_rsv = rc->block_rsv;
replaced = 0;
max_level = level;
@@ -2164,6 +2166,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
root_item->drop_level = level;
btrfs_end_transaction_throttle(trans, root);
+ trans = NULL;
btrfs_btree_balance_dirty(root);
@@ -2192,7 +2195,8 @@ out:
btrfs_update_reloc_root(trans, root);
}
- btrfs_end_transaction_throttle(trans, root);
+ if (trans)
+ btrfs_end_transaction_throttle(trans, root);
btrfs_btree_balance_dirty(root);
@@ -3258,7 +3262,7 @@ static int add_tree_block(struct reloc_control *rc,
struct rb_node *rb_node;
u32 item_size;
int level = -1;
- int generation;
+ u64 generation;
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -3407,7 +3411,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
struct inode *inode, u64 ino)
{
struct btrfs_key key;
- struct btrfs_path *path;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
int ret = 0;
@@ -3432,22 +3435,14 @@ truncate:
if (ret)
goto out;
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
- btrfs_free_path(path);
ret = PTR_ERR(trans);
goto out;
}
- ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
- btrfs_free_path(path);
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
out:
@@ -3549,10 +3544,8 @@ static int find_data_references(struct reloc_control *rc,
err = ret;
goto out;
}
- if (ret > 0) {
- WARN_ON(1);
+ if (WARN_ON(ret > 0))
goto out;
- }
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -3572,11 +3565,9 @@ static int find_data_references(struct reloc_control *rc,
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != ref_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
- WARN_ON(1);
+ if (WARN_ON(key.objectid != ref_objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY))
break;
- }
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -4001,16 +3992,6 @@ restart:
}
}
- ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
- if (ret < 0) {
- if (ret != -ENOSPC) {
- err = ret;
- WARN_ON(1);
- break;
- }
- rc->commit_transaction = 1;
- }
-
if (rc->commit_transaction) {
rc->commit_transaction = 0;
ret = btrfs_commit_transaction(trans, rc->extent_root);
@@ -4241,12 +4222,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
rc->block_group->key.objectid, rc->block_group->flags);
- ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
+ ret = btrfs_start_delalloc_roots(fs_info, 0);
if (ret < 0) {
err = ret;
goto out;
}
- btrfs_wait_all_ordered_extents(fs_info);
+ btrfs_wait_ordered_roots(fs_info, -1);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
@@ -4264,7 +4245,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rc->extents_found);
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
- btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
+ ret = btrfs_wait_ordered_range(rc->data_inode, 0,
+ (u64)-1);
+ if (ret) {
+ err = ret;
+ goto out;
+ }
invalidate_mapping_pages(rc->data_inode->i_mapping,
0, -1);
rc->stage = UPDATE_DATA_PTRS;
@@ -4481,6 +4467,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
u64 disk_bytenr;
+ u64 new_bytenr;
LIST_HEAD(list);
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
@@ -4492,13 +4479,24 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
if (ret)
goto out;
- disk_bytenr = ordered->start;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
- sums->bytenr = disk_bytenr;
- disk_bytenr += sums->len;
+ /*
+ * We need to offset the new_bytenr based on where the csum is.
+ * We need to do this because we will read in entire prealloc
+ * extents but we may have written to say the middle of the
+ * prealloc extent, so we need to make sure the csum goes with
+ * the right disk offset.
+ *
+ * We can do this because the data reloc inode refers strictly
+ * to the on disk bytes, so we don't have to worry about
+ * disk_len vs real len like with real inodes since it's all
+ * disk length.
+ */
+ new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
+ sums->bytenr = new_bytenr;
btrfs_add_ordered_sum(inode, ordered, sums);
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a18e0e23f6a6..1fd3f33c330a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -208,7 +208,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
int is_metadata, int have_csum,
const u8 *csum, u64 generation,
u16 csum_size);
-static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write);
@@ -938,8 +937,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
BTRFS_DEV_STAT_CORRUPTION_ERRS);
}
- if (sctx->readonly && !sctx->is_dev_replace)
- goto did_not_correct_error;
+ if (sctx->readonly) {
+ ASSERT(!sctx->is_dev_replace);
+ goto out;
+ }
if (!is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
@@ -1292,7 +1293,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio;
struct scrub_page *page = sblock->pagev[page_num];
- DECLARE_COMPLETION_ONSTACK(complete);
if (page->dev->bdev == NULL) {
page->io_error = 1;
@@ -1309,18 +1309,11 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
bio->bi_bdev = page->dev->bdev;
bio->bi_sector = page->physical >> 9;
- bio->bi_end_io = scrub_complete_bio_end_io;
- bio->bi_private = &complete;
bio_add_page(bio, page->page, PAGE_SIZE, 0);
- btrfsic_submit_bio(READ, bio);
-
- /* this will also unplug the queue */
- wait_for_completion(&complete);
-
- page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (btrfsic_submit_bio_wait(READ, bio))
sblock->no_io_error_seen = 0;
+
bio_put(bio);
}
@@ -1389,11 +1382,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
sblock->checksum_error = 1;
}
-static void scrub_complete_bio_end_io(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
-
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write)
@@ -1428,7 +1416,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
sblock_bad->checksum_error || page_bad->io_error) {
struct bio *bio;
int ret;
- DECLARE_COMPLETION_ONSTACK(complete);
if (!page_bad->dev->bdev) {
printk_ratelimited(KERN_WARNING
@@ -1441,19 +1428,14 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_sector = page_bad->physical >> 9;
- bio->bi_end_io = scrub_complete_bio_end_io;
- bio->bi_private = &complete;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
bio_put(bio);
return -EIO;
}
- btrfsic_submit_bio(WRITE, bio);
- /* this will also unplug the queue */
- wait_for_completion(&complete);
- if (!bio_flagged(bio, BIO_UPTODATE)) {
+ if (btrfsic_submit_bio_wait(WRITE, bio)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
btrfs_dev_replace_stats_inc(
@@ -2717,8 +2699,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
mutex_unlock(&fs_info->scrub_lock);
wake_up(&fs_info->scrub_pause_wait);
- dev_replace->cursor_left = dev_replace->cursor_right;
- dev_replace->item_needs_writeback = 1;
btrfs_put_block_group(cache);
if (ret)
break;
@@ -2732,6 +2712,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
+ dev_replace->cursor_left = dev_replace->cursor_right;
+ dev_replace->item_needs_writeback = 1;
+
key.offset = found_key.offset + length;
btrfs_release_path(path);
}
@@ -2783,7 +2766,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
{
int ret = 0;
- mutex_lock(&fs_info->scrub_lock);
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
@@ -2813,21 +2795,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
}
++fs_info->scrub_workers_refcnt;
out:
- mutex_unlock(&fs_info->scrub_lock);
-
return ret;
}
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
{
- mutex_lock(&fs_info->scrub_lock);
if (--fs_info->scrub_workers_refcnt == 0) {
btrfs_stop_workers(&fs_info->scrub_workers);
btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
btrfs_stop_workers(&fs_info->scrub_nocow_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
- mutex_unlock(&fs_info->scrub_lock);
}
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
@@ -2888,23 +2866,18 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EINVAL;
}
- ret = scrub_workers_get(fs_info, is_dev_replace);
- if (ret)
- return ret;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (!dev || (dev->missing && !is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(fs_info);
return -ENODEV;
}
- mutex_lock(&fs_info->scrub_lock);
+ mutex_lock(&fs_info->scrub_lock);
if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(fs_info);
return -EIO;
}
@@ -2915,10 +2888,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
btrfs_dev_replace_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(fs_info);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
+
+ ret = scrub_workers_get(fs_info, is_dev_replace);
+ if (ret) {
+ mutex_unlock(&fs_info->scrub_lock);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ return ret;
+ }
+
sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock);
@@ -2931,13 +2911,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
atomic_inc(&fs_info->scrubs_running);
mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!is_dev_replace) {
- down_read(&fs_info->scrub_super_lock);
+ /*
+ * by holding device list mutex, we can
+ * kick off writing super in log tree sync.
+ */
ret = scrub_supers(sctx, dev);
- up_read(&fs_info->scrub_super_lock);
}
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!ret)
ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -2954,10 +2936,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->scrub_lock);
dev->scrub_device = NULL;
+ scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock);
scrub_free_ctx(sctx);
- scrub_workers_put(fs_info);
return ret;
}
@@ -2987,16 +2969,6 @@ void btrfs_scrub_continue(struct btrfs_root *root)
wake_up(&fs_info->scrub_pause_wait);
}
-void btrfs_scrub_pause_super(struct btrfs_root *root)
-{
- down_write(&root->fs_info->scrub_super_lock);
-}
-
-void btrfs_scrub_continue_super(struct btrfs_root *root)
-{
- up_write(&root->fs_info->scrub_super_lock);
-}
-
int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
mutex_lock(&fs_info->scrub_lock);
@@ -3383,7 +3355,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
struct bio *bio;
struct btrfs_device *dev;
int ret;
- DECLARE_COMPLETION_ONSTACK(compl);
dev = sctx->wr_ctx.tgtdev;
if (!dev)
@@ -3400,8 +3371,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
spin_unlock(&sctx->stat_lock);
return -ENOMEM;
}
- bio->bi_private = &compl;
- bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_size = 0;
bio->bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
@@ -3412,10 +3381,8 @@ leave_with_eio:
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
- btrfsic_submit_bio(WRITE_SYNC, bio);
- wait_for_completion(&compl);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
goto leave_with_eio;
bio_put(bio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e46e0ed74925..6837fe87f3a6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -121,7 +121,6 @@ struct send_ctx {
struct list_head name_cache_list;
int name_cache_size;
- struct file *cur_inode_filp;
char *read_buf;
};
@@ -565,10 +564,8 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
{
struct btrfs_cmd_header *hdr;
- if (!sctx->send_buf) {
- WARN_ON(1);
+ if (WARN_ON(!sctx->send_buf))
return -EINVAL;
- }
BUG_ON(sctx->send_size);
@@ -791,7 +788,7 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
if (found_key->type == BTRFS_INODE_REF_KEY) {
ptr = (unsigned long)btrfs_item_ptr(eb, slot,
struct btrfs_inode_ref);
- item = btrfs_item_nr(eb, slot);
+ item = btrfs_item_nr(slot);
total = btrfs_item_size(eb, item);
elem_size = sizeof(*iref);
} else {
@@ -905,7 +902,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
eb = path->nodes[0];
slot = path->slots[0];
- item = btrfs_item_nr(eb, slot);
+ item = btrfs_item_nr(slot);
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
cur = 0;
len = 0;
@@ -2120,77 +2117,6 @@ out:
}
/*
- * Called for regular files when sending extents data. Opens a struct file
- * to read from the file.
- */
-static int open_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
- struct btrfs_key key;
- struct path path;
- struct inode *inode;
- struct dentry *dentry;
- struct file *filp;
- int new = 0;
-
- if (sctx->cur_inode_filp)
- goto out;
-
- key.objectid = sctx->cur_ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
- &new);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto out;
- }
-
- dentry = d_obtain_alias(inode);
- inode = NULL;
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- goto out;
- }
-
- path.mnt = sctx->mnt;
- path.dentry = dentry;
- filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
- dput(dentry);
- dentry = NULL;
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto out;
- }
- sctx->cur_inode_filp = filp;
-
-out:
- /*
- * no xxxput required here as every vfs op
- * does it by itself on failure
- */
- return ret;
-}
-
-/*
- * Closes the struct file that was created in open_cur_inode_file
- */
-static int close_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
-
- if (!sctx->cur_inode_filp)
- goto out;
-
- ret = filp_close(sctx->cur_inode_filp, NULL);
- sctx->cur_inode_filp = NULL;
-
-out:
- return ret;
-}
-
-/*
* Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
*/
static int send_subvol_begin(struct send_ctx *sctx)
@@ -3622,6 +3548,72 @@ out:
return ret;
}
+static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
+{
+ struct btrfs_root *root = sctx->send_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct inode *inode;
+ struct page *page;
+ char *addr;
+ struct btrfs_key key;
+ pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t last_index;
+ unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
+ ssize_t ret = 0;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (offset + len > i_size_read(inode)) {
+ if (offset > i_size_read(inode))
+ len = 0;
+ else
+ len = offset - i_size_read(inode);
+ }
+ if (len == 0)
+ goto out;
+
+ last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ while (index <= last_index) {
+ unsigned cur_len = min_t(unsigned, len,
+ PAGE_CACHE_SIZE - pg_offset);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = -EIO;
+ break;
+ }
+ }
+
+ addr = kmap(page);
+ memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ pg_offset = 0;
+ len -= cur_len;
+ ret += cur_len;
+ }
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* Read some bytes from the current inode/file and send a write command to
* user space.
@@ -3630,35 +3622,20 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
{
int ret = 0;
struct fs_path *p;
- loff_t pos = offset;
- int num_read = 0;
- mm_segment_t old_fs;
+ ssize_t num_read = 0;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
- /*
- * vfs normally only accepts user space buffers for security reasons.
- * we only read from the file and also only provide the read_buf buffer
- * to vfs. As this buffer does not come from a user space call, it's
- * ok to temporary allow kernel space buffers.
- */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
-
verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
- ret = open_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
- ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
- if (ret < 0)
- goto out;
- num_read = ret;
- if (!num_read)
+ num_read = fill_read_buf(sctx, offset, len);
+ if (num_read <= 0) {
+ if (num_read < 0)
+ ret = num_read;
goto out;
+ }
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
@@ -3677,7 +3654,6 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
tlv_put_failure:
out:
fs_path_free(p);
- set_fs(old_fs);
if (ret < 0)
return ret;
return num_read;
@@ -3926,16 +3902,16 @@ static int is_extent_unchanged(struct send_ctx *sctx,
while (key.offset < ekey->offset + left_len) {
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
right_type = btrfs_file_extent_type(eb, ei);
- right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
- right_len = btrfs_file_extent_num_bytes(eb, ei);
- right_offset = btrfs_file_extent_offset(eb, ei);
- right_gen = btrfs_file_extent_generation(eb, ei);
-
if (right_type != BTRFS_FILE_EXTENT_REG) {
ret = 0;
goto out;
}
+ right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
+ right_len = btrfs_file_extent_num_bytes(eb, ei);
+ right_offset = btrfs_file_extent_offset(eb, ei);
+ right_gen = btrfs_file_extent_generation(eb, ei);
+
/*
* Are we at extent 8? If yes, we know the extent is changed.
* This may only happen on the first iteration.
@@ -4222,10 +4198,6 @@ static int changed_inode(struct send_ctx *sctx,
u64 left_gen = 0;
u64 right_gen = 0;
- ret = close_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
@@ -4686,11 +4658,6 @@ static int send_subvol(struct send_ctx *sctx)
}
out:
- if (!ret)
- ret = close_cur_inode_file(sctx);
- else
- close_cur_inode_file(sctx);
-
free_recorded_refs(sctx);
return ret;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e913328d0f2a..2d8ac1bf0cf9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -42,7 +42,6 @@
#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/btrfs.h>
-#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
#include "disk-io.h"
@@ -921,7 +920,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_wait_all_ordered_extents(fs_info);
+ btrfs_wait_ordered_roots(fs_info, -1);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@@ -1330,6 +1329,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
*/
+
+ /* wait for the uuid_scan task to finish */
+ down(&fs_info->uuid_tree_rescan_sem);
+ /* avoid complains from lockdep et al. */
+ up(&fs_info->uuid_tree_rescan_sem);
+
sb->s_flags |= MS_RDONLY;
btrfs_dev_replace_suspend_for_unmount(fs_info);
@@ -1465,7 +1470,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
nr_devices = fs_info->fs_devices->open_devices;
BUG_ON(!nr_devices);
- devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+ devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
GFP_NOFS);
if (!devices_info)
return -ENOMEM;
@@ -1789,7 +1794,25 @@ static void btrfs_print_info(void)
static int btrfs_run_sanity_tests(void)
{
- return btrfs_test_free_space_cache();
+ int ret;
+
+ ret = btrfs_init_test_fs();
+ if (ret)
+ return ret;
+
+ ret = btrfs_test_free_space_cache();
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_buffer_operations();
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_io();
+ if (ret)
+ goto out;
+ ret = btrfs_test_inodes();
+out:
+ btrfs_destroy_test_fs();
+ return ret;
}
static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
new file mode 100644
index 000000000000..757ef00a75a4
--- /dev/null
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/magic.h>
+#include "btrfs-tests.h"
+#include "../ctree.h"
+
+static struct vfsmount *test_mnt = NULL;
+
+static const struct super_operations btrfs_test_super_ops = {
+ .alloc_inode = btrfs_alloc_inode,
+ .destroy_inode = btrfs_test_destroy_inode,
+};
+
+static struct dentry *btrfs_test_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops,
+ NULL, BTRFS_TEST_MAGIC);
+}
+
+static struct file_system_type test_type = {
+ .name = "btrfs_test_fs",
+ .mount = btrfs_test_mount,
+ .kill_sb = kill_anon_super,
+};
+
+struct inode *btrfs_new_test_inode(void)
+{
+ return new_inode(test_mnt->mnt_sb);
+}
+
+int btrfs_init_test_fs(void)
+{
+ int ret;
+
+ ret = register_filesystem(&test_type);
+ if (ret) {
+ printk(KERN_ERR "btrfs: cannot register test file system\n");
+ return ret;
+ }
+
+ test_mnt = kern_mount(&test_type);
+ if (IS_ERR(test_mnt)) {
+ printk(KERN_ERR "btrfs: cannot mount test file system\n");
+ unregister_filesystem(&test_type);
+ return ret;
+ }
+ return 0;
+}
+
+void btrfs_destroy_test_fs(void)
+{
+ kern_unmount(test_mnt);
+ unregister_filesystem(&test_type);
+}
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 580877625776..b353bc806ca0 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -24,11 +24,36 @@
#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
int btrfs_test_free_space_cache(void);
+int btrfs_test_extent_buffer_operations(void);
+int btrfs_test_extent_io(void);
+int btrfs_test_inodes(void);
+int btrfs_init_test_fs(void);
+void btrfs_destroy_test_fs(void);
+struct inode *btrfs_new_test_inode(void);
#else
static inline int btrfs_test_free_space_cache(void)
{
return 0;
}
+static inline int btrfs_test_extent_buffer_operations(void)
+{
+ return 0;
+}
+static inline int btrfs_init_test_fs(void)
+{
+ return 0;
+}
+static inline void btrfs_destroy_test_fs(void)
+{
+}
+static inline int btrfs_test_extent_io(void)
+{
+ return 0;
+}
+static inline int btrfs_test_inodes(void)
+{
+ return 0;
+}
#endif
#endif
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
new file mode 100644
index 000000000000..cc286ce97d1e
--- /dev/null
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../extent_io.h"
+#include "../disk-io.h"
+
+static int test_btrfs_split_item(void)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root;
+ struct extent_buffer *eb;
+ struct btrfs_item *item;
+ char *value = "mary had a little lamb";
+ char *split1 = "mary had a little";
+ char *split2 = " lamb";
+ char *split3 = "mary";
+ char *split4 = " had a little";
+ char buf[32];
+ struct btrfs_key key;
+ u32 value_len = strlen(value);
+ int ret = 0;
+
+ test_msg("Running btrfs_split_item tests\n");
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Could not allocate root\n");
+ return PTR_ERR(root);
+ }
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ test_msg("Could not allocate path\n");
+ kfree(root);
+ return -ENOMEM;
+ }
+
+ path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
+ if (!eb) {
+ test_msg("Could not allocate dummy buffer\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ path->slots[0] = 0;
+
+ key.objectid = 0;
+ key.type = BTRFS_EXTENT_CSUM_KEY;
+ key.offset = 0;
+
+ setup_items_for_insert(root, path, &key, &value_len, value_len,
+ value_len + sizeof(struct btrfs_item), 1);
+ item = btrfs_item_nr(0);
+ write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
+ value_len);
+
+ key.offset = 3;
+
+ /*
+ * Passing NULL trans here should be safe because we have plenty of
+ * space in this leaf to split the item without having to split the
+ * leaf.
+ */
+ ret = btrfs_split_item(NULL, root, path, &key, 17);
+ if (ret) {
+ test_msg("Split item failed %d\n", ret);
+ goto out;
+ }
+
+ /*
+ * Read the first slot, it should have the original key and contain only
+ * 'mary had a little'
+ */
+ btrfs_item_key_to_cpu(eb, &key, 0);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 0) {
+ test_msg("Invalid key at slot 0\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(0);
+ if (btrfs_item_size(eb, item) != strlen(split1)) {
+ test_msg("Invalid len in the first split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
+ strlen(split1));
+ if (memcmp(buf, split1, strlen(split1))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the first split have='%.*s' want '%s'\n",
+ (int)strlen(split1), buf, split1);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 1);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 3) {
+ test_msg("Invalid key at slot 1\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(1);
+ if (btrfs_item_size(eb, item) != strlen(split2)) {
+ test_msg("Invalid len in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
+ strlen(split2));
+ if (memcmp(buf, split2, strlen(split2))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ key.offset = 1;
+ /* Do it again so we test memmoving the other items in the leaf */
+ ret = btrfs_split_item(NULL, root, path, &key, 4);
+ if (ret) {
+ test_msg("Second split item failed %d\n", ret);
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 0);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 0) {
+ test_msg("Invalid key at slot 0\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(0);
+ if (btrfs_item_size(eb, item) != strlen(split3)) {
+ test_msg("Invalid len in the first split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
+ strlen(split3));
+ if (memcmp(buf, split3, strlen(split3))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the third split");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 1);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 1) {
+ test_msg("Invalid key at slot 1\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(1);
+ if (btrfs_item_size(eb, item) != strlen(split4)) {
+ test_msg("Invalid len in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
+ strlen(split4));
+ if (memcmp(buf, split4, strlen(split4))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the fourth split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 2);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 3) {
+ test_msg("Invalid key at slot 2\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(2);
+ if (btrfs_item_size(eb, item) != strlen(split2)) {
+ test_msg("Invalid len in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
+ strlen(split2));
+ if (memcmp(buf, split2, strlen(split2))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the last chunk\n");
+ ret = -EINVAL;
+ goto out;
+ }
+out:
+ btrfs_free_path(path);
+ kfree(root);
+ return ret;
+}
+
+int btrfs_test_extent_buffer_operations(void)
+{
+ test_msg("Running extent buffer operation tests");
+ return test_btrfs_split_item();
+}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
new file mode 100644
index 000000000000..7e99c2f98dd0
--- /dev/null
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include "btrfs-tests.h"
+#include "../extent_io.h"
+
+#define PROCESS_UNLOCK (1 << 0)
+#define PROCESS_RELEASE (1 << 1)
+#define PROCESS_TEST_LOCKED (1 << 2)
+
+static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
+ unsigned long flags)
+{
+ int ret;
+ struct page *pages[16];
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = end_index - index + 1;
+ int i;
+ int count = 0;
+ int loops = 0;
+
+ while (nr_pages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long, nr_pages,
+ ARRAY_SIZE(pages)), pages);
+ for (i = 0; i < ret; i++) {
+ if (flags & PROCESS_TEST_LOCKED &&
+ !PageLocked(pages[i]))
+ count++;
+ if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ if (flags & PROCESS_RELEASE)
+ page_cache_release(pages[i]);
+ }
+ nr_pages -= ret;
+ index += ret;
+ cond_resched();
+ loops++;
+ if (loops > 100000) {
+ printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
+ break;
+ }
+ }
+ return count;
+}
+
+static int test_find_delalloc(void)
+{
+ struct inode *inode;
+ struct extent_io_tree tmp;
+ struct page *page;
+ struct page *locked_page = NULL;
+ unsigned long index = 0;
+ u64 total_dirty = 256 * 1024 * 1024;
+ u64 max_bytes = 128 * 1024 * 1024;
+ u64 start, end, test_start;
+ u64 found;
+ int ret = -EINVAL;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Failed to allocate test inode\n");
+ return -ENOMEM;
+ }
+
+ extent_io_tree_init(&tmp, &inode->i_data);
+
+ /*
+ * First go through and create and mark all of our pages dirty, we pin
+ * everything to make sure our pages don't get evicted and screw up our
+ * test.
+ */
+ for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page) {
+ test_msg("Failed to allocate test page\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ SetPageDirty(page);
+ if (index) {
+ unlock_page(page);
+ } else {
+ page_cache_get(page);
+ locked_page = page;
+ }
+ }
+
+ /* Test this scenario
+ * |--- delalloc ---|
+ * |--- search ---|
+ */
+ set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+ start = 0;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Should have found at least one delalloc\n");
+ goto out_bits;
+ }
+ if (start != 0 || end != 4095) {
+ test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
+ start, end);
+ goto out_bits;
+ }
+ unlock_extent(&tmp, start, end);
+ unlock_page(locked_page);
+ page_cache_release(locked_page);
+
+ /*
+ * Test this scenario
+ *
+ * |--- delalloc ---|
+ * |--- search ---|
+ */
+ test_start = 64 * 1024 * 1024;
+ locked_page = find_lock_page(inode->i_mapping,
+ test_start >> PAGE_CACHE_SHIFT);
+ if (!locked_page) {
+ test_msg("Couldn't find the locked page\n");
+ goto out_bits;
+ }
+ set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+ start = test_start;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Couldn't find delalloc in our range\n");
+ goto out_bits;
+ }
+ if (start != test_start || end != max_bytes - 1) {
+ test_msg("Expected start %Lu end %Lu, got start %Lu, end "
+ "%Lu\n", test_start, max_bytes - 1, start, end);
+ goto out_bits;
+ }
+ if (process_page_range(inode, start, end,
+ PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
+ test_msg("There were unlocked pages in the range\n");
+ goto out_bits;
+ }
+ unlock_extent(&tmp, start, end);
+ /* locked_page was unlocked above */
+ page_cache_release(locked_page);
+
+ /*
+ * Test this scenario
+ * |--- delalloc ---|
+ * |--- search ---|
+ */
+ test_start = max_bytes + 4096;
+ locked_page = find_lock_page(inode->i_mapping, test_start >>
+ PAGE_CACHE_SHIFT);
+ if (!locked_page) {
+ test_msg("Could'nt find the locked page\n");
+ goto out_bits;
+ }
+ start = test_start;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (found) {
+ test_msg("Found range when we shouldn't have\n");
+ goto out_bits;
+ }
+ if (end != (u64)-1) {
+ test_msg("Did not return the proper end offset\n");
+ goto out_bits;
+ }
+
+ /*
+ * Test this scenario
+ * [------- delalloc -------|
+ * [max_bytes]|-- search--|
+ *
+ * We are re-using our test_start from above since it works out well.
+ */
+ set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+ start = test_start;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Didn't find our range\n");
+ goto out_bits;
+ }
+ if (start != test_start || end != total_dirty - 1) {
+ test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+ test_start, total_dirty - 1, start, end);
+ goto out_bits;
+ }
+ if (process_page_range(inode, start, end,
+ PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
+ test_msg("Pages in range were not all locked\n");
+ goto out_bits;
+ }
+ unlock_extent(&tmp, start, end);
+
+ /*
+ * Now to test where we run into a page that is no longer dirty in the
+ * range we want to find.
+ */
+ page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024))
+ >> PAGE_CACHE_SHIFT);
+ if (!page) {
+ test_msg("Couldn't find our page\n");
+ goto out_bits;
+ }
+ ClearPageDirty(page);
+ page_cache_release(page);
+
+ /* We unlocked it in the previous test */
+ lock_page(locked_page);
+ start = test_start;
+ end = 0;
+ /*
+ * Currently if we fail to find dirty pages in the delalloc range we
+ * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If
+ * this changes at any point in the future we will need to fix this
+ * tests expected behavior.
+ */
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Didn't find our range\n");
+ goto out_bits;
+ }
+ if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
+ test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+ test_start, test_start + PAGE_CACHE_SIZE - 1, start,
+ end);
+ goto out_bits;
+ }
+ if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
+ PROCESS_UNLOCK)) {
+ test_msg("Pages in range were not all locked\n");
+ goto out_bits;
+ }
+ ret = 0;
+out_bits:
+ clear_extent_bits(&tmp, 0, total_dirty - 1,
+ (unsigned long)-1, GFP_NOFS);
+out:
+ if (locked_page)
+ page_cache_release(locked_page);
+ process_page_range(inode, 0, total_dirty - 1,
+ PROCESS_UNLOCK | PROCESS_RELEASE);
+ iput(inode);
+ return ret;
+}
+
+int btrfs_test_extent_io(void)
+{
+ test_msg("Running find delalloc tests\n");
+ return test_find_delalloc();
+}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
new file mode 100644
index 000000000000..397d1f99a8eb
--- /dev/null
+++ b/fs/btrfs/tests/inode-tests.c
@@ -0,0 +1,955 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../btrfs_inode.h"
+#include "../disk-io.h"
+#include "../extent_io.h"
+#include "../volumes.h"
+
+static struct btrfs_fs_info *alloc_dummy_fs_info(void)
+{
+ struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
+ GFP_NOFS);
+ if (!fs_info)
+ return fs_info;
+ fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
+ GFP_NOFS);
+ if (!fs_info->fs_devices) {
+ kfree(fs_info);
+ return NULL;
+ }
+ return fs_info;
+}
+static void free_dummy_root(struct btrfs_root *root)
+{
+ if (!root)
+ return;
+ if (root->fs_info) {
+ kfree(root->fs_info->fs_devices);
+ kfree(root->fs_info);
+ }
+ if (root->node)
+ free_extent_buffer(root->node);
+ kfree(root);
+}
+
+static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
+ u64 ram_bytes, u64 offset, u64 disk_bytenr,
+ u64 disk_len, u32 type, u8 compression, int slot)
+{
+ struct btrfs_path path;
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf = root->node;
+ struct btrfs_key key;
+ u32 value_len = sizeof(struct btrfs_file_extent_item);
+
+ if (type == BTRFS_FILE_EXTENT_INLINE)
+ value_len += len;
+ memset(&path, 0, sizeof(path));
+
+ path.nodes[0] = leaf;
+ path.slots[0] = slot;
+
+ key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ setup_items_for_insert(root, &path, &key, &value_len, value_len,
+ value_len + sizeof(struct btrfs_item), 1);
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+ btrfs_set_file_extent_generation(leaf, fi, 1);
+ btrfs_set_file_extent_type(leaf, fi, type);
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len);
+ btrfs_set_file_extent_offset(leaf, fi, offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi, len);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
+ btrfs_set_file_extent_compression(leaf, fi, compression);
+ btrfs_set_file_extent_encryption(leaf, fi, 0);
+ btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+}
+
+static void insert_inode_item_key(struct btrfs_root *root)
+{
+ struct btrfs_path path;
+ struct extent_buffer *leaf = root->node;
+ struct btrfs_key key;
+ u32 value_len = 0;
+
+ memset(&path, 0, sizeof(path));
+
+ path.nodes[0] = leaf;
+ path.slots[0] = 0;
+
+ key.objectid = BTRFS_INODE_ITEM_KEY;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ setup_items_for_insert(root, &path, &key, &value_len, value_len,
+ value_len + sizeof(struct btrfs_item), 1);
+}
+
+/*
+ * Build the most complicated map of extents the earth has ever seen. We want
+ * this so we can test all of the corner cases of btrfs_get_extent. Here is a
+ * diagram of how the extents will look though this may not be possible we still
+ * want to make sure everything acts normally (the last number is not inclusive)
+ *
+ * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
+ * [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
+ *
+ * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
+ * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
+ *
+ * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
+ * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
+ *
+ * [81920-86016]
+ * [ regular ]
+ */
+static void setup_file_extents(struct btrfs_root *root)
+{
+ int slot = 0;
+ u64 disk_bytenr = 1 * 1024 * 1024;
+ u64 offset = 0;
+
+ /* First we want a hole */
+ insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
+ slot);
+ slot++;
+ offset += 5;
+
+ /*
+ * Now we want an inline extent, I don't think this is possible but hey
+ * why not? Also keep in mind if we have an inline extent it counts as
+ * the whole first page. If we were to expand it we would have to cow
+ * and we wouldn't have an inline extent anymore.
+ */
+ insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
+ slot);
+ slot++;
+ offset = 4096;
+
+ /* Now another hole */
+ insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
+ slot);
+ slot++;
+ offset += 4;
+
+ /* Now for a regular extent */
+ insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ disk_bytenr += 4096;
+ offset += 4095;
+
+ /*
+ * Now for 3 extents that were split from a hole punch so we test
+ * offsets properly.
+ */
+ insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
+ 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 8192;
+ disk_bytenr += 16384;
+
+ /* Now for a unwritten prealloc extent */
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ slot++;
+ offset += 4096;
+
+ /*
+ * We want to jack up disk_bytenr a little more so the em stuff doesn't
+ * merge our records.
+ */
+ disk_bytenr += 8192;
+
+ /*
+ * Now for a partially written prealloc extent, basically the same as
+ * the hole punch example above. Ram_bytes never changes when you mark
+ * extents written btw.
+ */
+ insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ slot++;
+ offset += 8192;
+ disk_bytenr += 16384;
+
+ /* Now a normal compressed extent */
+ insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ slot++;
+ offset += 8192;
+ /* No merges */
+ disk_bytenr += 8192;
+
+ /* Now a split compressed extent */
+ insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ slot++;
+ offset += 8192;
+ disk_bytenr += 8192;
+
+ /* Now extents that have a hole but no hole extent */
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 16384;
+ disk_bytenr += 4096;
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+}
+
+static unsigned long prealloc_only = 0;
+static unsigned long compressed_only = 0;
+static unsigned long vacancy_only = 0;
+
+static noinline int test_btrfs_get_extent(void)
+{
+ struct inode *inode = NULL;
+ struct btrfs_root *root = NULL;
+ struct extent_map *em = NULL;
+ u64 orig_start;
+ u64 disk_bytenr;
+ u64 offset;
+ int ret = -ENOMEM;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Couldn't allocate inode\n");
+ return ret;
+ }
+
+ BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+ BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ BTRFS_I(inode)->location.offset = 0;
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ goto out;
+ }
+
+ /*
+ * We do this since btrfs_get_extent wants to assign em->bdev to
+ * root->fs_info->fs_devices->latest_bdev.
+ */
+ root->fs_info = alloc_dummy_fs_info();
+ if (!root->fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ goto out;
+ }
+
+ root->node = alloc_dummy_extent_buffer(0, 4096);
+ if (!root->node) {
+ test_msg("Couldn't allocate dummy buffer\n");
+ goto out;
+ }
+
+ /*
+ * We will just free a dummy node if it's ref count is 2 so we need an
+ * extra ref so our searches don't accidently release our page.
+ */
+ extent_buffer_get(root->node);
+ btrfs_set_header_nritems(root->node, 0);
+ btrfs_set_header_level(root->node, 0);
+ ret = -EINVAL;
+
+ /* First with no extents */
+ BTRFS_I(inode)->root = root;
+ em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
+ if (IS_ERR(em)) {
+ em = NULL;
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ test_msg("Vacancy flag wasn't set properly\n");
+ goto out;
+ }
+ free_extent_map(em);
+ btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+
+ /*
+ * All of the magic numbers are based on the mapping setup in
+ * setup_file_extents, so if you change anything there you need to
+ * update the comment and update the expected values below.
+ */
+ setup_file_extents(root);
+
+ em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != 0 || em->len != 5) {
+ test_msg("Unexpected extent wanted start 0 len 5, got start "
+ "%llu len %llu\n", em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_INLINE) {
+ test_msg("Expected an inline, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4091) {
+ test_msg("Unexpected extent wanted start %llu len 1, got start "
+ "%llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ /*
+ * We don't test anything else for inline since it doesn't get set
+ * unless we have a page for it to write into. Maybe we should change
+ * this?
+ */
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4) {
+ test_msg("Unexpected extent wanted start %llu len 4, got start "
+ "%llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Regular extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4095) {
+ test_msg("Unexpected extent wanted start %llu len 4095, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* The next 3 are split extents */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ disk_bytenr = em->block_start;
+ orig_start = em->start;
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ orig_start, em->orig_start);
+ goto out;
+ }
+ disk_bytenr += (em->start - orig_start);
+ if (em->block_start != disk_bytenr) {
+ test_msg("Wrong block start, want %llu, have %llu\n",
+ disk_bytenr, em->block_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Prealloc extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != prealloc_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ prealloc_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* The next 3 are a half written prealloc extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != prealloc_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ prealloc_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ disk_bytenr = em->block_start;
+ orig_start = em->start;
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_HOLE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
+ orig_start, em->orig_start);
+ goto out;
+ }
+ if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
+ test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ disk_bytenr + (em->start - em->orig_start),
+ em->block_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != prealloc_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ prealloc_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
+ em->orig_start);
+ goto out;
+ }
+ if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
+ test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ disk_bytenr + (em->start - em->orig_start),
+ em->block_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Now for the compressed extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != compressed_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ compressed_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ em->start, em->orig_start);
+ goto out;
+ }
+ if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
+ test_msg("Unexpected compress type, wanted %d, got %d\n",
+ BTRFS_COMPRESS_ZLIB, em->compress_type);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Split compressed extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != compressed_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ compressed_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ em->start, em->orig_start);
+ goto out;
+ }
+ if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
+ test_msg("Unexpected compress type, wanted %d, got %d\n",
+ BTRFS_COMPRESS_ZLIB, em->compress_type);
+ goto out;
+ }
+ disk_bytenr = em->block_start;
+ orig_start = em->start;
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != disk_bytenr) {
+ test_msg("Block start does not match, want %llu got %llu\n",
+ disk_bytenr, em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != compressed_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ compressed_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ em->start, orig_start);
+ goto out;
+ }
+ if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
+ test_msg("Unexpected compress type, wanted %d, got %d\n",
+ BTRFS_COMPRESS_ZLIB, em->compress_type);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* A hole between regular extents but no hole extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ /*
+ * Currently we just return a length that we requested rather than the
+ * length of the actual hole, if this changes we'll have to change this
+ * test.
+ */
+ if (em->start != offset || em->len != 12288) {
+ test_msg("Unexpected extent wanted start %llu len 12288, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != vacancy_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ vacancy_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!IS_ERR(em))
+ free_extent_map(em);
+ iput(inode);
+ free_dummy_root(root);
+ return ret;
+}
+
+static int test_hole_first(void)
+{
+ struct inode *inode = NULL;
+ struct btrfs_root *root = NULL;
+ struct extent_map *em = NULL;
+ int ret = -ENOMEM;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Couldn't allocate inode\n");
+ return ret;
+ }
+
+ BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+ BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ BTRFS_I(inode)->location.offset = 0;
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ goto out;
+ }
+
+ root->fs_info = alloc_dummy_fs_info();
+ if (!root->fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ goto out;
+ }
+
+ root->node = alloc_dummy_extent_buffer(0, 4096);
+ if (!root->node) {
+ test_msg("Couldn't allocate dummy buffer\n");
+ goto out;
+ }
+
+ extent_buffer_get(root->node);
+ btrfs_set_header_nritems(root->node, 0);
+ btrfs_set_header_level(root->node, 0);
+ BTRFS_I(inode)->root = root;
+ ret = -EINVAL;
+
+ /*
+ * Need a blank inode item here just so we don't confuse
+ * btrfs_get_extent.
+ */
+ insert_inode_item_key(root);
+ insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, 1);
+ em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != 0 || em->len != 4096) {
+ test_msg("Unexpected extent wanted start 0 len 4096, got start "
+ "%llu len %llu\n", em->start, em->len);
+ goto out;
+ }
+ if (em->flags != vacancy_only) {
+ test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
+ em->flags);
+ goto out;
+ }
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != 4096) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != 4096 || em->len != 4096) {
+ test_msg("Unexpected extent wanted start 4096 len 4096, got "
+ "start %llu len %llu\n", em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, wanted 0 got %lu\n",
+ em->flags);
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!IS_ERR(em))
+ free_extent_map(em);
+ iput(inode);
+ free_dummy_root(root);
+ return ret;
+}
+
+int btrfs_test_inodes(void)
+{
+ int ret;
+
+ set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
+ set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
+ set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
+
+ test_msg("Running btrfs_get_extent tests\n");
+ ret = test_btrfs_get_extent();
+ if (ret)
+ return ret;
+ test_msg("Running hole first btrfs_get_extent test\n");
+ return test_hole_first();
+}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8c81bdc1ef9b..c6a872a8a468 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
__TRANS_JOIN_NOLOCK),
};
-static void put_transaction(struct btrfs_transaction *transaction)
+void btrfs_put_transaction(struct btrfs_transaction *transaction)
{
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
@@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root)
wait_event(root->fs_info->transaction_wait,
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
cur_trans->aborted);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
}
@@ -353,6 +353,17 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
return 0;
}
+static inline bool need_reserve_reloc_root(struct btrfs_root *root)
+{
+ if (!root->fs_info->reloc_ctl ||
+ !root->ref_cows ||
+ root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ root->reloc_root)
+ return false;
+
+ return true;
+}
+
static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
enum btrfs_reserve_flush_enum flush)
@@ -360,8 +371,9 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
- int ret;
u64 qgroup_reserved = 0;
+ bool reloc_reserved = false;
+ int ret;
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return ERR_PTR(-EROFS);
@@ -390,6 +402,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
}
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ /*
+ * Do the reservation for the relocation root creation
+ */
+ if (unlikely(need_reserve_reloc_root(root))) {
+ num_bytes += root->nodesize;
+ reloc_reserved = true;
+ }
+
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
num_bytes, flush);
@@ -451,6 +471,7 @@ again:
h->delayed_ref_elem.seq = 0;
h->type = type;
h->allocating_chunk = false;
+ h->reloc_reserved = false;
INIT_LIST_HEAD(&h->qgroup_ref_list);
INIT_LIST_HEAD(&h->new_bgs);
@@ -466,6 +487,7 @@ again:
h->transid, num_bytes, 1);
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
+ h->reloc_reserved = reloc_reserved;
}
h->qgroup_reserved = qgroup_reserved;
@@ -610,7 +632,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
}
wait_for_commit(root, cur_trans);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
out:
return ret;
}
@@ -735,7 +757,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
if (current->journal_info == trans)
current->journal_info = NULL;
@@ -744,8 +766,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_run_delayed_iputs(root);
if (trans->aborted ||
- test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
+ test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
+ wake_up_process(info->transaction_kthread);
err = -EIO;
+ }
assert_qgroups_uptodate(trans);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -948,16 +972,19 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info);
- WARN_ON(ret);
+ if (ret)
+ return ret;
ret = btrfs_run_dev_replace(trans, root->fs_info);
- WARN_ON(ret);
-
+ if (ret)
+ return ret;
ret = btrfs_run_qgroups(trans, root->fs_info);
- BUG_ON(ret);
+ if (ret)
+ return ret;
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- BUG_ON(ret);
+ if (ret)
+ return ret;
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
@@ -1453,7 +1480,7 @@ static void do_async_commit(struct work_struct *work)
* We've got freeze protection passed with the transaction.
* Tell lockdep about it.
*/
- if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_acquire_read(
&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
@@ -1494,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* Tell lockdep we've released the freeze rwsem, since the
* async commit thread will be the one to unlock it.
*/
- if (trans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_release(
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
@@ -1510,7 +1537,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
if (current->journal_info == trans)
current->journal_info = NULL;
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
return 0;
}
@@ -1552,8 +1579,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->trans_lock);
- put_transaction(cur_trans);
- put_transaction(cur_trans);
+ if (trans->type & __TRANS_FREEZABLE)
+ sb_end_intwrite(root->fs_info->sb);
+ btrfs_put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
trace_btrfs_transaction_commit(root);
@@ -1571,15 +1600,19 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_run_delayed_items(trans, root);
- if (ret)
- return ret;
-
/*
* running the delayed items may have added new refs. account
* them now so that they hinder processing of more delayed refs
* as little as possible.
*/
- btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+ if (ret) {
+ btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+ return ret;
+ }
+
+ ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+ if (ret)
+ return ret;
/*
* rename don't use btrfs_join_transaction, so, once we
@@ -1596,14 +1629,14 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- return btrfs_start_all_delalloc_inodes(fs_info, 1);
+ return btrfs_start_delalloc_roots(fs_info, 1);
return 0;
}
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- btrfs_wait_all_ordered_extents(fs_info);
+ btrfs_wait_ordered_roots(fs_info, -1);
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1669,7 +1702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, cur_trans);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
return ret;
}
@@ -1686,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, prev_trans);
- put_transaction(prev_trans);
+ btrfs_put_transaction(prev_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
}
@@ -1885,8 +1918,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
list_del_init(&cur_trans->list);
spin_unlock(&root->fs_info->trans_lock);
- put_transaction(cur_trans);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
if (trans->type & __TRANS_FREEZABLE)
sb_end_intwrite(root->fs_info->sb);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 5c2af8491621..7657d115067d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -92,6 +92,7 @@ struct btrfs_trans_handle {
short aborted;
short adding_csums;
bool allocating_chunk;
+ bool reloc_reserved;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
@@ -166,4 +167,5 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
+void btrfs_put_transaction(struct btrfs_transaction *transaction);
#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 94e05c1f118a..76928ca97741 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -37,7 +37,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
- int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
u64 min_trans = 0;
@@ -50,7 +49,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
- if (root->ref_cows == 0 && !is_extent)
+ if (root->ref_cows == 0)
goto out;
if (btrfs_test_opt(root, SSD))
@@ -85,7 +84,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &key, NULL, path, min_trans);
+ ret = btrfs_search_forward(root, &key, path, min_trans);
if (ret < 0)
goto out;
if (ret > 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 79f057c0619a..9f7fc51ca334 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -26,7 +26,6 @@
#include "locking.h"
#include "print-tree.h"
#include "backref.h"
-#include "compat.h"
#include "tree-log.h"
#include "hash.h"
@@ -936,7 +935,7 @@ again:
parent_objectid,
victim_name,
victim_name_len)) {
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root, dir,
@@ -1006,7 +1005,7 @@ again:
victim_parent = read_one_inode(root,
parent_objectid);
if (victim_parent) {
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root,
@@ -1113,11 +1112,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct inode *dir;
- struct inode *inode;
+ struct inode *dir = NULL;
+ struct inode *inode = NULL;
unsigned long ref_ptr;
unsigned long ref_end;
- char *name;
+ char *name = NULL;
int namelen;
int ret;
int search_done = 0;
@@ -1150,13 +1149,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* care of the rest
*/
dir = read_one_inode(root, parent_objectid);
- if (!dir)
- return -ENOENT;
+ if (!dir) {
+ ret = -ENOENT;
+ goto out;
+ }
inode = read_one_inode(root, inode_objectid);
if (!inode) {
- iput(dir);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
while (ref_ptr < ref_end) {
@@ -1169,14 +1170,16 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
*/
if (!dir)
dir = read_one_inode(root, parent_objectid);
- if (!dir)
- return -ENOENT;
+ if (!dir) {
+ ret = -ENOENT;
+ goto out;
+ }
} else {
ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
&ref_index);
}
if (ret)
- return ret;
+ goto out;
/* if we already have a perfect match, we're done */
if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
@@ -1196,12 +1199,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
parent_objectid,
ref_index, name, namelen,
&search_done);
- if (ret == 1) {
- ret = 0;
+ if (ret) {
+ if (ret == 1)
+ ret = 0;
goto out;
}
- if (ret)
- goto out;
}
/* insert our name */
@@ -1215,6 +1217,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
kfree(name);
+ name = NULL;
if (log_ref_ver) {
iput(dir);
dir = NULL;
@@ -1225,6 +1228,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = overwrite_item(trans, root, path, eb, slot, key);
out:
btrfs_release_path(path);
+ kfree(name);
iput(dir);
iput(inode);
return ret;
@@ -1307,6 +1311,7 @@ static int count_inode_refs(struct btrfs_root *root,
break;
path->slots[0]--;
}
+process_slot:
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
if (key.objectid != ino ||
@@ -1327,6 +1332,10 @@ static int count_inode_refs(struct btrfs_root *root,
if (key.offset == 0)
break;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ goto process_slot;
+ }
key.offset--;
btrfs_release_path(path);
}
@@ -1480,7 +1489,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
if (!inode->i_nlink)
set_nlink(inode, 1);
else
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
@@ -1823,7 +1832,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (IS_ERR_OR_NULL(log_di)) {
+ if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -1841,7 +1850,7 @@ again:
goto out;
}
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
if (!ret)
@@ -1860,6 +1869,9 @@ again:
goto again;
ret = 0;
goto out;
+ } else if (IS_ERR(log_di)) {
+ kfree(name);
+ return PTR_ERR(log_di);
}
btrfs_release_path(log_path);
kfree(name);
@@ -2118,8 +2130,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
- if (btrfs_header_level(cur) != *level)
- WARN_ON(1);
+ WARN_ON(btrfs_header_level(cur) != *level);
if (path->slots[*level] >=
btrfs_header_nritems(cur))
@@ -2151,11 +2162,13 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return ret;
}
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(root_owner !=
BTRFS_TREE_LOG_OBJECTID);
@@ -2227,11 +2240,13 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[*level];
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_and_pin_reserved_extent(root,
@@ -2301,11 +2316,13 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level];
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, log, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, log, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(log->root_key.objectid !=
BTRFS_TREE_LOG_OBJECTID);
@@ -2571,9 +2588,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- btrfs_scrub_pause_super(root);
ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
- btrfs_scrub_continue_super(root);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_wake_log_root;
@@ -2608,13 +2623,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
.process_func = process_one_buffer
};
- if (trans) {
- ret = walk_log_tree(trans, log, &wc);
-
- /* I don't think this can happen but just in case */
- if (ret)
- btrfs_abort_transaction(trans, log, ret);
- }
+ ret = walk_log_tree(trans, log, &wc);
+ /* I don't think this can happen but just in case */
+ if (ret)
+ btrfs_abort_transaction(trans, log, ret);
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -2867,7 +2879,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 min_offset, u64 *last_offset_ret)
{
struct btrfs_key min_key;
- struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src;
int err = 0;
@@ -2879,9 +2890,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 ino = btrfs_ino(inode);
log = root->log_root;
- max_key.objectid = ino;
- max_key.offset = (u64)-1;
- max_key.type = key_type;
min_key.objectid = ino;
min_key.type = key_type;
@@ -2889,8 +2897,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, trans->transid);
+ ret = btrfs_search_forward(root, &min_key, path, trans->transid);
/*
* we didn't find anything from this transaction, see if there
@@ -2943,10 +2950,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
/* find the first key from this transaction again */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (ret != 0) {
- WARN_ON(1);
+ if (WARN_ON(ret != 0))
goto done;
- }
/*
* we have a block from this transaction, log every item in it
@@ -3172,11 +3177,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_inode_item *inode_item;
- struct btrfs_key key;
int ret;
- memcpy(&key, &BTRFS_I(inode)->location, sizeof(key));
- ret = btrfs_insert_empty_item(trans, log, path, &key,
+ ret = btrfs_insert_empty_item(trans, log, path,
+ &BTRFS_I(inode)->location,
sizeof(*inode_item));
if (ret && ret != -EEXIST)
return ret;
@@ -3375,7 +3379,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == 0)
+ if (em->block_start == EXTENT_MAP_HOLE)
skip_csum = true;
}
@@ -3417,11 +3421,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (skip_csum)
return 0;
- if (em->compress_type) {
- csum_offset = 0;
- csum_len = block_len;
- }
-
/*
* First check and see if our csums are on our outstanding ordered
* extents.
@@ -3505,8 +3504,13 @@ unlocked:
if (!mod_len || ret)
return ret;
- csum_offset = mod_start - em->start;
- csum_len = mod_len;
+ if (em->compress_type) {
+ csum_offset = 0;
+ csum_len = block_len;
+ } else {
+ csum_offset = mod_start - em->start;
+ csum_len = mod_len;
+ }
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
@@ -3693,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_inode_items(trans, log,
inode, 0, 0);
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags)) {
+ &BTRFS_I(inode)->runtime_flags) ||
+ inode_only == LOG_INODE_EXISTS) {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY;
@@ -3719,7 +3724,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
while (1) {
ins_nr = 0;
- ret = btrfs_search_forward(root, &min_key, &max_key,
+ ret = btrfs_search_forward(root, &min_key,
path, trans->transid);
if (ret != 0)
break;
@@ -3769,14 +3774,14 @@ next_slot:
}
btrfs_release_path(path);
- if (min_key.offset < (u64)-1)
+ if (min_key.offset < (u64)-1) {
min_key.offset++;
- else if (min_key.type < (u8)-1)
+ } else if (min_key.type < max_key.type) {
min_key.type++;
- else if (min_key.objectid < (u64)-1)
- min_key.objectid++;
- else
+ min_key.offset = 0;
+ } else {
break;
+ }
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
@@ -3797,7 +3802,7 @@ log_extents:
err = ret;
goto out_unlock;
}
- } else {
+ } else if (inode_only == LOG_INODE_ALL) {
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em, *n;
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index dd0dea3766f7..fbda90004fe9 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -260,7 +260,6 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *root = fs_info->uuid_root;
struct btrfs_key key;
- struct btrfs_key max_key;
struct btrfs_path *path;
int ret = 0;
struct extent_buffer *leaf;
@@ -277,13 +276,10 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
key.objectid = 0;
key.type = 0;
key.offset = 0;
- max_key.objectid = (u64)-1;
- max_key.type = (u8)-1;
- max_key.offset = (u64)-1;
again_search_slot:
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+ ret = btrfs_search_forward(root, &key, path, 0);
if (ret) {
if (ret > 0)
ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 043b215769c2..92303f42baaa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -28,7 +28,6 @@
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <asm/div64.h>
-#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -666,7 +665,8 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
if (device->bdev)
fs_devices->open_devices--;
- if (device->writeable && !device->is_tgtdev_for_dev_replace) {
+ if (device->writeable &&
+ device->devid != BTRFS_DEV_REPLACE_DEVID) {
list_del_init(&device->dev_alloc_list);
fs_devices->rw_devices--;
}
@@ -2041,6 +2041,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->in_fs_metadata = 1;
device->is_tgtdev_for_dev_replace = 0;
device->mode = FMODE_EXCL;
+ device->dev_stats_valid = 1;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
@@ -2208,6 +2209,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
device->in_fs_metadata = 1;
device->is_tgtdev_for_dev_replace = 1;
device->mode = FMODE_EXCL;
+ device->dev_stats_valid = 1;
set_blocksize(device->bdev, 4096);
device->fs_devices = fs_info->fs_devices;
list_add(&device->dev_list, &fs_info->fs_devices->devices);
@@ -2550,8 +2552,7 @@ again:
failed = 0;
retried = true;
goto again;
- } else if (failed && retried) {
- WARN_ON(1);
+ } else if (WARN_ON(failed && retried)) {
ret = -ENOSPC;
}
error:
@@ -3423,6 +3424,9 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
mutex_unlock(&fs_info->balance_mutex);
@@ -3488,7 +3492,7 @@ static int btrfs_uuid_scan_kthread(void *data)
path->keep_locks = 1;
while (1) {
- ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+ ret = btrfs_search_forward(root, &key, path, 0);
if (ret) {
if (ret > 0)
ret = 0;
@@ -4488,6 +4492,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
+ free_extent_map(em);
return 1;
}
@@ -4668,6 +4673,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
+ free_extent_map(em);
return -EINVAL;
}
@@ -4895,7 +4901,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_stripes = map->num_stripes;
max_errors = nr_parity_stripes(map);
- raid_map = kmalloc(sizeof(u64) * num_stripes,
+ raid_map = kmalloc_array(num_stripes, sizeof(u64),
GFP_NOFS);
if (!raid_map) {
ret = -ENOMEM;
@@ -5388,17 +5394,15 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
{
struct bio_vec *prev;
struct request_queue *q = bdev_get_queue(bdev);
- unsigned short max_sectors = queue_max_sectors(q);
+ unsigned int max_sectors = queue_max_sectors(q);
struct bvec_merge_data bvm = {
.bi_bdev = bdev,
.bi_sector = sector,
.bi_rw = bio->bi_rw,
};
- if (bio->bi_vcnt == 0) {
- WARN_ON(1);
+ if (WARN_ON(bio->bi_vcnt == 0))
return 1;
- }
prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bio_sectors(bio) > max_sectors)
@@ -5631,10 +5635,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *dev;
u64 tmp;
- if (!devid && !fs_info) {
- WARN_ON(1);
+ if (WARN_ON(!devid && !fs_info))
return ERR_PTR(-EINVAL);
- }
dev = __alloc_device();
if (IS_ERR(dev))
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b72f540c8b29..8b3cd142b373 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -43,9 +43,8 @@ struct btrfs_device {
/* WRITE_SYNC bios */
struct btrfs_pending_bios pending_sync_bios;
- int running_pending;
u64 generation;
-
+ int running_pending;
int writeable;
int in_fs_metadata;
int missing;
@@ -53,11 +52,11 @@ struct btrfs_device {
int is_tgtdev_for_dev_replace;
spinlock_t io_lock;
+ /* the mode sent to blkdev_get */
+ fmode_t mode;
struct block_device *bdev;
- /* the mode sent to blkdev_get */
- fmode_t mode;
struct rcu_string *name;
@@ -78,16 +77,21 @@ struct btrfs_device {
/* optimal io width for this device */
u32 io_width;
+ /* type and info about this device */
+ u64 type;
/* minimal io size for this device */
u32 sector_size;
- /* type and info about this device */
- u64 type;
/* physical drive uuid (or lvm uuid) */
u8 uuid[BTRFS_UUID_SIZE];
+ /* for sending down flush barriers */
+ int nobarriers;
+ struct bio *flush_bio;
+ struct completion flush_wait;
+
/* per-device scrub information */
struct scrub_ctx *scrub_device;
@@ -103,10 +107,6 @@ struct btrfs_device {
struct radix_tree_root reada_zones;
struct radix_tree_root reada_extents;
- /* for sending down flush barriers */
- struct bio *flush_bio;
- struct completion flush_wait;
- int nobarriers;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */
@@ -132,7 +132,9 @@ struct btrfs_fs_devices {
/* all of the devices in the FS, protected by a mutex
* so we can safely walk it to write out the supers without
- * worrying about add/remove by the multi-device code
+ * worrying about add/remove by the multi-device code.
+ * Scrubbing super can kick off supers writing by holding
+ * this mutex lock.
*/
struct mutex device_list_mutex;
struct list_head devices;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 43eb5592cdea..57e17fe6121a 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -270,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object)
#endif
/* delete retired objects */
- if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) &&
+ if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) &&
_object != cache->cache.fsdef
) {
_debug("- retire object OBJ%x", object->fscache.debug_id);
@@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
_debug("discard tail %llx", oi_size);
newattrs.ia_valid = ATTR_SIZE;
newattrs.ia_size = oi_size & PAGE_MASK;
- ret = notify_change(object->backer, &newattrs);
+ ret = notify_change(object->backer, &newattrs, NULL);
if (ret < 0)
goto truncate_failed;
}
newattrs.ia_valid = ATTR_SIZE;
newattrs.ia_size = ni_size;
- ret = notify_change(object->backer, &newattrs);
+ ret = notify_change(object->backer, &newattrs, NULL);
truncate_failed:
mutex_unlock(&object->backer->d_inode->i_mutex);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a08d7fa2f7..ca65f39dc8dc 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
} else {
- ret = vfs_unlink(dir->d_inode, rep);
+ ret = vfs_unlink(dir->d_inode, rep, NULL);
if (preemptive)
cachefiles_mark_object_buried(cache, rep);
@@ -396,7 +396,7 @@ try_again:
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
ret = vfs_rename(dir->d_inode, rep,
- cache->graveyard->d_inode, grave);
+ cache->graveyard->d_inode, grave, NULL);
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache,
"Rename failed with error %d", ret);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd481425..1e561c059539 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
}
SetPageUptodate(page);
- if (err == 0)
+ if (err >= 0)
ceph_readpage_to_fscache(inode, page);
out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 6bfe65e0b038..8c44fdd4e1c3 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -68,7 +68,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
{
fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
- fsc);
+ fsc, true);
if (fsc->fscache == NULL) {
pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
@@ -204,7 +204,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
ci->fscache = fscache_acquire_cookie(fsc->fscache,
&ceph_fscache_inode_object_def,
- ci);
+ ci, true);
done:
mutex_unlock(&inode->i_mutex);
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ if (!PageFsCache(page))
+ return;
+
fscache_wait_on_page_write(ci->fscache, page);
fscache_uncache_page(ci->fscache, page);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c33332e..3c0a4bd74996 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
* caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap)
/* remove from session list */
spin_lock(&session->s_cap_lock);
+ /*
+ * s_cap_reconnect is protected by s_cap_lock. no one changes
+ * s_cap_gen while session is in the reconnect state.
+ */
+ if (queue_release &&
+ (!session->s_cap_reconnect ||
+ cap->cap_gen == session->s_cap_gen))
+ __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+ cap->mseq, cap->issue_seq);
+
if (session->s_cap_iterator == cap) {
/* not yet, we are iterating over this very cap */
dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
struct ceph_mds_cap_release *head;
struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
BUG_ON(!session->s_num_cap_releases);
msg = list_first_entry(&session->s_cap_releases,
struct ceph_msg, list_head);
@@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
(int)CEPH_CAPS_PER_RELEASE,
(int)msg->front.iov_len);
}
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode)
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
- struct ceph_mds_session *session = cap->session;
-
- __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
- cap->mseq, cap->issue_seq);
p = rb_next(p);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, true);
}
}
@@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
}
spin_unlock(&mdsc->cap_dirty_lock);
}
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, false);
}
/* else, we already released it */
@@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
- if (op == CEPH_CAP_OP_IMPORT)
+ if (op == CEPH_CAP_OP_IMPORT) {
+ spin_lock(&session->s_cap_lock);
__queue_cap_release(session, vino.ino, cap_id,
mseq, seq);
+ spin_unlock(&session->s_cap_lock);
+ }
goto flush_cap_releases;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 868b61d56cac..2a0bcaeb189a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,8 +352,18 @@ more:
}
/* note next offset and last dentry name */
+ rinfo = &req->r_reply_info;
+ if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+ frag = le32_to_cpu(rinfo->dir_dir->frag);
+ if (ceph_frag_is_leftmost(frag))
+ fi->next_offset = 2;
+ else
+ fi->next_offset = 0;
+ off = fi->next_offset;
+ }
fi->offset = fi->next_offset;
fi->last_readdir = req;
+ fi->frag = frag;
if (req->r_reply_info.dir_end) {
kfree(fi->last_name);
@@ -363,7 +373,6 @@ more:
else
fi->next_offset = 0;
} else {
- rinfo = &req->r_reply_info;
err = note_last_dentry(fi,
rinfo->dir_dname[rinfo->dir_nr-1],
rinfo->dir_dname_len[rinfo->dir_nr-1]);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8549a48115f7..9a8e396aed89 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode,
int issued = 0, implemented;
struct timespec mtime, atime, ctime;
u32 nsplits;
+ struct ceph_inode_frag *frag;
+ struct rb_node *rb_node;
struct ceph_buffer *xattr_blob = NULL;
int err = 0;
int queue_trunc = 0;
@@ -751,15 +753,38 @@ no_change:
/* FIXME: move me up, if/when version reflects fragtree changes */
nsplits = le32_to_cpu(info->fragtree.nsplits);
mutex_lock(&ci->i_fragtree_mutex);
+ rb_node = rb_first(&ci->i_fragtree);
for (i = 0; i < nsplits; i++) {
u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
- struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
- if (IS_ERR(frag))
- continue;
+ frag = NULL;
+ while (rb_node) {
+ frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+ if (ceph_frag_compare(frag->frag, id) >= 0) {
+ if (frag->frag != id)
+ frag = NULL;
+ else
+ rb_node = rb_next(rb_node);
+ break;
+ }
+ rb_node = rb_next(rb_node);
+ rb_erase(&frag->node, &ci->i_fragtree);
+ kfree(frag);
+ frag = NULL;
+ }
+ if (!frag) {
+ frag = __get_or_create_frag(ci, id);
+ if (IS_ERR(frag))
+ continue;
+ }
frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
dout(" frag %x split by %d\n", frag->frag, frag->split_by);
}
+ while (rb_node) {
+ frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+ rb_node = rb_next(rb_node);
+ rb_erase(&frag->node, &ci->i_fragtree);
+ kfree(frag);
+ }
mutex_unlock(&ci->i_fragtree_mutex);
/* were we issued a capability? */
@@ -1250,8 +1275,20 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
int err = 0, i;
struct inode *snapdir = NULL;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
- u64 frag = le32_to_cpu(rhead->args.readdir.frag);
struct ceph_dentry_info *di;
+ u64 r_readdir_offset = req->r_readdir_offset;
+ u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+ if (rinfo->dir_dir &&
+ le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+ dout("readdir_prepopulate got new frag %x -> %x\n",
+ frag, le32_to_cpu(rinfo->dir_dir->frag));
+ frag = le32_to_cpu(rinfo->dir_dir->frag);
+ if (ceph_frag_is_leftmost(frag))
+ r_readdir_offset = 2;
+ else
+ r_readdir_offset = 0;
+ }
if (req->r_aborted)
return readdir_prepopulate_inodes_only(req, session);
@@ -1315,7 +1352,7 @@ retry_lookup:
}
di = dn->d_fsdata;
- di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
+ di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
/* inode */
if (dn->d_inode) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d9611d..d90861f45210 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
*/
struct ceph_reconnect_state {
+ int nr_caps;
struct ceph_pagelist *pagelist;
bool flock;
};
@@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_waiting);
INIT_LIST_HEAD(&s->s_unsafe);
s->s_num_cap_releases = 0;
+ s->s_cap_reconnect = 0;
s->s_cap_iterator = NULL;
INIT_LIST_HEAD(&s->s_cap_releases);
INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
+ complete_all(&req->r_safe_completion);
+
ceph_mdsc_put_request(req);
}
@@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, false);
if (!__ceph_is_any_real_caps(ci)) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
- __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
- cap->mseq, cap->issue_seq);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, true);
} else {
/* try to drop referring dentries */
spin_unlock(&ci->i_ceph_lock);
@@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
unsigned num;
dout("discard_cap_releases mds%d\n", session->s_mds);
- spin_lock(&session->s_cap_lock);
/* zero out the in-progress message */
msg = list_first_entry(&session->s_cap_releases,
@@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
msg->front.iov_len = sizeof(*head);
list_add(&msg->list_head, &session->s_cap_releases);
}
-
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result)
+ if (req->r_err || req->r_got_result) {
+ if (req->r_aborted)
+ __unregister_request(mdsc, req);
goto out;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
- req->r_op == CEPH_MDS_OP_LSSNAP) &&
- rinfo->dir_nr)
+ req->r_op == CEPH_MDS_OP_LSSNAP))
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
@@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */
+ cap->cap_gen = cap->session->s_cap_gen;
if (recon_state->flock) {
rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2552,6 +2553,8 @@ encode_again:
} else {
err = ceph_pagelist_append(pagelist, &rec, reclen);
}
+
+ recon_state->nr_caps++;
out_free:
kfree(path);
out_dput:
@@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
struct rb_node *p;
int mds = session->s_mds;
int err = -ENOMEM;
+ int s_nr_caps;
struct ceph_pagelist *pagelist;
struct ceph_reconnect_state recon_state;
@@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
dout("session %p state %s\n", session,
session_state_name(session->s_state));
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ spin_unlock(&session->s_gen_ttl_lock);
+
+ spin_lock(&session->s_cap_lock);
+ /*
+ * notify __ceph_remove_cap() that we are composing cap reconnect.
+ * If a cap get released before being added to the cap reconnect,
+ * __ceph_remove_cap() should skip queuing cap release.
+ */
+ session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
discard_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
/* traverse this session's caps */
- err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+ s_nr_caps = session->s_nr_caps;
+ err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
if (err)
goto fail;
+ recon_state.nr_caps = 0;
recon_state.pagelist = pagelist;
recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
err = iterate_session_caps(session, encode_caps_cb, &recon_state);
if (err < 0)
goto fail;
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
+
/*
* snaprealms. we provide mds with the ino, seq (version), and
* parent for all of our realms. If the mds has any newer info,
@@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
if (recon_state.flock)
reply->hdr.version = cpu_to_le16(2);
- if (pagelist->length) {
- /* set up outbound data if we have any */
- reply->hdr.data_len = cpu_to_le32(pagelist->length);
- ceph_msg_data_add_pagelist(reply, pagelist);
+
+ /* raced with cap release? */
+ if (s_nr_caps != recon_state.nr_caps) {
+ struct page *page = list_first_entry(&pagelist->head,
+ struct page, lru);
+ __le32 *addr = kmap_atomic(page);
+ *addr = cpu_to_le32(recon_state.nr_caps);
+ kunmap_atomic(addr);
}
+
+ reply->hdr.data_len = cpu_to_le32(pagelist->length);
+ ceph_msg_data_add_pagelist(reply, pagelist);
ceph_con_send(&session->s_con, reply);
mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fbbe517..4c053d099ae4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
struct list_head s_caps; /* all caps issued by this session */
int s_nr_caps, s_trim_caps;
int s_num_cap_releases;
+ int s_cap_reconnect;
struct list_head s_cap_releases; /* waiting cap_release messages */
struct list_head s_cap_releases_done; /* ready to send */
struct ceph_cap *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6014b0a3c405..ef4ac38bb614 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode,
int fmode, unsigned issued, unsigned wanted,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
- spin_lock(&cap->ci->i_ceph_lock);
- __ceph_remove_cap(cap);
- spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index afc2bb691780..f77f7702fabe 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -368,6 +368,7 @@ void cdev_put(struct cdev *p)
*/
static int chrdev_open(struct inode *inode, struct file *filp)
{
+ const struct file_operations *fops;
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
@@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp)
return ret;
ret = -ENXIO;
- filp->f_op = fops_get(p->ops);
- if (!filp->f_op)
+ fops = fops_get(p->ops);
+ if (!fops)
goto out_cdev_put;
+ replace_fops(filp, fops);
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp);
if (ret)
@@ -574,7 +576,8 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data)
void __init chrdev_init(void)
{
cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
- bdi_init(&directly_mappable_cdev_bdi);
+ if (bdi_init(&directly_mappable_cdev_bdi))
+ panic("Failed to init directly mappable cdev bdi");
}
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 37e4a72a7d1c..9409fa10bd5c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -65,5 +65,6 @@ struct cifs_sb_info {
char *mountdata; /* options received at mount time or via DFS refs */
struct backing_dev_info bdi;
struct delayed_work prune_tlinks;
+ struct rcu_head rcu;
};
#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index fc6f4f3a1a9d..4934347321d3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -548,7 +548,13 @@ static int
CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
{
int rc;
- unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
+ struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *)
+ (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+ unsigned int hash_len;
+
+ /* The MD5 hash starts at challenge_key.key */
+ hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE +
+ offsetof(struct ntlmv2_resp, challenge.key[0]));
if (!ses->server->secmech.sdeschmacmd5) {
cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
@@ -556,7 +562,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
}
rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
- ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
__func__);
@@ -570,20 +576,21 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
}
if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED)
- memcpy(ses->auth_key.response + offset,
- ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ memcpy(ntlmv2->challenge.key,
+ ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
else
- memcpy(ses->auth_key.response + offset,
- ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ memcpy(ntlmv2->challenge.key,
+ ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
- ses->auth_key.response + offset, ses->auth_key.len - offset);
+ ntlmv2->challenge.key, hash_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
return rc;
}
+ /* Note that the MD5 digest over writes anon.challenge_key.key */
rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
- ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+ ntlmv2->ntlmv2_hash);
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
@@ -627,7 +634,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
int rc;
int baselen;
unsigned int tilen;
- struct ntlmv2_resp *buf;
+ struct ntlmv2_resp *ntlmv2;
char ntlmv2_hash[16];
unsigned char *tiblob = NULL; /* target info blob */
@@ -660,13 +667,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
ses->auth_key.len += baselen;
- buf = (struct ntlmv2_resp *)
+ ntlmv2 = (struct ntlmv2_resp *)
(ses->auth_key.response + CIFS_SESS_KEY_SIZE);
- buf->blob_signature = cpu_to_le32(0x00000101);
- buf->reserved = 0;
- buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
- get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
- buf->reserved2 = 0;
+ ntlmv2->blob_signature = cpu_to_le32(0x00000101);
+ ntlmv2->reserved = 0;
+ /* Must be within 5 minutes of the server */
+ ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+ get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal));
+ ntlmv2->reserved2 = 0;
memcpy(ses->auth_key.response + baselen, tiblob, tilen);
@@ -706,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
- ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ntlmv2->ntlmv2_hash,
CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 77fc5e181077..849f6132b327 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.readlink = generic_readlink,
.follow_link = cifs_follow_link,
- .put_link = cifs_put_link,
+ .put_link = kfree_put_link,
.permission = cifs_permission,
/* BB add the following two eventually */
/* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6d0b07217ac9..26a754f49ba1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
/* Functions related to symlinks */
extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
-extern void cifs_put_link(struct dentry *direntry,
- struct nameidata *nd, void *);
extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
int buflen);
extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 52b6f6c26bfc..f918a998a087 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -261,7 +261,7 @@ struct smb_version_operations {
/* query path data from the server */
int (*query_path_info)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *,
- FILE_ALL_INFO *, bool *);
+ FILE_ALL_INFO *, bool *, bool *);
/* query file data from the server */
int (*query_file_info)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *, FILE_ALL_INFO *);
@@ -278,6 +278,8 @@ struct smb_version_operations {
/* set attributes */
int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *,
const unsigned int);
+ int (*set_compression)(const unsigned int, struct cifs_tcon *,
+ struct cifsFileInfo *);
/* check if we can send an echo or nor */
bool (*can_echo)(struct TCP_Server_Info *);
/* send echo request */
@@ -379,6 +381,10 @@ struct smb_version_operations {
char * (*create_lease_buf)(u8 *, u8);
/* parse lease context buffer and return oplock/epoch info */
__u8 (*parse_lease_buf)(void *, unsigned int *);
+ int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
+ struct cifsFileInfo *target_file, u64 src_off, u64 len,
+ u64 dest_off);
+ int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
};
struct smb_version_values {
@@ -620,11 +626,34 @@ set_credits(struct TCP_Server_Info *server, const int val)
}
static inline __u64
-get_next_mid(struct TCP_Server_Info *server)
+get_next_mid64(struct TCP_Server_Info *server)
{
return server->ops->get_next_mid(server);
}
+static inline __le16
+get_next_mid(struct TCP_Server_Info *server)
+{
+ __u16 mid = get_next_mid64(server);
+ /*
+ * The value in the SMB header should be little endian for easy
+ * on-the-wire decoding.
+ */
+ return cpu_to_le16(mid);
+}
+
+static inline __u16
+get_mid(const struct smb_hdr *smb)
+{
+ return le16_to_cpu(smb->Mid);
+}
+
+static inline bool
+compare_mid(__u16 mid, const struct smb_hdr *smb)
+{
+ return mid == le16_to_cpu(smb->Mid);
+}
+
/*
* When the server supports very large reads and writes via POSIX extensions,
* we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not
@@ -828,6 +857,11 @@ struct cifs_tcon {
__u32 maximal_access;
__u32 vol_serial_number;
__le64 vol_create_time;
+ __u32 ss_flags; /* sector size flags */
+ __u32 perf_sector_size; /* best sector size for perf */
+ __u32 max_chunks;
+ __u32 max_bytes_chunk;
+ __u32 max_bytes_copy;
#endif /* CONFIG_CIFS_SMB2 */
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 08f9dfb1a894..33df36ef9d52 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -428,7 +428,7 @@ struct smb_hdr {
__u16 Tid;
__le16 Pid;
__u16 Uid;
- __u16 Mid;
+ __le16 Mid;
__u8 WordCount;
} __attribute__((packed));
@@ -697,7 +697,13 @@ struct ntlmssp2_name {
} __attribute__((packed));
struct ntlmv2_resp {
- char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ union {
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ struct {
+ __u8 reserved[8];
+ __u8 key[CIFS_SERVER_CHALLENGE_SIZE];
+ } __attribute__((packed)) challenge;
+ } __attribute__((packed));
__le32 blob_signature;
__u32 reserved;
__le64 time;
@@ -1352,6 +1358,35 @@ typedef struct smb_com_transaction_ioctl_req {
__u8 Data[1];
} __attribute__((packed)) TRANSACT_IOCTL_REQ;
+typedef struct smb_com_transaction_compr_ioctl_req {
+ struct smb_hdr hdr; /* wct = 23 */
+ __u8 MaxSetupCount;
+ __u16 Reserved;
+ __le32 TotalParameterCount;
+ __le32 TotalDataCount;
+ __le32 MaxParameterCount;
+ __le32 MaxDataCount;
+ __le32 ParameterCount;
+ __le32 ParameterOffset;
+ __le32 DataCount;
+ __le32 DataOffset;
+ __u8 SetupCount; /* four setup words follow subcommand */
+ /* SNIA spec incorrectly included spurious pad here */
+ __le16 SubCommand; /* 2 = IOCTL/FSCTL */
+ __le32 FunctionCode;
+ __u16 Fid;
+ __u8 IsFsctl; /* 1 = File System Control 0 = device control (IOCTL) */
+ __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */
+ __le16 ByteCount;
+ __u8 Pad[3];
+ __le16 compression_state; /* See below for valid flags */
+} __attribute__((packed)) TRANSACT_COMPR_IOCTL_REQ;
+
+/* compression state flags */
+#define COMPRESSION_FORMAT_NONE 0x0000
+#define COMPRESSION_FORMAT_DEFAULT 0x0001
+#define COMPRESSION_FORMAT_LZNT1 0x0002
+
typedef struct smb_com_transaction_ioctl_rsp {
struct smb_hdr hdr; /* wct = 19 */
__u8 Reserved[3];
@@ -2215,6 +2250,9 @@ typedef struct {
__le32 DeviceCharacteristics;
} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info level 0x104 */
+/* minimum includes first three fields, and empty FS Name */
+#define MIN_FS_ATTR_INFO_SIZE 12
+
typedef struct {
__le32 Attributes;
__le32 MaxPathNameComponentLength;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b5ec2a268f56..aa3397620342 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -360,6 +360,8 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
__u16 fid, char **symlinkinfo,
const struct nls_table *nls_codepage);
+extern int CIFSSMB_set_compression(const unsigned int xid,
+ struct cifs_tcon *tcon, __u16 fid);
extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon,
const char *fileName, const int disposition,
const int access_flags, const int omode,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index ccd31ab815d4..124aa0230c1b 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3199,6 +3199,60 @@ qreparse_out:
return rc;
}
+int
+CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
+ __u16 fid)
+{
+ int rc = 0;
+ int bytes_returned;
+ struct smb_com_transaction_compr_ioctl_req *pSMB;
+ struct smb_com_transaction_ioctl_rsp *pSMBr;
+
+ cifs_dbg(FYI, "Set compression for %u\n", fid);
+ rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
+ (void **) &pSMBr);
+ if (rc)
+ return rc;
+
+ pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+
+ pSMB->TotalParameterCount = 0;
+ pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+ pSMB->MaxParameterCount = 0;
+ pSMB->MaxDataCount = 0;
+ pSMB->MaxSetupCount = 4;
+ pSMB->Reserved = 0;
+ pSMB->ParameterOffset = 0;
+ pSMB->DataCount = __constant_cpu_to_le32(2);
+ pSMB->DataOffset =
+ cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
+ compression_state) - 4); /* 84 */
+ pSMB->SetupCount = 4;
+ pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+ pSMB->ParameterCount = 0;
+ pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+ pSMB->IsFsctl = 1; /* FSCTL */
+ pSMB->IsRootFlag = 0;
+ pSMB->Fid = fid; /* file handle always le */
+ /* 3 byte pad, followed by 2 byte compress state */
+ pSMB->ByteCount = __constant_cpu_to_le16(5);
+ inc_rfc1001_len(pSMB, 5);
+
+ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+ (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+ if (rc)
+ cifs_dbg(FYI, "Send error in SetCompression = %d\n", rc);
+
+ cifs_buf_release(pSMB);
+
+ /*
+ * Note: On -EAGAIN error only caller can retry on handle based calls
+ * since file handle passed in no longer valid.
+ */
+ return rc;
+}
+
+
#ifdef CONFIG_CIFS_POSIX
/*Convert an Access Control Entry from wire format to local POSIX xattr format*/
@@ -3315,11 +3369,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
return 0;
}
cifs_acl->version = cpu_to_le16(1);
- if (acl_type == ACL_TYPE_ACCESS)
+ if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- else if (acl_type == ACL_TYPE_DEFAULT)
+ cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- else {
+ cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a279ffc0bc29..8813ff776ba3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2242,6 +2242,8 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (ses->status == CifsExiting)
+ continue;
if (!match_session(ses, vol))
continue;
++ses->ses_count;
@@ -2255,24 +2257,37 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
static void
cifs_put_smb_ses(struct cifs_ses *ses)
{
- unsigned int xid;
+ unsigned int rc, xid;
struct TCP_Server_Info *server = ses->server;
cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
+
spin_lock(&cifs_tcp_ses_lock);
+ if (ses->status == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return;
+ }
if (--ses->ses_count > 0) {
spin_unlock(&cifs_tcp_ses_lock);
return;
}
-
- list_del_init(&ses->smb_ses_list);
+ if (ses->status == CifsGood)
+ ses->status = CifsExiting;
spin_unlock(&cifs_tcp_ses_lock);
- if (ses->status == CifsGood && server->ops->logoff) {
+ if (ses->status == CifsExiting && server->ops->logoff) {
xid = get_xid();
- server->ops->logoff(xid, ses);
+ rc = server->ops->logoff(xid, ses);
+ if (rc)
+ cifs_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
+ __func__, rc);
_free_xid(xid);
}
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_del_init(&ses->smb_ses_list);
+ spin_unlock(&cifs_tcp_ses_lock);
+
sesInfoFree(ses);
cifs_put_tcp_session(server);
}
@@ -3755,6 +3770,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
return rc;
}
+static void delayed_free(struct rcu_head *p)
+{
+ struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
+ unload_nls(sbi->local_nls);
+ kfree(sbi);
+}
+
void
cifs_umount(struct cifs_sb_info *cifs_sb)
{
@@ -3779,8 +3801,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
bdi_destroy(&cifs_sb->bdi);
kfree(cifs_sb->mountdata);
- unload_nls(cifs_sb->local_nls);
- kfree(cifs_sb);
+ call_rcu(&cifs_sb->rcu, delayed_free);
}
int
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 5384c2a640ca..11ff5f116b20 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -756,7 +756,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
/* if it was once a directory (but how can we tell?) we could do
shrink_dcache_parent(direntry); */
} else if (rc != -EACCES) {
- cifs_dbg(VFS, "Unexpected lookup error %d\n", rc);
+ cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
/* We special case check for Access Denied - since that
is a common return code */
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7ddddf2e2504..5a5a87240fe2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3663,6 +3663,27 @@ void cifs_oplock_break(struct work_struct *work)
}
}
+/*
+ * The presence of cifs_direct_io() in the address space ops vector
+ * allowes open() O_DIRECT flags which would have failed otherwise.
+ *
+ * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
+ * so this method should never be called.
+ *
+ * Direct IO is not yet supported in the cached mode.
+ */
+static ssize_t
+cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t pos, unsigned long nr_segs)
+{
+ /*
+ * FIXME
+ * Eventually need to support direct IO for non forcedirectio mounts
+ */
+ return -EINVAL;
+}
+
+
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -3672,6 +3693,7 @@ const struct address_space_operations cifs_addr_ops = {
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = cifs_release_page,
+ .direct_IO = cifs_direct_io,
.invalidatepage = cifs_invalidate_page,
.launder_page = cifs_launder_page,
};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index b3258f35e88a..8d4b7bc8ae91 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -27,7 +27,7 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
{
server->fscache =
fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
- &cifs_fscache_server_index_def, server);
+ &cifs_fscache_server_index_def, server, true);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server, server->fscache);
}
@@ -46,7 +46,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
tcon->fscache =
fscache_acquire_cookie(server->fscache,
- &cifs_fscache_super_index_def, tcon);
+ &cifs_fscache_super_index_def, tcon, true);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server->fscache, tcon->fscache);
}
@@ -69,7 +69,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
- &cifs_fscache_inode_object_def, cifsi);
+ &cifs_fscache_inode_object_def, cifsi, true);
cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
__func__, tcon->fscache, cifsi->fscache);
}
@@ -119,7 +119,7 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
cifsi->fscache = fscache_acquire_cookie(
cifs_sb_master_tcon(cifs_sb)->fscache,
&cifs_fscache_inode_object_def,
- cifsi);
+ cifsi, true);
cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
__func__, cifsi->fscache, old);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 867b7cdc794a..36f9ebb93ceb 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -542,7 +542,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
static void
cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
- struct cifs_sb_info *cifs_sb, bool adjust_tz)
+ struct cifs_sb_info *cifs_sb, bool adjust_tz,
+ bool symlink)
{
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
@@ -569,7 +570,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
fattr->cf_createtime = le64_to_cpu(info->CreationTime);
fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
- if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+
+ if (symlink) {
+ fattr->cf_mode = S_IFLNK;
+ fattr->cf_dtype = DT_LNK;
+ } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
/*
@@ -578,10 +583,6 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
*/
if (!tcon->unix_ext)
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
- } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
- fattr->cf_mode = S_IFLNK;
- fattr->cf_dtype = DT_LNK;
- fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
@@ -626,7 +627,8 @@ cifs_get_file_info(struct file *filp)
rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data);
switch (rc) {
case 0:
- cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
+ cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false,
+ false);
break;
case -EREMOTE:
cifs_create_dfs_fattr(&fattr, inode->i_sb);
@@ -673,6 +675,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
bool adjust_tz = false;
struct cifs_fattr fattr;
struct cifs_search_info *srchinf = NULL;
+ bool symlink = false;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -702,12 +705,12 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
}
data = (FILE_ALL_INFO *)buf;
rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path,
- data, &adjust_tz);
+ data, &adjust_tz, &symlink);
}
if (!rc) {
- cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb,
- adjust_tz);
+ cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz,
+ symlink);
} else if (rc == -EREMOTE) {
cifs_create_dfs_fattr(&fattr, sb);
rc = 0;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 3e0845585853..77492301cc2b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -3,7 +3,7 @@
*
* vfs operations that deal with io control
*
- * Copyright (C) International Business Machines Corp., 2005,2007
+ * Copyright (C) International Business Machines Corp., 2005,2013
* Author(s): Steve French (sfrench@us.ibm.com)
*
* This library is free software; you can redistribute it and/or modify
@@ -22,25 +22,132 @@
*/
#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#define CIFS_IOCTL_MAGIC 0xCF
+#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
+
+static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
+ unsigned long srcfd, u64 off, u64 len, u64 destoff)
+{
+ int rc;
+ struct cifsFileInfo *smb_file_target = dst_file->private_data;
+ struct inode *target_inode = file_inode(dst_file);
+ struct cifs_tcon *target_tcon;
+ struct fd src_file;
+ struct cifsFileInfo *smb_file_src;
+ struct inode *src_inode;
+ struct cifs_tcon *src_tcon;
+
+ cifs_dbg(FYI, "ioctl clone range\n");
+ /* the destination must be opened for writing */
+ if (!(dst_file->f_mode & FMODE_WRITE)) {
+ cifs_dbg(FYI, "file target not open for write\n");
+ return -EINVAL;
+ }
+
+ /* check if target volume is readonly and take reference */
+ rc = mnt_want_write_file(dst_file);
+ if (rc) {
+ cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
+ return rc;
+ }
+
+ src_file = fdget(srcfd);
+ if (!src_file.file) {
+ rc = -EBADF;
+ goto out_drop_write;
+ }
+
+ if ((!src_file.file->private_data) || (!dst_file->private_data)) {
+ rc = -EBADF;
+ cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
+ goto out_fput;
+ }
+
+ rc = -EXDEV;
+ smb_file_target = dst_file->private_data;
+ smb_file_src = src_file.file->private_data;
+ src_tcon = tlink_tcon(smb_file_src->tlink);
+ target_tcon = tlink_tcon(smb_file_target->tlink);
+
+ /* check if source and target are on same tree connection */
+ if (src_tcon != target_tcon) {
+ cifs_dbg(VFS, "file copy src and target on different volume\n");
+ goto out_fput;
+ }
+
+ src_inode = src_file.file->f_dentry->d_inode;
+
+ /*
+ * Note: cifs case is easier than btrfs since server responsible for
+ * checks for proper open modes and file type and if it wants
+ * server could even support copy of range where source = target
+ */
+
+ /* so we do not deadlock racing two ioctls on same files */
+ if (target_inode < src_inode) {
+ mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+ } else {
+ mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD);
+ }
+
+ /* determine range to clone */
+ rc = -EINVAL;
+ if (off + len > src_inode->i_size || off + len < off)
+ goto out_unlock;
+ if (len == 0)
+ len = src_inode->i_size - off;
+
+ cifs_dbg(FYI, "about to flush pages\n");
+ /* should we flush first and last page first */
+ truncate_inode_pages_range(&target_inode->i_data, destoff,
+ PAGE_CACHE_ALIGN(destoff + len)-1);
+
+ if (target_tcon->ses->server->ops->clone_range)
+ rc = target_tcon->ses->server->ops->clone_range(xid,
+ smb_file_src, smb_file_target, off, len, destoff);
+
+ /* force revalidate of size and timestamps of target file now
+ that target is updated on the server */
+ CIFS_I(target_inode)->time = 0;
+out_unlock:
+ /* although unlocking in the reverse order from locking is not
+ strictly necessary here it is a little cleaner to be consistent */
+ if (target_inode < src_inode) {
+ mutex_unlock(&src_inode->i_mutex);
+ mutex_unlock(&target_inode->i_mutex);
+ } else {
+ mutex_unlock(&target_inode->i_mutex);
+ mutex_unlock(&src_inode->i_mutex);
+ }
+out_fput:
+ fdput(src_file);
+out_drop_write:
+ mnt_drop_write_file(dst_file);
+ return rc;
+}
+
long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
{
struct inode *inode = file_inode(filep);
int rc = -ENOTTY; /* strange error - but the precedent */
unsigned int xid;
struct cifs_sb_info *cifs_sb;
-#ifdef CONFIG_CIFS_POSIX
struct cifsFileInfo *pSMBFile = filep->private_data;
struct cifs_tcon *tcon;
__u64 ExtAttrBits = 0;
- __u64 ExtAttrMask = 0;
__u64 caps;
-#endif /* CONFIG_CIFS_POSIX */
xid = get_xid();
@@ -49,13 +156,14 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
cifs_sb = CIFS_SB(inode->i_sb);
switch (command) {
-#ifdef CONFIG_CIFS_POSIX
case FS_IOC_GETFLAGS:
if (pSMBFile == NULL)
break;
tcon = tlink_tcon(pSMBFile->tlink);
caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
+#ifdef CONFIG_CIFS_POSIX
if (CIFS_UNIX_EXTATTR_CAP & caps) {
+ __u64 ExtAttrMask = 0;
rc = CIFSGetExtAttr(xid, tcon,
pSMBFile->fid.netfid,
&ExtAttrBits, &ExtAttrMask);
@@ -63,29 +171,53 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = put_user(ExtAttrBits &
FS_FL_USER_VISIBLE,
(int __user *)arg);
+ if (rc != EOPNOTSUPP)
+ break;
+ }
+#endif /* CONFIG_CIFS_POSIX */
+ rc = 0;
+ if (CIFS_I(inode)->cifsAttrs & ATTR_COMPRESSED) {
+ /* add in the compressed bit */
+ ExtAttrBits = FS_COMPR_FL;
+ rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE,
+ (int __user *)arg);
}
break;
-
case FS_IOC_SETFLAGS:
if (pSMBFile == NULL)
break;
tcon = tlink_tcon(pSMBFile->tlink);
caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
- if (CIFS_UNIX_EXTATTR_CAP & caps) {
- if (get_user(ExtAttrBits, (int __user *)arg)) {
- rc = -EFAULT;
- break;
- }
- /*
- * rc = CIFSGetExtAttr(xid, tcon,
- * pSMBFile->fid.netfid,
- * extAttrBits,
- * &ExtAttrMask);
- */
+
+ if (get_user(ExtAttrBits, (int __user *)arg)) {
+ rc = -EFAULT;
+ break;
+ }
+
+ /*
+ * if (CIFS_UNIX_EXTATTR_CAP & caps)
+ * rc = CIFSSetExtAttr(xid, tcon,
+ * pSMBFile->fid.netfid,
+ * extAttrBits,
+ * &ExtAttrMask);
+ * if (rc != EOPNOTSUPP)
+ * break;
+ */
+
+ /* Currently only flag we can set is compressed flag */
+ if ((ExtAttrBits & FS_COMPR_FL) == 0)
+ break;
+
+ /* Try to set compress flag */
+ if (tcon->ses->server->ops->set_compression) {
+ rc = tcon->ses->server->ops->set_compression(
+ xid, tcon, pSMBFile);
+ cifs_dbg(FYI, "set compress flag rc %d\n", rc);
}
- cifs_dbg(FYI, "set flags not implemented yet\n");
break;
-#endif /* CONFIG_CIFS_POSIX */
+ case CIFS_IOC_COPYCHUNK_FILE:
+ rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
+ break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
break;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 7e36ceba0c7a..cc0234710ddb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -621,10 +621,3 @@ symlink_exit:
free_xid(xid);
return rc;
}
-
-void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
-{
- char *p = nd_get_link(nd);
- if (!IS_ERR(p))
- kfree(p);
-}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 138a011633fe..2f9f3790679d 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -278,7 +278,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
}
static int
-check_smb_hdr(struct smb_hdr *smb, __u16 mid)
+check_smb_hdr(struct smb_hdr *smb)
{
/* does it have the right SMB "signature" ? */
if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
@@ -287,13 +287,6 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
return 1;
}
- /* Make sure that message ids match */
- if (mid != smb->Mid) {
- cifs_dbg(VFS, "Mids do not match. received=%u expected=%u\n",
- smb->Mid, mid);
- return 1;
- }
-
/* if it's a response then accept */
if (smb->Flags & SMBFLG_RESPONSE)
return 0;
@@ -302,7 +295,8 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
if (smb->Command == SMB_COM_LOCKING_ANDX)
return 0;
- cifs_dbg(VFS, "Server sent request, not response. mid=%u\n", smb->Mid);
+ cifs_dbg(VFS, "Server sent request, not response. mid=%u\n",
+ get_mid(smb));
return 1;
}
@@ -310,7 +304,6 @@ int
checkSMB(char *buf, unsigned int total_read)
{
struct smb_hdr *smb = (struct smb_hdr *)buf;
- __u16 mid = smb->Mid;
__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
__u32 clc_len; /* calculated length */
cifs_dbg(FYI, "checkSMB Length: 0x%x, smb_buf_length: 0x%x\n",
@@ -348,7 +341,7 @@ checkSMB(char *buf, unsigned int total_read)
}
/* otherwise, there is enough to get to the BCC */
- if (check_smb_hdr(smb, mid))
+ if (check_smb_hdr(smb))
return -EIO;
clc_len = smbCalcSize(smb);
@@ -359,6 +352,7 @@ checkSMB(char *buf, unsigned int total_read)
}
if (4 + rfclen != clc_len) {
+ __u16 mid = get_mid(smb);
/* check if bcc wrapped around for large read responses */
if ((rfclen > 64 * 1024) && (rfclen > clc_len)) {
/* check if lengths match mod 64K */
@@ -366,11 +360,11 @@ checkSMB(char *buf, unsigned int total_read)
return 0; /* bcc wrapped */
}
cifs_dbg(FYI, "Calculated size %u vs length %u mismatch for mid=%u\n",
- clc_len, 4 + rfclen, smb->Mid);
+ clc_len, 4 + rfclen, mid);
if (4 + rfclen < clc_len) {
cifs_dbg(VFS, "RFC1001 size %u smaller than SMB for mid=%u\n",
- rfclen, smb->Mid);
+ rfclen, mid);
return -EIO;
} else if (rfclen > clc_len + 512) {
/*
@@ -383,7 +377,7 @@ checkSMB(char *buf, unsigned int total_read)
* data to 512 bytes.
*/
cifs_dbg(VFS, "RFC1001 size %u more than 512 bytes larger than SMB for mid=%u\n",
- rfclen, smb->Mid);
+ rfclen, mid);
return -EIO;
}
}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 651a5279607b..049884552e76 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -51,7 +51,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
{ERRnoaccess, -EACCES},
{ERRbadfid, -EBADF},
{ERRbadmcb, -EIO},
- {ERRnomem, -ENOMEM},
+ {ERRnomem, -EREMOTEIO},
{ERRbadmem, -EFAULT},
{ERRbadenv, -EFAULT},
{ERRbadformat, -EINVAL},
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 53a75f3d0179..5940ecabbe6a 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -134,22 +134,6 @@ out:
dput(dentry);
}
-/*
- * Is it possible that this directory might turn out to be a DFS referral
- * once we go to try and use it?
- */
-static bool
-cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb)
-{
-#ifdef CONFIG_CIFS_DFS_UPCALL
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-
- if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
- return true;
-#endif
- return false;
-}
-
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -159,27 +143,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
- /*
- * Windows CIFS servers generally make DFS referrals look
- * like directories in FIND_* responses with the reparse
- * attribute flag also set (since DFS junctions are
- * reparse points). We must revalidate at least these
- * directory inodes before trying to use them (if
- * they are DFS we will get PATH_NOT_COVERED back
- * when queried directly and can then try to connect
- * to the DFS target)
- */
- if (cifs_dfs_is_possible(cifs_sb) &&
- (fattr->cf_cifsattrs & ATTR_REPARSE))
- fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
- } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
- fattr->cf_mode = S_IFLNK;
- fattr->cf_dtype = DT_LNK;
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
}
+ /*
+ * We need to revalidate it further to make a decision about whether it
+ * is a symbolic link, DFS referral or a reparse point with a direct
+ * access like junctions, deduplicated files, NFS symlinks.
+ */
+ if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
+
/* non-unix readdir doesn't provide nlink */
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 8233b174de3d..5f5ba0dc2ee1 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -67,7 +67,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n",
- in_buf->Mid, rc);
+ get_mid(in_buf), rc);
return rc;
}
@@ -101,7 +101,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
spin_lock(&GlobalMid_Lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
- if (mid->mid == buf->Mid &&
+ if (compare_mid(mid->mid, buf) &&
mid->mid_state == MID_REQUEST_SUBMITTED &&
le16_to_cpu(mid->command) == buf->Command) {
spin_unlock(&GlobalMid_Lock);
@@ -534,10 +534,12 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
static int
cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjustTZ)
+ FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink)
{
int rc;
+ *symlink = false;
+
/* could do find first instead but this returns more info */
rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -554,6 +556,23 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
CIFS_MOUNT_MAP_SPECIAL_CHR);
*adjustTZ = true;
}
+
+ if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) {
+ int tmprc;
+ int oplock = 0;
+ __u16 netfid;
+
+ /* Need to check if this is a symbolic link or not */
+ tmprc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
+ FILE_READ_ATTRIBUTES, 0, &netfid, &oplock,
+ NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (tmprc == -EOPNOTSUPP)
+ *symlink = true;
+ else
+ CIFSSMBClose(xid, tcon, netfid);
+ }
+
return rc;
}
@@ -807,6 +826,13 @@ out:
}
static int
+cifs_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile)
+{
+ return CIFSSMB_set_compression(xid, tcon, cfile->fid.netfid);
+}
+
+static int
cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
struct cifs_fid *fid, __u16 search_flags,
@@ -956,6 +982,7 @@ struct smb_version_operations smb1_operations = {
.set_path_size = CIFSSMBSetEOF,
.set_file_size = CIFSSMBSetFileSize,
.set_file_info = smb_set_file_info,
+ .set_compression = cifs_set_compression,
.echo = CIFSSMBEcho,
.mkdir = CIFSSMBMkDir,
.mkdir_setinfo = cifs_mkdir_setinfo,
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 78ff88c467b9..84c012a6aba0 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -123,12 +123,13 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src)
int
smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjust_tz)
+ FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink)
{
int rc;
struct smb2_file_all_info *smb2_data;
*adjust_tz = false;
+ *symlink = false;
smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
GFP_KERNEL);
@@ -136,9 +137,16 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
return -ENOMEM;
rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- OPEN_REPARSE_POINT, smb2_data,
- SMB2_OP_QUERY_INFO);
+ FILE_READ_ATTRIBUTES, FILE_OPEN, 0,
+ smb2_data, SMB2_OP_QUERY_INFO);
+ if (rc == -EOPNOTSUPP) {
+ *symlink = true;
+ /* Failed on a symbolic link - query a reparse point info */
+ rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
+ FILE_READ_ATTRIBUTES, FILE_OPEN,
+ OPEN_REPARSE_POINT, smb2_data,
+ SMB2_OP_QUERY_INFO);
+ }
if (rc)
goto out;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 7c2f45c06fc2..94bd4fbb13d3 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -306,7 +306,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_NONEXISTENT_SECTOR, -EIO, "STATUS_NONEXISTENT_SECTOR"},
{STATUS_MORE_PROCESSING_REQUIRED, -EIO,
"STATUS_MORE_PROCESSING_REQUIRED"},
- {STATUS_NO_MEMORY, -ENOMEM, "STATUS_NO_MEMORY"},
+ {STATUS_NO_MEMORY, -EREMOTEIO, "STATUS_NO_MEMORY"},
{STATUS_CONFLICTING_ADDRESSES, -EADDRINUSE,
"STATUS_CONFLICTING_ADDRESSES"},
{STATUS_NOT_MAPPED_VIEW, -EIO, "STATUS_NOT_MAPPED_VIEW"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 861b33214144..757da3e54d3d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -209,6 +209,94 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
return rsize;
}
+#ifdef CONFIG_CIFS_STATS2
+static int
+SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
+{
+ int rc;
+ unsigned int ret_data_len = 0;
+ struct network_interface_info_ioctl_rsp *out_buf;
+
+ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
+ FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
+ NULL /* no data input */, 0 /* no data input */,
+ (char **)&out_buf, &ret_data_len);
+
+ if ((rc == 0) && (ret_data_len > 0)) {
+ /* Dump info on first interface */
+ cifs_dbg(FYI, "Adapter Capability 0x%x\t",
+ le32_to_cpu(out_buf->Capability));
+ cifs_dbg(FYI, "Link Speed %lld\n",
+ le64_to_cpu(out_buf->LinkSpeed));
+ } else
+ cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
+
+ return rc;
+}
+#endif /* STATS2 */
+
+static void
+smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
+{
+ int rc;
+ __le16 srch_path = 0; /* Null - open root of share */
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_open_parms oparms;
+ struct cifs_fid fid;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_ATTRIBUTES;
+ oparms.disposition = FILE_OPEN;
+ oparms.create_options = 0;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+ if (rc)
+ return;
+
+#ifdef CONFIG_CIFS_STATS2
+ SMB3_request_interfaces(xid, tcon);
+#endif /* STATS2 */
+
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_ATTRIBUTE_INFORMATION);
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_DEVICE_INFORMATION);
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ return;
+}
+
+static void
+smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
+{
+ int rc;
+ __le16 srch_path = 0; /* Null - open root of share */
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_open_parms oparms;
+ struct cifs_fid fid;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_ATTRIBUTES;
+ oparms.disposition = FILE_OPEN;
+ oparms.create_options = 0;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+ if (rc)
+ return;
+
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_ATTRIBUTE_INFORMATION);
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_DEVICE_INFORMATION);
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ return;
+}
+
static int
smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path)
@@ -304,7 +392,19 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
seq_puts(m, " ASYMMETRIC,");
if (tcon->capabilities == 0)
seq_puts(m, " None");
+ if (tcon->ss_flags & SSINFO_FLAGS_ALIGNED_DEVICE)
+ seq_puts(m, " Aligned,");
+ if (tcon->ss_flags & SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE)
+ seq_puts(m, " Partition Aligned,");
+ if (tcon->ss_flags & SSINFO_FLAGS_NO_SEEK_PENALTY)
+ seq_puts(m, " SSD,");
+ if (tcon->ss_flags & SSINFO_FLAGS_TRIM_ENABLED)
+ seq_puts(m, " TRIM-support,");
+
seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags);
+ if (tcon->perf_sector_size)
+ seq_printf(m, "\tOptimal sector size: 0x%x",
+ tcon->perf_sector_size);
}
static void
@@ -394,6 +494,157 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
+SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid,
+ struct copychunk_ioctl *pcchunk)
+{
+ int rc;
+ unsigned int ret_data_len;
+ struct resume_key_req *res_key;
+
+ rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
+ FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
+ NULL, 0 /* no input */,
+ (char **)&res_key, &ret_data_len);
+
+ if (rc) {
+ cifs_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc);
+ goto req_res_key_exit;
+ }
+ if (ret_data_len < sizeof(struct resume_key_req)) {
+ cifs_dbg(VFS, "Invalid refcopy resume key length\n");
+ rc = -EINVAL;
+ goto req_res_key_exit;
+ }
+ memcpy(pcchunk->SourceKey, res_key->ResumeKey, COPY_CHUNK_RES_KEY_SIZE);
+
+req_res_key_exit:
+ kfree(res_key);
+ return rc;
+}
+
+static int
+smb2_clone_range(const unsigned int xid,
+ struct cifsFileInfo *srcfile,
+ struct cifsFileInfo *trgtfile, u64 src_off,
+ u64 len, u64 dest_off)
+{
+ int rc;
+ unsigned int ret_data_len;
+ struct copychunk_ioctl *pcchunk;
+ struct copychunk_ioctl_rsp *retbuf = NULL;
+ struct cifs_tcon *tcon;
+ int chunks_copied = 0;
+ bool chunk_sizes_updated = false;
+
+ pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
+
+ if (pcchunk == NULL)
+ return -ENOMEM;
+
+ cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n");
+ /* Request a key from the server to identify the source of the copy */
+ rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink),
+ srcfile->fid.persistent_fid,
+ srcfile->fid.volatile_fid, pcchunk);
+
+ /* Note: request_res_key sets res_key null only if rc !=0 */
+ if (rc)
+ goto cchunk_out;
+
+ /* For now array only one chunk long, will make more flexible later */
+ pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+ pcchunk->Reserved = 0;
+ pcchunk->Reserved2 = 0;
+
+ tcon = tlink_tcon(trgtfile->tlink);
+
+ while (len > 0) {
+ pcchunk->SourceOffset = cpu_to_le64(src_off);
+ pcchunk->TargetOffset = cpu_to_le64(dest_off);
+ pcchunk->Length =
+ cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
+
+ /* Request server copy to target from src identified by key */
+ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
+ trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
+ true /* is_fsctl */, (char *)pcchunk,
+ sizeof(struct copychunk_ioctl), (char **)&retbuf,
+ &ret_data_len);
+ if (rc == 0) {
+ if (ret_data_len !=
+ sizeof(struct copychunk_ioctl_rsp)) {
+ cifs_dbg(VFS, "invalid cchunk response size\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ if (retbuf->TotalBytesWritten == 0) {
+ cifs_dbg(FYI, "no bytes copied\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ /*
+ * Check if server claimed to write more than we asked
+ */
+ if (le32_to_cpu(retbuf->TotalBytesWritten) >
+ le32_to_cpu(pcchunk->Length)) {
+ cifs_dbg(VFS, "invalid copy chunk response\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ if (le32_to_cpu(retbuf->ChunksWritten) != 1) {
+ cifs_dbg(VFS, "invalid num chunks written\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ chunks_copied++;
+
+ src_off += le32_to_cpu(retbuf->TotalBytesWritten);
+ dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
+ len -= le32_to_cpu(retbuf->TotalBytesWritten);
+
+ cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
+ le32_to_cpu(retbuf->ChunksWritten),
+ le32_to_cpu(retbuf->ChunkBytesWritten),
+ le32_to_cpu(retbuf->TotalBytesWritten));
+ } else if (rc == -EINVAL) {
+ if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
+ goto cchunk_out;
+
+ cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n",
+ le32_to_cpu(retbuf->ChunksWritten),
+ le32_to_cpu(retbuf->ChunkBytesWritten),
+ le32_to_cpu(retbuf->TotalBytesWritten));
+
+ /*
+ * Check if this is the first request using these sizes,
+ * (ie check if copy succeed once with original sizes
+ * and check if the server gave us different sizes after
+ * we already updated max sizes on previous request).
+ * if not then why is the server returning an error now
+ */
+ if ((chunks_copied != 0) || chunk_sizes_updated)
+ goto cchunk_out;
+
+ /* Check that server is not asking us to grow size */
+ if (le32_to_cpu(retbuf->ChunkBytesWritten) <
+ tcon->max_bytes_chunk)
+ tcon->max_bytes_chunk =
+ le32_to_cpu(retbuf->ChunkBytesWritten);
+ else
+ goto cchunk_out; /* server gave us bogus size */
+
+ /* No need to change MaxChunks since already set to 1 */
+ chunk_sizes_updated = true;
+ }
+ }
+
+cchunk_out:
+ kfree(pcchunk);
+ return rc;
+}
+
+static int
smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
@@ -446,6 +697,14 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
+smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile)
+{
+ return SMB2_set_compression(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid);
+}
+
+static int
smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
struct cifs_fid *fid, __u16 search_flags,
@@ -865,6 +1124,7 @@ struct smb_version_operations smb20_operations = {
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
+ .qfs_tcon = smb2_qfs_tcon,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -874,6 +1134,7 @@ struct smb_version_operations smb20_operations = {
.set_path_size = smb2_set_path_size,
.set_file_size = smb2_set_file_size,
.set_file_info = smb2_set_file_info,
+ .set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
.rmdir = smb2_rmdir,
@@ -907,6 +1168,7 @@ struct smb_version_operations smb20_operations = {
.set_oplock_level = smb2_set_oplock_level,
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
+ .clone_range = smb2_clone_range,
};
struct smb_version_operations smb21_operations = {
@@ -936,6 +1198,7 @@ struct smb_version_operations smb21_operations = {
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
+ .qfs_tcon = smb2_qfs_tcon,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -945,6 +1208,7 @@ struct smb_version_operations smb21_operations = {
.set_path_size = smb2_set_path_size,
.set_file_size = smb2_set_file_size,
.set_file_info = smb2_set_file_info,
+ .set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
.rmdir = smb2_rmdir,
@@ -978,6 +1242,7 @@ struct smb_version_operations smb21_operations = {
.set_oplock_level = smb21_set_oplock_level,
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
+ .clone_range = smb2_clone_range,
};
struct smb_version_operations smb30_operations = {
@@ -1008,6 +1273,7 @@ struct smb_version_operations smb30_operations = {
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
+ .qfs_tcon = smb3_qfs_tcon,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -1017,6 +1283,7 @@ struct smb_version_operations smb30_operations = {
.set_path_size = smb2_set_path_size,
.set_file_size = smb2_set_file_size,
.set_file_info = smb2_set_file_info,
+ .set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
.rmdir = smb2_rmdir,
@@ -1051,6 +1318,8 @@ struct smb_version_operations smb30_operations = {
.set_oplock_level = smb3_set_oplock_level,
.create_lease_buf = smb3_create_lease_buf,
.parse_lease_buf = smb3_parse_lease_buf,
+ .clone_range = smb2_clone_range,
+ .validate_negotiate = smb3_validate_negotiate,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index edccb5252462..2013234b73ad 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -454,6 +454,81 @@ neg_exit:
return rc;
}
+int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
+{
+ int rc = 0;
+ struct validate_negotiate_info_req vneg_inbuf;
+ struct validate_negotiate_info_rsp *pneg_rsp;
+ u32 rsplen;
+
+ cifs_dbg(FYI, "validate negotiate\n");
+
+ /*
+ * validation ioctl must be signed, so no point sending this if we
+ * can not sign it. We could eventually change this to selectively
+ * sign just this, the first and only signed request on a connection.
+ * This is good enough for now since a user who wants better security
+ * would also enable signing on the mount. Having validation of
+ * negotiate info for signed connections helps reduce attack vectors
+ */
+ if (tcon->ses->server->sign == false)
+ return 0; /* validation requires signing */
+
+ vneg_inbuf.Capabilities =
+ cpu_to_le32(tcon->ses->server->vals->req_capabilities);
+ memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
+
+ if (tcon->ses->sign)
+ vneg_inbuf.SecurityMode =
+ cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
+ else if (global_secflags & CIFSSEC_MAY_SIGN)
+ vneg_inbuf.SecurityMode =
+ cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
+ else
+ vneg_inbuf.SecurityMode = 0;
+
+ vneg_inbuf.DialectCount = cpu_to_le16(1);
+ vneg_inbuf.Dialects[0] =
+ cpu_to_le16(tcon->ses->server->vals->protocol_id);
+
+ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
+ FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
+ (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
+ (char **)&pneg_rsp, &rsplen);
+
+ if (rc != 0) {
+ cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
+ return -EIO;
+ }
+
+ if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+ cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
+ return -EIO;
+ }
+
+ /* check validate negotiate info response matches what we got earlier */
+ if (pneg_rsp->Dialect !=
+ cpu_to_le16(tcon->ses->server->vals->protocol_id))
+ goto vneg_out;
+
+ if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
+ goto vneg_out;
+
+ /* do not validate server guid because not saved at negprot time yet */
+
+ if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND |
+ SMB2_LARGE_FILES) != tcon->ses->server->capabilities)
+ goto vneg_out;
+
+ /* validate negotiate successful */
+ cifs_dbg(FYI, "validate negotiate info successful\n");
+ return 0;
+
+vneg_out:
+ cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
+ return -EIO;
+}
+
int
SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_cp)
@@ -630,6 +705,8 @@ ssetup_ntlmssp_authenticate:
goto ssetup_exit;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+ if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+ cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
ssetup_exit:
free_rsp_buf(resp_buftype, rsp);
@@ -717,6 +794,14 @@ static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code)
#define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */)
+/* These are similar values to what Windows uses */
+static inline void init_copy_chunk_defaults(struct cifs_tcon *tcon)
+{
+ tcon->max_chunks = 256;
+ tcon->max_bytes_chunk = 1048576;
+ tcon->max_bytes_copy = 16777216;
+}
+
int
SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
struct cifs_tcon *tcon, const struct nls_table *cp)
@@ -818,7 +903,9 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
-
+ init_copy_chunk_defaults(tcon);
+ if (tcon->ses->server->ops->validate_negotiate)
+ rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
tcon_exit:
free_rsp_buf(resp_buftype, rsp);
kfree(unc_path);
@@ -1137,6 +1224,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cifs_dbg(FYI, "SMB2 IOCTL\n");
+ *out_data = NULL;
/* zero out returned data len, in case of error */
if (plen)
*plen = 0;
@@ -1182,19 +1270,38 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
req->Flags = 0;
iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
- if (indatalen)
- inc_rfc1001_len(req, indatalen);
+ /*
+ * If no input data, the size of ioctl struct in
+ * protocol spec still includes a 1 byte data buffer,
+ * but if input data passed to ioctl, we do not
+ * want to double count this, so we do not send
+ * the dummy one byte of data in iovec[0] if sending
+ * input data (in iovec[1]). We also must add 4 bytes
+ * in first iovec to allow for rfc1002 length field.
+ */
+
+ if (indatalen) {
+ iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ inc_rfc1001_len(req, indatalen - 1);
+ } else
+ iov[0].iov_len = get_rfc1002_length(req) + 4;
+
rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
- if (rc != 0) {
+ if ((rc != 0) && (rc != -EINVAL)) {
if (tcon)
cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
+ } else if (rc == -EINVAL) {
+ if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
+ (opcode != FSCTL_SRV_COPYCHUNK)) {
+ if (tcon)
+ cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+ goto ioctl_exit;
+ }
}
/* check if caller wants to look at return data or just return rc */
@@ -1234,6 +1341,33 @@ ioctl_exit:
return rc;
}
+/*
+ * Individual callers to ioctl worker function follow
+ */
+
+int
+SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid)
+{
+ int rc;
+ char *res_key = NULL;
+ struct compress_ioctl fsctl_input;
+ char *ret_data = NULL;
+
+ fsctl_input.CompressionState =
+ __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+
+ rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
+ FSCTL_SET_COMPRESSION, true /* is_fsctl */,
+ (char *)&fsctl_input /* data input */,
+ 2 /* in data len */, &ret_data /* out data */, NULL);
+
+ cifs_dbg(FYI, "set compression rc %d\n", rc);
+ kfree(res_key);
+
+ return rc;
+}
+
int
SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid)
@@ -2104,11 +2238,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
- if (rc != 0) {
+ if (rc != 0)
cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
- goto out;
- }
-out:
+
free_rsp_buf(resp_buftype, rsp);
kfree(iov);
return rc;
@@ -2299,7 +2431,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
- goto qinf_exit;
+ goto qfsinf_exit;
}
rsp = (struct smb2_query_info_rsp *)iov.iov_base;
@@ -2311,7 +2443,70 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (!rc)
copy_fs_info_to_kstatfs(info, fsdata);
-qinf_exit:
+qfsinf_exit:
+ free_rsp_buf(resp_buftype, iov.iov_base);
+ return rc;
+}
+
+int
+SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, int level)
+{
+ struct smb2_query_info_rsp *rsp = NULL;
+ struct kvec iov;
+ int rc = 0;
+ int resp_buftype, max_len, min_len;
+ struct cifs_ses *ses = tcon->ses;
+ unsigned int rsp_len, offset;
+
+ if (level == FS_DEVICE_INFORMATION) {
+ max_len = sizeof(FILE_SYSTEM_DEVICE_INFO);
+ min_len = sizeof(FILE_SYSTEM_DEVICE_INFO);
+ } else if (level == FS_ATTRIBUTE_INFORMATION) {
+ max_len = sizeof(FILE_SYSTEM_ATTRIBUTE_INFO);
+ min_len = MIN_FS_ATTR_INFO_SIZE;
+ } else if (level == FS_SECTOR_SIZE_INFORMATION) {
+ max_len = sizeof(struct smb3_fs_ss_info);
+ min_len = sizeof(struct smb3_fs_ss_info);
+ } else {
+ cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level);
+ return -EINVAL;
+ }
+
+ rc = build_qfs_info_req(&iov, tcon, level, max_len,
+ persistent_fid, volatile_fid);
+ if (rc)
+ return rc;
+
+ rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
+ if (rc) {
+ cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+ goto qfsattr_exit;
+ }
+ rsp = (struct smb2_query_info_rsp *)iov.iov_base;
+
+ rsp_len = le32_to_cpu(rsp->OutputBufferLength);
+ offset = le16_to_cpu(rsp->OutputBufferOffset);
+ rc = validate_buf(offset, rsp_len, &rsp->hdr, min_len);
+ if (rc)
+ goto qfsattr_exit;
+
+ if (level == FS_ATTRIBUTE_INFORMATION)
+ memcpy(&tcon->fsAttrInfo, 4 /* RFC1001 len */ + offset
+ + (char *)&rsp->hdr, min_t(unsigned int,
+ rsp_len, max_len));
+ else if (level == FS_DEVICE_INFORMATION)
+ memcpy(&tcon->fsDevInfo, 4 /* RFC1001 len */ + offset
+ + (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO));
+ else if (level == FS_SECTOR_SIZE_INFORMATION) {
+ struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *)
+ (4 /* RFC1001 len */ + offset + (char *)&rsp->hdr);
+ tcon->ss_flags = le32_to_cpu(ss_info->Flags);
+ tcon->perf_sector_size =
+ le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
+ }
+
+qfsattr_exit:
free_rsp_buf(resp_buftype, iov.iov_base);
return rc;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index b83d0118a757..2022c542ea3a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -122,6 +122,23 @@ struct smb2_pdu {
__le16 StructureSize2; /* size of wct area (varies, request specific) */
} __packed;
+struct smb2_transform_hdr {
+ __be32 smb2_buf_length; /* big endian on wire */
+ /* length is only two or three bytes - with
+ one or two byte type preceding it that MBZ */
+ __u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */
+ __u8 Signature[16];
+ __u8 Nonce[11];
+ __u8 Reserved[5];
+ __le32 OriginalMessageSize;
+ __u16 Reserved1;
+ __le16 EncryptionAlgorithm;
+ __u64 SessionId;
+} __packed;
+
+/* Encryption Algorithms */
+#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
+
/*
* SMB2 flag definitions
*/
@@ -237,6 +254,7 @@ struct smb2_sess_setup_req {
/* Currently defined SessionFlags */
#define SMB2_SESSION_FLAG_IS_GUEST 0x0001
#define SMB2_SESSION_FLAG_IS_NULL 0x0002
+#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004
struct smb2_sess_setup_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 9 */
@@ -534,9 +552,16 @@ struct create_durable {
} Data;
} __packed;
+#define COPY_CHUNK_RES_KEY_SIZE 24
+struct resume_key_req {
+ char ResumeKey[COPY_CHUNK_RES_KEY_SIZE];
+ __le32 ContextLength; /* MBZ */
+ char Context[0]; /* ignored, Windows sets to 4 bytes of zero */
+} __packed;
+
/* this goes in the ioctl buffer when doing a copychunk request */
struct copychunk_ioctl {
- char SourceKey[24];
+ char SourceKey[COPY_CHUNK_RES_KEY_SIZE];
__le32 ChunkCount; /* we are only sending 1 */
__le32 Reserved;
/* array will only be one chunk long for us */
@@ -546,13 +571,25 @@ struct copychunk_ioctl {
__u32 Reserved2;
} __packed;
-/* Response and Request are the same format */
-struct validate_negotiate_info {
+struct copychunk_ioctl_rsp {
+ __le32 ChunksWritten;
+ __le32 ChunkBytesWritten;
+ __le32 TotalBytesWritten;
+} __packed;
+
+struct validate_negotiate_info_req {
__le32 Capabilities;
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialect[1];
+ __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+} __packed;
+
+struct validate_negotiate_info_rsp {
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 Dialect; /* Dialect in use for the connection */
} __packed;
#define RSS_CAPABLE 0x00000001
@@ -569,6 +606,10 @@ struct network_interface_info_ioctl_rsp {
#define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */
+struct compress_ioctl {
+ __le16 CompressionState; /* See cifspdu.h for possible flag values */
+} __packed;
+
struct smb2_ioctl_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 57 */
@@ -584,7 +625,7 @@ struct smb2_ioctl_req {
__le32 MaxOutputResponse;
__le32 Flags;
__u32 Reserved2;
- char Buffer[0];
+ __u8 Buffer[0];
} __packed;
struct smb2_ioctl_rsp {
@@ -870,14 +911,16 @@ struct smb2_lease_ack {
/* File System Information Classes */
#define FS_VOLUME_INFORMATION 1 /* Query */
-#define FS_LABEL_INFORMATION 2 /* Set */
+#define FS_LABEL_INFORMATION 2 /* Local only */
#define FS_SIZE_INFORMATION 3 /* Query */
#define FS_DEVICE_INFORMATION 4 /* Query */
#define FS_ATTRIBUTE_INFORMATION 5 /* Query */
#define FS_CONTROL_INFORMATION 6 /* Query, Set */
#define FS_FULL_SIZE_INFORMATION 7 /* Query */
#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */
-#define FS_DRIVER_PATH_INFORMATION 9 /* Query */
+#define FS_DRIVER_PATH_INFORMATION 9 /* Local only */
+#define FS_VOLUME_FLAGS_INFORMATION 10 /* Local only */
+#define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */
struct smb2_fs_full_size_info {
__le64 TotalAllocationUnits;
@@ -887,6 +930,22 @@ struct smb2_fs_full_size_info {
__le32 BytesPerSector;
} __packed;
+#define SSINFO_FLAGS_ALIGNED_DEVICE 0x00000001
+#define SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE 0x00000002
+#define SSINFO_FLAGS_NO_SEEK_PENALTY 0x00000004
+#define SSINFO_FLAGS_TRIM_ENABLED 0x00000008
+
+/* sector size info struct */
+struct smb3_fs_ss_info {
+ __le32 LogicalBytesPerSector;
+ __le32 PhysicalBytesPerSectorForAtomicity;
+ __le32 PhysicalBytesPerSectorForPerf;
+ __le32 FileSystemEffectivePhysicalBytesPerSectorForAtomicity;
+ __le32 Flags;
+ __le32 ByteOffsetForSectorAlignment;
+ __le32 ByteOffsetForPartitionAlignment;
+} __packed;
+
/* partial list of QUERY INFO levels */
#define FILE_DIRECTORY_INFORMATION 1
#define FILE_FULL_DIRECTORY_INFORMATION 2
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index e3fb4801ee96..93adc64666f3 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -61,7 +61,7 @@ extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const char *full_path, FILE_ALL_INFO *data,
- bool *adjust_tz);
+ bool *adjust_tz, bool *symlink);
extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
const char *full_path, __u64 size,
struct cifs_sb_info *cifs_sb, bool set_alloc);
@@ -142,12 +142,16 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
FILE_BASIC_INFO *buf);
+extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid);
extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
const u64 persistent_fid, const u64 volatile_fid,
const __u8 oplock_level);
extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct kstatfs *FSData);
+extern int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_file_id, u64 volatile_file_id, int lvl);
extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
const __u64 persist_fid, const __u64 volatile_fid,
const __u32 pid, const __u64 length, const __u64 offset,
@@ -158,5 +162,6 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_lock_element *buf);
extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 *lease_key, const __le32 lease_state);
+extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 340abca3aa52..59c748ce872f 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -466,7 +466,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
static inline void
smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
{
- hdr->MessageId = get_next_mid(server);
+ hdr->MessageId = get_next_mid64(server);
}
static struct mid_q_entry *
@@ -516,13 +516,19 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf,
return -EAGAIN;
}
- if (ses->status != CifsGood) {
- /* check if SMB2 session is bad because we are setting it up */
+ if (ses->status == CifsNew) {
if ((buf->Command != SMB2_SESSION_SETUP) &&
(buf->Command != SMB2_NEGOTIATE))
return -EAGAIN;
/* else ok - we are setting up session */
}
+
+ if (ses->status == CifsExiting) {
+ if (buf->Command != SMB2_LOGOFF)
+ return -EAGAIN;
+ /* else ok - we are shutting down the session */
+ }
+
*mid = smb2_mid_entry_alloc(buf, ses->server);
if (*mid == NULL)
return -ENOMEM;
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index a4b2391fe66e..0e538b5c9622 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -90,7 +90,7 @@
#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */
#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
-#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
/* Perform server-side data movement */
#define FSCTL_SRV_COPYCHUNK 0x001440F2
#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 800b938e4061..b37570952846 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -58,7 +58,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
return temp;
else {
memset(temp, 0, sizeof(struct mid_q_entry));
- temp->mid = smb_buffer->Mid; /* always LE */
+ temp->mid = get_mid(smb_buffer);
temp->pid = current->pid;
temp->command = cpu_to_le16(smb_buffer->Command);
cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
@@ -431,13 +431,20 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
return -EAGAIN;
}
- if (ses->status != CifsGood) {
- /* check if SMB session is bad because we are setting it up */
+ if (ses->status == CifsNew) {
if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
(in_buf->Command != SMB_COM_NEGOTIATE))
return -EAGAIN;
/* else ok - we are setting up session */
}
+
+ if (ses->status == CifsExiting) {
+ /* check if SMB session is bad because we are setting it up */
+ if (in_buf->Command != SMB_COM_LOGOFF_ANDX)
+ return -EAGAIN;
+ /* else ok - we are shutting down session */
+ }
+
*ppmidQ = AllocMidQEntry(in_buf, ses->server);
if (*ppmidQ == NULL)
return -ENOMEM;
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index cc0ea9fe5ecf..e7550cb9fb74 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations;
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
int coda_permission(struct inode *inode, int mask);
-int coda_revalidate_inode(struct dentry *);
+int coda_revalidate_inode(struct inode *);
int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int coda_setattr(struct dentry *, struct iattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 190effc6a6fa..5efbb5ee0adc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -387,9 +387,6 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op)
- return -ENOTDIR;
-
if (host_file->f_op->iterate) {
struct inode *host_inode = file_inode(host_file);
mutex_lock(&host_inode->i_mutex);
@@ -566,13 +563,12 @@ static int coda_dentry_delete(const struct dentry * dentry)
* cache manager Venus issues a downcall to the kernel when this
* happens
*/
-int coda_revalidate_inode(struct dentry *dentry)
+int coda_revalidate_inode(struct inode *inode)
{
struct coda_vattr attr;
int error;
int old_mode;
ino_t old_ino;
- struct inode *inode = dentry->d_inode;
struct coda_inode_info *cii = ITOC(inode);
if (!cii->c_flags)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 380b798f8443..9e83b7790212 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op || !host_file->f_op->read)
+ if (!host_file->f_op->read)
return -EINVAL;
return host_file->f_op->read(host_file, buf, count, ppos);
@@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op || !host_file->f_op->write)
+ if (!host_file->f_op->write)
return -EINVAL;
host_inode = file_inode(host_file);
@@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op || !host_file->f_op->mmap)
+ if (!host_file->f_op->mmap)
return -ENODEV;
coda_inode = file_inode(coda_file);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 4dcc0d81a7aa..506de34a4ef3 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode)
int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
- int err = coda_revalidate_inode(dentry);
+ int err = coda_revalidate_inode(dentry->d_inode);
if (!err)
generic_fillattr(dentry->d_inode, stat);
return err;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5d19acfa7c6c..dc52e13d58e0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
/*FALL THROUGH*/
default:
- if (f.file->f_op && f.file->f_op->compat_ioctl) {
+ if (f.file->f_op->compat_ioctl) {
error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
if (error != -ENOIOCTLCMD)
goto out_fput;
}
- if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
+ if (!f.file->f_op->unlocked_ioctl)
goto do_ioctl;
break;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 277bd1be21fd..e081acbac2e7 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
- BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- sd->s_dentry = NULL;
+ /* Coordinate with configfs_attach_attr where will increase
+ * sd->s_count and update sd->s_dentry to new allocated one.
+ * Only set sd->dentry to null when this dentry is the only
+ * sd owner.
+ * If not do so, configfs_d_iput may run just after
+ * configfs_attach_attr and set sd->s_dentry to null
+ * even it's still in use.
+ */
+ if (atomic_read(&sd->s_count) <= 2)
+ sd->s_dentry = NULL;
+
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
iput(inode);
}
-/*
- * We _must_ delete our dentries on last dput, as the chain-to-parent
- * behavior is required to clear the parents of default_groups.
- */
-static int configfs_d_delete(const struct dentry *dentry)
-{
- return 1;
-}
-
const struct dentry_operations configfs_dentry_ops = {
.d_iput = configfs_d_iput,
- /* simple_delete_dentry() isn't exported */
- .d_delete = configfs_d_delete,
+ .d_delete = always_delete_dentry,
};
#ifdef CONFIG_LOCKDEP
@@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
+ spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
+
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/coredump.c b/fs/coredump.c
index 9bdeca12ae0e..bc3fbcd32558 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
return err;
}
-void do_coredump(siginfo_t *siginfo)
+void do_coredump(const siginfo_t *siginfo)
{
struct core_state core_state;
struct core_name cn;
@@ -645,7 +645,7 @@ void do_coredump(siginfo_t *siginfo)
*/
if (!uid_eq(inode->i_uid, current_fsuid()))
goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
+ if (!cprm.file->f_op->write)
goto close_fail;
if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
goto close_fail;
@@ -685,40 +685,55 @@ fail:
* do on a core-file: use only these functions to write out all the
* necessary info.
*/
-int dump_write(struct file *file, const void *addr, int nr)
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
- return !dump_interrupted() &&
- access_ok(VERIFY_READ, addr, nr) &&
- file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+ struct file *file = cprm->file;
+ loff_t pos = file->f_pos;
+ ssize_t n;
+ if (cprm->written + nr > cprm->limit)
+ return 0;
+ while (nr) {
+ if (dump_interrupted())
+ return 0;
+ n = __kernel_write(file, addr, nr, &pos);
+ if (n <= 0)
+ return 0;
+ file->f_pos = pos;
+ cprm->written += n;
+ nr -= n;
+ }
+ return 1;
}
-EXPORT_SYMBOL(dump_write);
+EXPORT_SYMBOL(dump_emit);
-int dump_seek(struct file *file, loff_t off)
+int dump_skip(struct coredump_params *cprm, size_t nr)
{
- int ret = 1;
-
+ static char zeroes[PAGE_SIZE];
+ struct file *file = cprm->file;
if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ if (cprm->written + nr > cprm->limit)
+ return 0;
if (dump_interrupted() ||
- file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ file->f_op->llseek(file, nr, SEEK_CUR) < 0)
return 0;
+ cprm->written += nr;
+ return 1;
} else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
+ while (nr > PAGE_SIZE) {
+ if (!dump_emit(cprm, zeroes, PAGE_SIZE))
+ return 0;
+ nr -= PAGE_SIZE;
}
- free_page((unsigned long)buf);
+ return dump_emit(cprm, zeroes, nr);
}
- return ret;
}
-EXPORT_SYMBOL(dump_seek);
+EXPORT_SYMBOL(dump_skip);
+
+int dump_align(struct coredump_params *cprm, int align)
+{
+ unsigned mod = cprm->written & (align - 1);
+ if (align & (align - 1))
+ return 0;
+ return mod ? dump_skip(cprm, align - mod) : 1;
+}
+EXPORT_SYMBOL(dump_align);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index cd06466f365e..11b29d491b7c 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -1,5 +1,5 @@
config CRAMFS
- tristate "Compressed ROM file system support (cramfs)"
+ tristate "Compressed ROM file system support (cramfs) (OBSOLETE)"
depends on BLOCK
select ZLIB_INFLATE
help
@@ -16,4 +16,7 @@ config CRAMFS
cramfs. Note that the root file system (the one containing the
directory /) cannot be compiled as a module.
+ This filesystem is obsoleted by SquashFS, which is much better
+ in terms of performance and features.
+
If unsure, say N.
diff --git a/fs/dcache.c b/fs/dcache.c
index ae6ebb88ceff..4bdb300b16e2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock);
static struct kmem_cache *dentry_cache __read_mostly;
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
- if (!(*seq & 1)) /* Even */
- *seq = read_seqbegin(lock);
- else /* Odd */
- read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
- return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
- if (seq & 1)
- read_sequnlock_excl(lock);
-}
-
/*
* This is the single most critical data structure when it comes
* to the dcache: the hashtable for lookups. Somebody should try
@@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq)
* This hash-function tries to avoid losing too many bits of hash
* information, yet avoid using a prime hash-size or similar.
*/
-#define D_HASHBITS d_hash_shift
-#define D_HASHMASK d_hash_mask
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
@@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
unsigned int hash)
{
hash += (unsigned long) parent / L1_CACHE_BYTES;
- hash = hash + (hash >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
+ hash = hash + (hash >> d_hash_shift);
+ return dentry_hashtable + (hash & d_hash_mask);
}
/* Statistics gathering. */
@@ -343,6 +312,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
+ __d_clear_type(dentry);
dentry->d_inode = NULL;
hlist_del_init(&dentry->d_alias);
dentry_rcuwalk_barrier(dentry);
@@ -468,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
{
list_del(&dentry->d_u.d_child);
/*
- * Inform try_to_ascend() that we are no longer attached to the
+ * Inform d_walk() that we are no longer attached to the
* dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -483,27 +453,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
return parent;
}
-/*
- * Unhash a dentry without inserting an RCU walk barrier or checking that
- * dentry->d_lock is locked. The caller must take care of that, if
- * appropriate.
- */
-static void __d_shrink(struct dentry *dentry)
-{
- if (!d_unhashed(dentry)) {
- struct hlist_bl_head *b;
- if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
- b = &dentry->d_sb->s_anon;
- else
- b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
- hlist_bl_lock(b);
- __hlist_bl_del(&dentry->d_hash);
- dentry->d_hash.pprev = NULL;
- hlist_bl_unlock(b);
- }
-}
-
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
@@ -522,7 +471,21 @@ static void __d_shrink(struct dentry *dentry)
void __d_drop(struct dentry *dentry)
{
if (!d_unhashed(dentry)) {
- __d_shrink(dentry);
+ struct hlist_bl_head *b;
+ /*
+ * Hashed dentries are normally on the dentry hashtable,
+ * with the exception of those newly allocated by
+ * d_obtain_alias, which are always IS_ROOT:
+ */
+ if (unlikely(IS_ROOT(dentry)))
+ b = &dentry->d_sb->s_anon;
+ else
+ b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+ hlist_bl_lock(b);
+ __hlist_bl_del(&dentry->d_hash);
+ dentry->d_hash.pprev = NULL;
+ hlist_bl_unlock(b);
dentry_rcuwalk_barrier(dentry);
}
}
@@ -1075,144 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb)
}
EXPORT_SYMBOL(shrink_dcache_sb);
-/*
- * destroy a single subtree of dentries for unmount
- * - see the comments on shrink_dcache_for_umount() for a description of the
- * locking
- */
-static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
-{
- struct dentry *parent;
-
- BUG_ON(!IS_ROOT(dentry));
-
- for (;;) {
- /* descend to the first leaf in the current subtree */
- while (!list_empty(&dentry->d_subdirs))
- dentry = list_entry(dentry->d_subdirs.next,
- struct dentry, d_u.d_child);
-
- /* consume the dentries from this leaf up through its parents
- * until we find one with children or run out altogether */
- do {
- struct inode *inode;
-
- /*
- * inform the fs that this dentry is about to be
- * unhashed and destroyed.
- */
- if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
- !d_unhashed(dentry))
- dentry->d_op->d_prune(dentry);
-
- dentry_lru_del(dentry);
- __d_shrink(dentry);
-
- if (dentry->d_lockref.count != 0) {
- printk(KERN_ERR
- "BUG: Dentry %p{i=%lx,n=%s}"
- " still in use (%d)"
- " [unmount of %s %s]\n",
- dentry,
- dentry->d_inode ?
- dentry->d_inode->i_ino : 0UL,
- dentry->d_name.name,
- dentry->d_lockref.count,
- dentry->d_sb->s_type->name,
- dentry->d_sb->s_id);
- BUG();
- }
-
- if (IS_ROOT(dentry)) {
- parent = NULL;
- list_del(&dentry->d_u.d_child);
- } else {
- parent = dentry->d_parent;
- parent->d_lockref.count--;
- list_del(&dentry->d_u.d_child);
- }
-
- inode = dentry->d_inode;
- if (inode) {
- dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
- if (dentry->d_op && dentry->d_op->d_iput)
- dentry->d_op->d_iput(dentry, inode);
- else
- iput(inode);
- }
-
- d_free(dentry);
-
- /* finished when we fall off the top of the tree,
- * otherwise we ascend to the parent and move to the
- * next sibling if there is one */
- if (!parent)
- return;
- dentry = parent;
- } while (list_empty(&dentry->d_subdirs));
-
- dentry = list_entry(dentry->d_subdirs.next,
- struct dentry, d_u.d_child);
- }
-}
-
-/*
- * destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock because:
- * - the superblock is detached from all mountings and open files, so the
- * dentry trees will not be rearranged by the VFS
- * - s_umount is write-locked, so the memory pressure shrinker will ignore
- * any dentries belonging to this superblock that it comes across
- * - the filesystem itself is no longer permitted to rearrange the dentries
- * in this superblock
- */
-void shrink_dcache_for_umount(struct super_block *sb)
-{
- struct dentry *dentry;
-
- if (down_read_trylock(&sb->s_umount))
- BUG();
-
- dentry = sb->s_root;
- sb->s_root = NULL;
- dentry->d_lockref.count--;
- shrink_dcache_for_umount_subtree(dentry);
-
- while (!hlist_bl_empty(&sb->s_anon)) {
- dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
- shrink_dcache_for_umount_subtree(dentry);
- }
-}
-
-/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
-{
- struct dentry *new = old->d_parent;
-
- rcu_read_lock();
- spin_unlock(&old->d_lock);
- spin_lock(&new->d_lock);
-
- /*
- * might go back up the wrong parent if we have had a rename
- * or deletion
- */
- if (new != old->d_parent ||
- (old->d_flags & DCACHE_DENTRY_KILLED) ||
- need_seqretry(&rename_lock, seq)) {
- spin_unlock(&new->d_lock);
- new = NULL;
- }
- rcu_read_unlock();
- return new;
-}
-
/**
* enum d_walk_ret - action to talke during tree walk
* @D_WALK_CONTINUE: contrinue walk
@@ -1301,9 +1126,24 @@ resume:
*/
if (this_parent != parent) {
struct dentry *child = this_parent;
- this_parent = try_to_ascend(this_parent, seq);
- if (!this_parent)
+ this_parent = child->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (this_parent != child->d_parent ||
+ (child->d_flags & DCACHE_DENTRY_KILLED) ||
+ need_seqretry(&rename_lock, seq)) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
goto rename_retry;
+ }
+ rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1478,6 +1318,91 @@ void shrink_dcache_parent(struct dentry *parent)
}
EXPORT_SYMBOL(shrink_dcache_parent);
+static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
+{
+ struct select_data *data = _data;
+ enum d_walk_ret ret = D_WALK_CONTINUE;
+
+ if (dentry->d_lockref.count) {
+ dentry_lru_del(dentry);
+ if (likely(!list_empty(&dentry->d_subdirs)))
+ goto out;
+ if (dentry == data->start && dentry->d_lockref.count == 1)
+ goto out;
+ printk(KERN_ERR
+ "BUG: Dentry %p{i=%lx,n=%s}"
+ " still in use (%d)"
+ " [unmount of %s %s]\n",
+ dentry,
+ dentry->d_inode ?
+ dentry->d_inode->i_ino : 0UL,
+ dentry->d_name.name,
+ dentry->d_lockref.count,
+ dentry->d_sb->s_type->name,
+ dentry->d_sb->s_id);
+ BUG();
+ } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+ /*
+ * We can't use d_lru_shrink_move() because we
+ * need to get the global LRU lock and do the
+ * LRU accounting.
+ */
+ if (dentry->d_flags & DCACHE_LRU_LIST)
+ d_lru_del(dentry);
+ d_shrink_add(dentry, &data->dispose);
+ data->found++;
+ ret = D_WALK_NORETRY;
+ }
+out:
+ if (data->found && need_resched())
+ ret = D_WALK_QUIT;
+ return ret;
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+ struct dentry *dentry;
+
+ if (down_read_trylock(&sb->s_umount))
+ BUG();
+
+ dentry = sb->s_root;
+ sb->s_root = NULL;
+ for (;;) {
+ struct select_data data;
+
+ INIT_LIST_HEAD(&data.dispose);
+ data.start = dentry;
+ data.found = 0;
+
+ d_walk(dentry, &data, umount_collect, NULL);
+ if (!data.found)
+ break;
+
+ shrink_dentry_list(&data.dispose);
+ cond_resched();
+ }
+ d_drop(dentry);
+ dput(dentry);
+
+ while (!hlist_bl_empty(&sb->s_anon)) {
+ struct select_data data;
+ dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
+
+ INIT_LIST_HEAD(&data.dispose);
+ data.start = NULL;
+ data.found = 0;
+
+ d_walk(dentry, &data, umount_collect, NULL);
+ if (data.found)
+ shrink_dentry_list(&data.dispose);
+ cond_resched();
+ }
+}
+
static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
{
struct select_data *data = _data;
@@ -1638,12 +1563,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
}
EXPORT_SYMBOL(d_alloc);
+/**
+ * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
+ * @sb: the superblock
+ * @name: qstr of the name
+ *
+ * For a filesystem that just pins its dentries in memory and never
+ * performs lookups at all, return an unhashed IS_ROOT dentry.
+ */
struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
- struct dentry *dentry = __d_alloc(sb, name);
- if (dentry)
- dentry->d_flags |= DCACHE_DISCONNECTED;
- return dentry;
+ return __d_alloc(sb, name);
}
EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1685,14 +1615,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
}
EXPORT_SYMBOL(d_set_d_op);
+static unsigned d_flags_for_inode(struct inode *inode)
+{
+ unsigned add_flags = DCACHE_FILE_TYPE;
+
+ if (!inode)
+ return DCACHE_MISS_TYPE;
+
+ if (S_ISDIR(inode->i_mode)) {
+ add_flags = DCACHE_DIRECTORY_TYPE;
+ if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
+ if (unlikely(!inode->i_op->lookup))
+ add_flags = DCACHE_AUTODIR_TYPE;
+ else
+ inode->i_opflags |= IOP_LOOKUP;
+ }
+ } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+ if (unlikely(inode->i_op->follow_link))
+ add_flags = DCACHE_SYMLINK_TYPE;
+ else
+ inode->i_opflags |= IOP_NOFOLLOW;
+ }
+
+ if (unlikely(IS_AUTOMOUNT(inode)))
+ add_flags |= DCACHE_NEED_AUTOMOUNT;
+ return add_flags;
+}
+
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
+ unsigned add_flags = d_flags_for_inode(inode);
+
spin_lock(&dentry->d_lock);
- if (inode) {
- if (unlikely(IS_AUTOMOUNT(inode)))
- dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+ dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
+ dentry->d_flags |= add_flags;
+ if (inode)
hlist_add_head(&dentry->d_alias, &inode->i_dentry);
- }
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
@@ -1801,6 +1759,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
EXPORT_SYMBOL(d_instantiate_unique);
+/**
+ * d_instantiate_no_diralias - instantiate a non-aliased dentry
+ * @entry: dentry to complete
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry. If a directory alias is found, then
+ * return an error (and drop inode). Together with d_materialise_unique() this
+ * guarantees that a directory inode may never have more than one alias.
+ */
+int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
+{
+ BUG_ON(!hlist_unhashed(&entry->d_alias));
+
+ spin_lock(&inode->i_lock);
+ if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
+ spin_unlock(&inode->i_lock);
+ iput(inode);
+ return -EBUSY;
+ }
+ __d_instantiate(entry, inode);
+ spin_unlock(&inode->i_lock);
+ security_d_instantiate(entry, inode);
+
+ return 0;
+}
+EXPORT_SYMBOL(d_instantiate_no_diralias);
+
struct dentry *d_make_root(struct inode *root_inode)
{
struct dentry *res = NULL;
@@ -1870,6 +1855,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
static const struct qstr anonstring = QSTR_INIT("/", 1);
struct dentry *tmp;
struct dentry *res;
+ unsigned add_flags;
if (!inode)
return ERR_PTR(-ESTALE);
@@ -1895,9 +1881,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
}
/* attach a disconnected dentry */
+ add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
+
spin_lock(&tmp->d_lock);
tmp->d_inode = inode;
- tmp->d_flags |= DCACHE_DISCONNECTED;
+ tmp->d_flags |= add_flags;
hlist_add_head(&tmp->d_alias, &inode->i_dentry);
hlist_bl_lock(&tmp->d_sb->s_anon);
hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2574,7 +2562,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
dentry_lock_for_move(dentry, target);
write_seqcount_begin(&dentry->d_seq);
- write_seqcount_begin(&target->d_seq);
+ write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
@@ -2706,7 +2694,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
dentry_lock_for_move(anon, dentry);
write_seqcount_begin(&dentry->d_seq);
- write_seqcount_begin(&anon->d_seq);
+ write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED);
dparent = dentry->d_parent;
@@ -2725,7 +2713,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
spin_unlock(&dentry->d_lock);
/* anon->d_lock still locked, returns locked */
- anon->d_flags &= ~DCACHE_DISCONNECTED;
}
/**
@@ -2881,27 +2868,36 @@ static int prepend_path(const struct path *path,
const struct path *root,
char **buffer, int *buflen)
{
- struct dentry *dentry = path->dentry;
- struct vfsmount *vfsmnt = path->mnt;
- struct mount *mnt = real_mount(vfsmnt);
+ struct dentry *dentry;
+ struct vfsmount *vfsmnt;
+ struct mount *mnt;
int error = 0;
- unsigned seq = 0;
+ unsigned seq, m_seq = 0;
char *bptr;
int blen;
rcu_read_lock();
+restart_mnt:
+ read_seqbegin_or_lock(&mount_lock, &m_seq);
+ seq = 0;
+ rcu_read_lock();
restart:
bptr = *buffer;
blen = *buflen;
+ error = 0;
+ dentry = path->dentry;
+ vfsmnt = path->mnt;
+ mnt = real_mount(vfsmnt);
read_seqbegin_or_lock(&rename_lock, &seq);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+ struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
/* Global root? */
- if (mnt_has_parent(mnt)) {
- dentry = mnt->mnt_mountpoint;
- mnt = mnt->mnt_parent;
+ if (mnt != parent) {
+ dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
+ mnt = parent;
vfsmnt = &mnt->mnt;
continue;
}
@@ -2936,6 +2932,14 @@ restart:
}
done_seqretry(&rename_lock, seq);
+ if (!(m_seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&mount_lock, m_seq)) {
+ m_seq = 1;
+ goto restart_mnt;
+ }
+ done_seqretry(&mount_lock, m_seq);
+
if (error >= 0 && bptr == *buffer) {
if (--blen < 0)
error = -ENAMETOOLONG;
@@ -2971,9 +2975,7 @@ char *__d_path(const struct path *path,
int error;
prepend(&res, &buflen, "\0", 1);
- br_read_lock(&vfsmount_lock);
error = prepend_path(path, root, &res, &buflen);
- br_read_unlock(&vfsmount_lock);
if (error < 0)
return ERR_PTR(error);
@@ -2990,9 +2992,7 @@ char *d_absolute_path(const struct path *path,
int error;
prepend(&res, &buflen, "\0", 1);
- br_read_lock(&vfsmount_lock);
error = prepend_path(path, &root, &res, &buflen);
- br_read_unlock(&vfsmount_lock);
if (error > 1)
error = -EINVAL;
@@ -3067,9 +3067,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
rcu_read_lock();
get_fs_root_rcu(current->fs, &root);
- br_read_lock(&vfsmount_lock);
error = path_with_deleted(path, &root, &res, &buflen);
- br_read_unlock(&vfsmount_lock);
rcu_read_unlock();
if (error < 0)
@@ -3224,7 +3222,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
error = -ENOENT;
- br_read_lock(&vfsmount_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
char *cwd = page + PATH_MAX;
@@ -3232,7 +3229,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
prepend(&cwd, &buflen, "\0", 1);
error = prepend_path(&pwd, &root, &cwd, &buflen);
- br_read_unlock(&vfsmount_lock);
rcu_read_unlock();
if (error < 0)
@@ -3253,7 +3249,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
error = -EFAULT;
}
} else {
- br_read_unlock(&vfsmount_lock);
rcu_read_unlock();
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c7c83ff0f752..9c0444cccbe1 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -566,8 +566,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
mutex_lock(&parent->d_inode->i_mutex);
if (child != dentry) {
- next = list_entry(child->d_u.d_child.next, struct dentry,
- d_u.d_child);
+ next = list_next_entry(child, d_u.d_child);
goto up;
}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 073d30b9d1ac..a726b9f29cb7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 88556dc0458e..d5abafd56a6d 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -706,9 +706,7 @@ static int lkb_idr_is_local(int id, void *p, void *data)
{
struct dlm_lkb *lkb = p;
- if (!lkb->lkb_nodeid)
- return 1;
- return 0;
+ return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
}
static int lkb_idr_is_any(int id, void *p, void *data)
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 60a327863b11..e7cfbaf8d0e2 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -74,14 +74,16 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static struct genl_ops dlm_nl_ops = {
- .cmd = DLM_CMD_HELLO,
- .doit = user_cmd,
+static struct genl_ops dlm_nl_ops[] = {
+ {
+ .cmd = DLM_CMD_HELLO,
+ .doit = user_cmd,
+ },
};
int __init dlm_netlink_init(void)
{
- return genl_register_family_with_ops(&family, &dlm_nl_ops, 1);
+ return genl_register_family_with_ops(&family, dlm_nl_ops);
}
void dlm_netlink_exit(void)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 000eae2782b6..2f6735dbf1a9 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -392,7 +392,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
wait_for_completion(&ecr->completion);
rc = ecr->rc;
- INIT_COMPLETION(ecr->completion);
+ reinit_completion(&ecr->completion);
}
out:
ablkcipher_request_free(req);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index bf12ba5dd223..4000f6b3a750 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,15 +44,15 @@
*/
static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *lower_dentry;
- int rc = 1;
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ int rc;
+
+ if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
+ return 1;
if (flags & LOOKUP_RCU)
return -ECHILD;
- lower_dentry = ecryptfs_dentry_to_lower(dentry);
- if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
- goto out;
rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
if (dentry->d_inode) {
struct inode *lower_inode =
@@ -60,12 +60,17 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
fsstack_copy_attr_all(dentry->d_inode, lower_inode);
}
-out:
return rc;
}
struct kmem_cache *ecryptfs_dentry_info_cache;
+static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
+{
+ kmem_cache_free(ecryptfs_dentry_info_cache,
+ container_of(head, struct ecryptfs_dentry_info, rcu));
+}
+
/**
* ecryptfs_d_release
* @dentry: The ecryptfs dentry
@@ -74,15 +79,11 @@ struct kmem_cache *ecryptfs_dentry_info_cache;
*/
static void ecryptfs_d_release(struct dentry *dentry)
{
- if (ecryptfs_dentry_to_private(dentry)) {
- if (ecryptfs_dentry_to_lower(dentry)) {
- dput(ecryptfs_dentry_to_lower(dentry));
- mntput(ecryptfs_dentry_to_lower_mnt(dentry));
- }
- kmem_cache_free(ecryptfs_dentry_info_cache,
- ecryptfs_dentry_to_private(dentry));
+ struct ecryptfs_dentry_info *p = dentry->d_fsdata;
+ if (p) {
+ path_put(&p->lower_path);
+ call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
}
- return;
}
const struct dentry_operations ecryptfs_dops = {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index df19d34a033b..90d1882b306f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -261,7 +261,10 @@ struct ecryptfs_inode_info {
* vfsmount too. */
struct ecryptfs_dentry_info {
struct path lower_path;
- struct ecryptfs_crypt_stat *crypt_stat;
+ union {
+ struct ecryptfs_crypt_stat *crypt_stat;
+ struct rcu_head rcu;
+ };
};
/**
@@ -512,13 +515,6 @@ ecryptfs_dentry_to_lower(struct dentry *dentry)
return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
}
-static inline void
-ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
-{
- ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry =
- lower_dentry;
-}
-
static inline struct vfsmount *
ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
{
@@ -531,13 +527,6 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
}
-static inline void
-ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
-{
- ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt =
- lower_mnt;
-}
-
#define ecryptfs_printk(type, fmt, arg...) \
__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
__printf(1, 2)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 992cf95830b5..b1eaa7a1f82c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
{
struct file *lower_file = ecryptfs_file_to_lower(file);
- if (lower_file->f_op && lower_file->f_op->flush) {
+ if (lower_file->f_op->flush) {
filemap_write_and_wait(file->f_mapping);
return lower_file->f_op->flush(lower_file, td);
}
@@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
struct file *lower_file = NULL;
lower_file = ecryptfs_file_to_lower(file);
- if (lower_file->f_op && lower_file->f_op->fasync)
+ if (lower_file->f_op->fasync)
rc = lower_file->f_op->fasync(fd, lower_file, flag);
return rc;
}
@@ -313,12 +313,10 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
static long
ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct file *lower_file = NULL;
+ struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOTTY;
- if (ecryptfs_file_to_private(file))
- lower_file = ecryptfs_file_to_lower(file);
- if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+ if (lower_file->f_op->unlocked_ioctl)
rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
return rc;
}
@@ -327,12 +325,10 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static long
ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct file *lower_file = NULL;
+ struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOIOCTLCMD;
- if (ecryptfs_file_to_private(file))
- lower_file = ecryptfs_file_to_lower(file);
- if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
+ if (lower_file->f_op && lower_file->f_op->compat_ioctl)
rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
return rc;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 67e9b6339691..c36c44824471 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
dget(lower_dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry);
+ rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
inode = __ecryptfs_get_inode(lower_dentry->d_inode,
directory_inode->i_sb);
if (IS_ERR(inode)) {
- vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
+ vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
goto out_lock;
}
fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
@@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
BUG_ON(!d_count(lower_dentry));
ecryptfs_set_dentry_private(dentry, dentry_info);
- ecryptfs_set_dentry_lower(dentry, lower_dentry);
- ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
+ dentry_info->lower_path.mnt = lower_mnt;
+ dentry_info->lower_path.dentry = lower_dentry;
if (!lower_dentry->d_inode) {
/* We want to add because we couldn't find in lower */
@@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
dget(lower_new_dentry);
lower_dir_dentry = lock_parent(lower_new_dentry);
rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
- lower_new_dentry);
+ lower_new_dentry, NULL);
if (rc || !lower_new_dentry->d_inode)
goto out_lock;
rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_lock;
}
rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
- lower_new_dir_dentry->d_inode, lower_new_dentry);
+ lower_new_dir_dentry->d_inode, lower_new_dentry,
+ NULL);
if (rc)
goto out_lock;
if (target_inode)
@@ -703,16 +704,6 @@ out:
return NULL;
}
-static void
-ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
-{
- char *buf = nd_get_link(nd);
- if (!IS_ERR(buf)) {
- /* Free the char* */
- kfree(buf);
- }
-}
-
/**
* upper_size_to_lower_size
* @crypt_stat: Crypt_stat associated with file
@@ -891,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = notify_change(lower_dentry, &lower_ia);
+ rc = notify_change(lower_dentry, &lower_ia, NULL);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
}
return rc;
@@ -992,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
lower_ia.ia_valid &= ~ATTR_MODE;
mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = notify_change(lower_dentry, &lower_ia);
+ rc = notify_change(lower_dentry, &lower_ia, NULL);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
out:
fsstack_copy_attr_all(inode, lower_inode);
@@ -1121,7 +1112,7 @@ out:
const struct inode_operations ecryptfs_symlink_iops = {
.readlink = generic_readlink,
.follow_link = ecryptfs_follow_link,
- .put_link = ecryptfs_put_link,
+ .put_link = kfree_put_link,
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
.getattr = ecryptfs_getattr_link,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index eb1c5979ecaf..1b119d3bf924 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/* ->kill_sb() will take care of root_info */
ecryptfs_set_dentry_private(s->s_root, root_info);
- ecryptfs_set_dentry_lower(s->s_root, path.dentry);
- ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt);
+ root_info->lower_path = path;
s->s_flags |= MS_ACTIVE;
return dget(s->s_root);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index a8766b880c07..becc725a1953 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
return 0;
}
-/*
- * Retaining negative dentries for an in-memory filesystem just wastes
- * memory and lookup time: arrange for them to be deleted immediately.
- */
-static int efivarfs_delete_dentry(const struct dentry *dentry)
-{
- return 1;
-}
-
static struct dentry_operations efivarfs_d_ops = {
.d_compare = efivarfs_d_compare,
.d_hash = efivarfs_d_hash,
- .d_delete = efivarfs_delete_dentry,
+ .d_delete = always_delete_dentry,
};
static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 810c28fb8c3c..8b5e2584c840 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -41,6 +41,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
+#include <linux/rculist.h>
/*
* LOCKING:
@@ -133,8 +134,12 @@ struct nested_calls {
* of these on a server and we do not want this to take another cache line.
*/
struct epitem {
- /* RB tree node used to link this structure to the eventpoll RB tree */
- struct rb_node rbn;
+ union {
+ /* RB tree node links this structure to the eventpoll RB tree */
+ struct rb_node rbn;
+ /* Used to free the struct epitem */
+ struct rcu_head rcu;
+ };
/* List header used to link this structure to the eventpoll ready list */
struct list_head rdllink;
@@ -580,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
* @sproc: Pointer to the scan callback.
* @priv: Private opaque data passed to the @sproc callback.
* @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
*
* Returns: The same integer error code returned by the @sproc callback.
*/
static int ep_scan_ready_list(struct eventpoll *ep,
int (*sproc)(struct eventpoll *,
struct list_head *, void *),
- void *priv,
- int depth)
+ void *priv, int depth, bool ep_locked)
{
int error, pwake = 0;
unsigned long flags;
@@ -598,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
- mutex_lock_nested(&ep->mtx, depth);
+
+ if (!ep_locked)
+ mutex_lock_nested(&ep->mtx, depth);
/*
* Steal the ready list, and re-init the original one to the
@@ -662,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
}
spin_unlock_irqrestore(&ep->lock, flags);
- mutex_unlock(&ep->mtx);
+ if (!ep_locked)
+ mutex_unlock(&ep->mtx);
/* We have to call this outside the lock */
if (pwake)
@@ -671,6 +679,12 @@ static int ep_scan_ready_list(struct eventpoll *ep,
return error;
}
+static void epi_rcu_free(struct rcu_head *head)
+{
+ struct epitem *epi = container_of(head, struct epitem, rcu);
+ kmem_cache_free(epi_cache, epi);
+}
+
/*
* Removes a "struct epitem" from the eventpoll RB tree and deallocates
* all the associated resources. Must be called with "mtx" held.
@@ -692,8 +706,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
/* Remove the current item from the list of epoll hooks */
spin_lock(&file->f_lock);
- if (ep_is_linked(&epi->fllink))
- list_del_init(&epi->fllink);
+ list_del_rcu(&epi->fllink);
spin_unlock(&file->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -704,9 +717,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
spin_unlock_irqrestore(&ep->lock, flags);
wakeup_source_unregister(ep_wakeup_source(epi));
-
- /* At this point it is safe to free the eventpoll item */
- kmem_cache_free(epi_cache, epi);
+ /*
+ * At this point it is safe to free the eventpoll item. Use the union
+ * field epi->rcu, since we are trying to minimize the size of
+ * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
+ * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
+ * use of the rbn field.
+ */
+ call_rcu(&epi->rcu, epi_rcu_free);
atomic_long_dec(&ep->user->epoll_watches);
@@ -807,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
return 0;
}
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+ poll_table *pt);
+
+struct readyevents_arg {
+ struct eventpoll *ep;
+ bool locked;
+};
+
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
- return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+ struct readyevents_arg *arg = priv;
+
+ return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+ call_nests + 1, arg->locked);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
int pollflags;
struct eventpoll *ep = file->private_data;
+ struct readyevents_arg arg;
+
+ /*
+ * During ep_insert() we already hold the ep->mtx for the tfile.
+ * Prevent re-aquisition.
+ */
+ arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+ arg.ep = ep;
/* Insert inside our poll wait queue */
poll_wait(file, &ep->poll_wait, wait);
@@ -827,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
* could re-enter here.
*/
pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
- ep_poll_readyevents_proc, ep, ep, current);
+ ep_poll_readyevents_proc, &arg, ep, current);
return pollflags != -1 ? pollflags : 0;
}
@@ -872,7 +909,6 @@ static const struct file_operations eventpoll_fops = {
*/
void eventpoll_release_file(struct file *file)
{
- struct list_head *lsthead = &file->f_ep_links;
struct eventpoll *ep;
struct epitem *epi;
@@ -890,17 +926,12 @@ void eventpoll_release_file(struct file *file)
* Besides, ep_remove() acquires the lock, so we can't hold it here.
*/
mutex_lock(&epmutex);
-
- while (!list_empty(lsthead)) {
- epi = list_first_entry(lsthead, struct epitem, fllink);
-
+ list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
ep = epi->ep;
- list_del_init(&epi->fllink);
mutex_lock_nested(&ep->mtx, 0);
ep_remove(ep, epi);
mutex_unlock(&ep->mtx);
}
-
mutex_unlock(&epmutex);
}
@@ -1138,7 +1169,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
struct file *child_file;
struct epitem *epi;
- list_for_each_entry(epi, &file->f_ep_links, fllink) {
+ /* CTL_DEL can remove links here, but that can't increase our count */
+ rcu_read_lock();
+ list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
child_file = epi->ep->file;
if (is_file_epoll(child_file)) {
if (list_empty(&child_file->f_ep_links)) {
@@ -1160,6 +1193,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
"file is not an ep!\n");
}
}
+ rcu_read_unlock();
return error;
}
@@ -1231,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
* Must be called with "mtx" held.
*/
static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- struct file *tfile, int fd)
+ struct file *tfile, int fd, int full_check)
{
int error, revents, pwake = 0;
unsigned long flags;
@@ -1286,7 +1320,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_lock);
- list_add_tail(&epi->fllink, &tfile->f_ep_links);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_lock);
/*
@@ -1297,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* now check if we've created too many backpaths */
error = -EINVAL;
- if (reverse_path_check())
+ if (full_check && reverse_path_check())
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
@@ -1327,8 +1361,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
error_remove_epi:
spin_lock(&tfile->f_lock);
- if (ep_is_linked(&epi->fllink))
- list_del_init(&epi->fllink);
+ list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -1521,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
}
static inline struct timespec ep_set_mstimeout(long ms)
@@ -1791,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
- int did_lock_epmutex = 0;
+ int full_check = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
struct epoll_event epds;
+ struct eventpoll *tep = NULL;
error = -EFAULT;
if (ep_op_has_event(op) &&
@@ -1814,12 +1848,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/* The target file descriptor must support poll */
error = -EPERM;
- if (!tf.file->f_op || !tf.file->f_op->poll)
+ if (!tf.file->f_op->poll)
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
- if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
- epds.events &= ~EPOLLWAKEUP;
+ ep_take_care_of_epollwakeup(&epds);
/*
* We have to check that the file structure underneath the file descriptor
@@ -1844,26 +1877,40 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* and hang them on the tfile_check_list, so we can check that we
* haven't created too many possible wakeup paths.
*
- * We need to hold the epmutex across both ep_insert and ep_remove
- * b/c we want to make sure we are looking at a coherent view of
- * epoll network.
+ * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when
+ * the epoll file descriptor is attaching directly to a wakeup source,
+ * unless the epoll file descriptor is nested. The purpose of taking the
+ * 'epmutex' on add is to prevent complex toplogies such as loops and
+ * deep wakeup paths from forming in parallel through multiple
+ * EPOLL_CTL_ADD operations.
*/
- if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
- mutex_lock(&epmutex);
- did_lock_epmutex = 1;
- }
+ mutex_lock_nested(&ep->mtx, 0);
if (op == EPOLL_CTL_ADD) {
- if (is_file_epoll(tf.file)) {
- error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
- goto error_tgt_fput;
+ if (!list_empty(&f.file->f_ep_links) ||
+ is_file_epoll(tf.file)) {
+ full_check = 1;
+ mutex_unlock(&ep->mtx);
+ mutex_lock(&epmutex);
+ if (is_file_epoll(tf.file)) {
+ error = -ELOOP;
+ if (ep_loop_check(ep, tf.file) != 0) {
+ clear_tfile_check_list();
+ goto error_tgt_fput;
+ }
+ } else
+ list_add(&tf.file->f_tfile_llink,
+ &tfile_check_list);
+ mutex_lock_nested(&ep->mtx, 0);
+ if (is_file_epoll(tf.file)) {
+ tep = tf.file->private_data;
+ mutex_lock_nested(&tep->mtx, 1);
}
- } else
- list_add(&tf.file->f_tfile_llink, &tfile_check_list);
+ }
+ }
+ if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
+ tep = tf.file->private_data;
+ mutex_lock_nested(&tep->mtx, 1);
}
-
- mutex_lock_nested(&ep->mtx, 0);
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
@@ -1877,10 +1924,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_ADD:
if (!epi) {
epds.events |= POLLERR | POLLHUP;
- error = ep_insert(ep, &epds, tf.file, fd);
+ error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- clear_tfile_check_list();
+ if (full_check)
+ clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1896,10 +1944,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = -ENOENT;
break;
}
+ if (tep != NULL)
+ mutex_unlock(&tep->mtx);
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (did_lock_epmutex)
+ if (full_check)
mutex_unlock(&epmutex);
fdput(tf);
diff --git a/fs/exec.c b/fs/exec.c
index 8875dd10ae7a..7ea097f6b341 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,6 +106,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
*/
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
+ struct linux_binfmt *fmt;
struct file *file;
struct filename *tmp = getname(library);
int error = PTR_ERR(tmp);
@@ -136,24 +137,21 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
fsnotify_open(file);
error = -ENOEXEC;
- if(file->f_op) {
- struct linux_binfmt * fmt;
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- if (!fmt->load_shlib)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
- }
+ read_lock(&binfmt_lock);
+ list_for_each_entry(fmt, &formats, lh) {
+ if (!fmt->load_shlib)
+ continue;
+ if (!try_module_get(fmt->module))
+ continue;
read_unlock(&binfmt_lock);
+ error = fmt->load_shlib(file);
+ read_lock(&binfmt_lock);
+ put_binfmt(fmt);
+ if (error != -ENOEXEC)
+ break;
}
+ read_unlock(&binfmt_lock);
exit:
fput(file);
out:
@@ -1277,13 +1275,10 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
*/
int prepare_binprm(struct linux_binprm *bprm)
{
- umode_t mode;
- struct inode * inode = file_inode(bprm->file);
+ struct inode *inode = file_inode(bprm->file);
+ umode_t mode = inode->i_mode;
int retval;
- mode = inode->i_mode;
- if (bprm->file->f_op == NULL)
- return -EACCES;
/* clear any previous set[ug]id data from a previous binary */
bprm->cred->euid = current_euid();
@@ -1385,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm)
if (retval)
return retval;
- retval = audit_bprm(bprm);
- if (retval)
- return retval;
-
retval = -ENOENT;
retry:
read_lock(&binfmt_lock);
@@ -1436,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm)
ret = search_binary_handler(bprm);
if (ret >= 0) {
+ audit_bprm(bprm);
trace_sched_process_exec(current, old_pid, bprm);
ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
current->did_exec = 1;
@@ -1547,6 +1539,7 @@ static int do_execve_common(const char *filename,
current->fs->in_exec = 0;
current->in_execve = 0;
acct_update_integrals(current);
+ task_numa_free(current);
free_bprm(bprm);
if (displaced)
put_files_struct(displaced);
@@ -1668,6 +1661,12 @@ int __get_dumpable(unsigned long mm_flags)
return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index a235f0016889..48a359dd286e 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result,
return NULL;
}
-/*
- * Find root of a disconnected subtree and return a reference to it.
- */
-static struct dentry *
-find_disconnected_root(struct dentry *dentry)
+static bool dentry_connected(struct dentry *dentry)
{
dget(dentry);
- while (!IS_ROOT(dentry)) {
+ while (dentry->d_flags & DCACHE_DISCONNECTED) {
struct dentry *parent = dget_parent(dentry);
- if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+ dput(dentry);
+ if (IS_ROOT(dentry)) {
dput(parent);
- break;
+ return false;
}
+ dentry = parent;
+ }
+ dput(dentry);
+ return true;
+}
+
+static void clear_disconnected(struct dentry *dentry)
+{
+ dget(dentry);
+ while (dentry->d_flags & DCACHE_DISCONNECTED) {
+ struct dentry *parent = dget_parent(dentry);
+
+ WARN_ON_ONCE(IS_ROOT(dentry));
+
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_DISCONNECTED;
+ spin_unlock(&dentry->d_lock);
dput(dentry);
dentry = parent;
}
- return dentry;
+ dput(dentry);
+}
+
+/*
+ * Reconnect a directory dentry with its parent.
+ *
+ * This can return a dentry, or NULL, or an error.
+ *
+ * In the first case the returned dentry is the parent of the given
+ * dentry, and may itself need to be reconnected to its parent.
+ *
+ * In the NULL case, a concurrent VFS operation has either renamed or
+ * removed this directory. The concurrent operation has reconnected our
+ * dentry, so we no longer need to.
+ */
+static struct dentry *reconnect_one(struct vfsmount *mnt,
+ struct dentry *dentry, char *nbuf)
+{
+ struct dentry *parent;
+ struct dentry *tmp;
+ int err;
+
+ parent = ERR_PTR(-EACCES);
+ mutex_lock(&dentry->d_inode->i_mutex);
+ if (mnt->mnt_sb->s_export_op->get_parent)
+ parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
+ mutex_unlock(&dentry->d_inode->i_mutex);
+
+ if (IS_ERR(parent)) {
+ dprintk("%s: get_parent of %ld failed, err %d\n",
+ __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
+ return parent;
+ }
+
+ dprintk("%s: find name of %lu in %lu\n", __func__,
+ dentry->d_inode->i_ino, parent->d_inode->i_ino);
+ err = exportfs_get_name(mnt, parent, nbuf, dentry);
+ if (err == -ENOENT)
+ goto out_reconnected;
+ if (err)
+ goto out_err;
+ dprintk("%s: found name: %s\n", __func__, nbuf);
+ mutex_lock(&parent->d_inode->i_mutex);
+ tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (IS_ERR(tmp)) {
+ dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+ goto out_err;
+ }
+ if (tmp != dentry) {
+ dput(tmp);
+ goto out_reconnected;
+ }
+ dput(tmp);
+ if (IS_ROOT(dentry)) {
+ err = -ESTALE;
+ goto out_err;
+ }
+ return parent;
+
+out_err:
+ dput(parent);
+ return ERR_PTR(err);
+out_reconnected:
+ dput(parent);
+ /*
+ * Someone must have renamed our entry into another parent, in
+ * which case it has been reconnected by the rename.
+ *
+ * Or someone removed it entirely, in which case filehandle
+ * lookup will succeed but the directory is now IS_DEAD and
+ * subsequent operations on it will fail.
+ *
+ * Alternatively, maybe there was no race at all, and the
+ * filesystem is just corrupt and gave us a parent that doesn't
+ * actually contain any entry pointing to this inode. So,
+ * double check that this worked and return -ESTALE if not:
+ */
+ if (!dentry_connected(dentry))
+ return ERR_PTR(-ESTALE);
+ return NULL;
}
/*
* Make sure target_dir is fully connected to the dentry tree.
*
- * It may already be, as the flag isn't always updated when connection happens.
+ * On successful return, DCACHE_DISCONNECTED will be cleared on
+ * target_dir, and target_dir->d_parent->...->d_parent will reach the
+ * root of the filesystem.
+ *
+ * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
+ * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
+ * set but already be connected. In that case we'll verify the
+ * connection to root and then clear the flag.
+ *
+ * Note that target_dir could be removed by a concurrent operation. In
+ * that case reconnect_path may still succeed with target_dir fully
+ * connected, but further operations using the filehandle will fail when
+ * necessary (due to S_DEAD being set on the directory).
*/
static int
reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
{
- int noprogress = 0;
- int err = -ESTALE;
+ struct dentry *dentry, *parent;
- /*
- * It is possible that a confused file system might not let us complete
- * the path to the root. For example, if get_parent returns a directory
- * in which we cannot find a name for the child. While this implies a
- * very sick filesystem we don't want it to cause knfsd to spin. Hence
- * the noprogress counter. If we go through the loop 10 times (2 is
- * probably enough) without getting anywhere, we just give up
- */
- while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
- struct dentry *pd = find_disconnected_root(target_dir);
-
- if (!IS_ROOT(pd)) {
- /* must have found a connected parent - great */
- spin_lock(&pd->d_lock);
- pd->d_flags &= ~DCACHE_DISCONNECTED;
- spin_unlock(&pd->d_lock);
- noprogress = 0;
- } else if (pd == mnt->mnt_sb->s_root) {
- printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
- spin_lock(&pd->d_lock);
- pd->d_flags &= ~DCACHE_DISCONNECTED;
- spin_unlock(&pd->d_lock);
- noprogress = 0;
- } else {
- /*
- * We have hit the top of a disconnected path, try to
- * find parent and connect.
- *
- * Racing with some other process renaming a directory
- * isn't much of a problem here. If someone renames
- * the directory, it will end up properly connected,
- * which is what we want
- *
- * Getting the parent can't be supported generically,
- * the locking is too icky.
- *
- * Instead we just return EACCES. If server reboots
- * or inodes get flushed, you lose
- */
- struct dentry *ppd = ERR_PTR(-EACCES);
- struct dentry *npd;
-
- mutex_lock(&pd->d_inode->i_mutex);
- if (mnt->mnt_sb->s_export_op->get_parent)
- ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
- mutex_unlock(&pd->d_inode->i_mutex);
-
- if (IS_ERR(ppd)) {
- err = PTR_ERR(ppd);
- dprintk("%s: get_parent of %ld failed, err %d\n",
- __func__, pd->d_inode->i_ino, err);
- dput(pd);
- break;
- }
+ dentry = dget(target_dir);
- dprintk("%s: find name of %lu in %lu\n", __func__,
- pd->d_inode->i_ino, ppd->d_inode->i_ino);
- err = exportfs_get_name(mnt, ppd, nbuf, pd);
- if (err) {
- dput(ppd);
- dput(pd);
- if (err == -ENOENT)
- /* some race between get_parent and
- * get_name? just try again
- */
- continue;
- break;
- }
- dprintk("%s: found name: %s\n", __func__, nbuf);
- mutex_lock(&ppd->d_inode->i_mutex);
- npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
- mutex_unlock(&ppd->d_inode->i_mutex);
- if (IS_ERR(npd)) {
- err = PTR_ERR(npd);
- dprintk("%s: lookup failed: %d\n",
- __func__, err);
- dput(ppd);
- dput(pd);
- break;
- }
- /* we didn't really want npd, we really wanted
- * a side-effect of the lookup.
- * hopefully, npd == pd, though it isn't really
- * a problem if it isn't
- */
- if (npd == pd)
- noprogress = 0;
- else
- printk("%s: npd != pd\n", __func__);
- dput(npd);
- dput(ppd);
- if (IS_ROOT(pd)) {
- /* something went wrong, we have to give up */
- dput(pd);
- break;
- }
- }
- dput(pd);
- }
+ while (dentry->d_flags & DCACHE_DISCONNECTED) {
+ BUG_ON(dentry == mnt->mnt_sb->s_root);
- if (target_dir->d_flags & DCACHE_DISCONNECTED) {
- /* something went wrong - oh-well */
- if (!err)
- err = -ESTALE;
- return err;
- }
+ if (IS_ROOT(dentry))
+ parent = reconnect_one(mnt, dentry, nbuf);
+ else
+ parent = dget_parent(dentry);
+ if (!parent)
+ break;
+ dput(dentry);
+ if (IS_ERR(parent))
+ return PTR_ERR(parent);
+ dentry = parent;
+ }
+ dput(dentry);
+ clear_disconnected(target_dir);
return 0;
}
@@ -215,7 +232,7 @@ struct getdents_callback {
struct dir_context ctx;
char *name; /* name that was found. It already points to a
buffer NAME_MAX+1 is size */
- unsigned long ino; /* the inum we are looking for */
+ u64 ino; /* the inum we are looking for */
int found; /* inode matched? */
int sequence; /* sequence counter */
};
@@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
struct inode *dir = path->dentry->d_inode;
int error;
struct file *file;
+ struct kstat stat;
+ struct path child_path = {
+ .mnt = path->mnt,
+ .dentry = child,
+ };
struct getdents_callback buffer = {
.ctx.actor = filldir_one,
.name = name,
- .ino = child->d_inode->i_ino
};
error = -ENOTDIR;
@@ -268,6 +289,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
if (!dir->i_fop)
goto out;
/*
+ * inode->i_ino is unsigned long, kstat->ino is u64, so the
+ * former would be insufficient on 32-bit hosts when the
+ * filesystem supports 64-bit inode numbers. So we need to
+ * actually call ->getattr, not just read i_ino:
+ */
+ error = vfs_getattr_nosec(&child_path, &stat);
+ if (error)
+ return error;
+ buffer.ino = stat.ino;
+ /*
* Open the directory ...
*/
file = dentry_open(path, O_RDONLY, cred);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c260de6d7b6d..8a337640a46a 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode,
int count = 0;
ext2_fsblk_t first_block = 0;
+ BUG_ON(maxblocks == 0);
+
depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (depth == 0)
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 1c3312858fcf..e98171a11cfe 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
int rc;
memset(&tmp, 0, sizeof(struct buffer_head));
+ tmp.b_size = 1 << inode->i_blkbits;
rc = ext2_get_block(inode, pgoff, &tmp, create);
*result = tmp.b_blocknr;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c50c76190373..37fd31ed16e7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2825,6 +2825,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
* bitmap, and an inode table.
*/
overhead += ngroups * (2 + sbi->s_itb_per_group);
+
+ /* Add the journal blocks as well */
+ overhead += sbi->s_journal->j_maxlen;
+
sbi->s_overhead_last = overhead;
smp_wmb();
sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index dc5d572ebd6a..6ea7b1436bbc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -640,6 +640,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
struct ext4_group_desc *gdp;
ext4_group_t i;
ext4_group_t ngroups = ext4_get_groups_count(sb);
+ struct ext4_group_info *grp;
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
ext4_fsblk_t bitmap_count;
@@ -655,7 +656,11 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += ext4_free_group_clusters(sb, gdp);
+ grp = NULL;
+ if (EXT4_SB(sb)->s_group_info)
+ grp = ext4_get_group_info(sb, i);
+ if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ desc_count += ext4_free_group_clusters(sb, gdp);
brelse(bitmap_bh);
bitmap_bh = ext4_read_block_bitmap(sb, i);
if (bitmap_bh == NULL)
@@ -679,7 +684,11 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += ext4_free_group_clusters(sb, gdp);
+ grp = NULL;
+ if (EXT4_SB(sb)->s_group_info)
+ grp = ext4_get_group_info(sb, i);
+ if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ desc_count += ext4_free_group_clusters(sb, gdp);
}
return desc_count;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af815ea9d7cc..e6185031c1cc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -29,6 +29,7 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
+#include <linux/ratelimit.h>
#include <crypto/hash.h>
#ifdef __KERNEL__
#include <linux/compat.h>
@@ -1314,6 +1315,11 @@ struct ext4_sb_info {
unsigned long s_es_last_sorted;
struct percpu_counter s_extent_cache_cnt;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
+
+ /* Ratelimit ext4 messages. */
+ struct ratelimit_state s_err_ratelimit_state;
+ struct ratelimit_state s_warning_ratelimit_state;
+ struct ratelimit_state s_msg_ratelimit_state;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1396,7 +1402,18 @@ static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
}
+/* Add these declarations here only so that these functions can be
+ * found by name. Otherwise, they are very hard to locate. */
+static inline int ext4_test_inode_flag(struct inode *inode, int bit);
+static inline void ext4_set_inode_flag(struct inode *inode, int bit);
+static inline void ext4_clear_inode_flag(struct inode *inode, int bit);
EXT4_INODE_BIT_FNS(flag, flags, 0)
+
+/* Add these declarations here only so that these functions can be
+ * found by name. Otherwise, they are very hard to locate. */
+static inline int ext4_test_inode_state(struct inode *inode, int bit);
+static inline void ext4_set_inode_state(struct inode *inode, int bit);
+static inline void ext4_clear_inode_state(struct inode *inode, int bit);
#if (BITS_PER_LONG < 64)
EXT4_INODE_BIT_FNS(state, state_flags, 0)
@@ -2734,8 +2751,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first,
struct inode *second);
extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
struct inode *donor_inode);
-void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
-void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 54d52afcdb19..35f65cf4f318 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1666,7 +1666,7 @@ int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
- unsigned short ext1_ee_len, ext2_ee_len, max_len;
+ unsigned short ext1_ee_len, ext2_ee_len;
/*
* Make sure that both extents are initialized. We don't merge
@@ -1677,11 +1677,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
return 0;
- if (ext4_ext_is_uninitialized(ex1))
- max_len = EXT_UNINIT_MAX_LEN;
- else
- max_len = EXT_INIT_MAX_LEN;
-
ext1_ee_len = ext4_ext_get_actual_len(ex1);
ext2_ee_len = ext4_ext_get_actual_len(ex2);
@@ -1694,7 +1689,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* as an RO_COMPAT feature, refuse to merge to extents if
* this can result in the top bit of ee_len being set.
*/
- if (ext1_ee_len + ext2_ee_len > max_len)
+ if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
#ifdef AGGRESSIVE_TEST
if (ext1_ee_len >= 4)
@@ -1720,7 +1715,6 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
struct ext4_extent_header *eh;
unsigned int depth, len;
int merge_done = 0;
- int uninitialized = 0;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
@@ -1730,12 +1724,8 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
break;
/* merge with next extent! */
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(ex + 1));
- if (uninitialized)
- ext4_ext_mark_uninitialized(ex);
if (ex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - ex - 1)
@@ -1890,7 +1880,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *npath = NULL;
int depth, len, err;
ext4_lblk_t next;
- unsigned uninitialized = 0;
int mb_flags = 0;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
@@ -1942,18 +1931,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
if (err)
return err;
- /*
- * ext4_can_extents_be_merged should have checked
- * that either both extents are uninitialized, or
- * both aren't. Thus we need to check only one of
- * them here.
- */
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
- if (uninitialized)
- ext4_ext_mark_uninitialized(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
@@ -1976,20 +1955,10 @@ prepend:
if (err)
return err;
- /*
- * ext4_can_extents_be_merged should have checked
- * that either both extents are uninitialized, or
- * both aren't. Thus we need to check only one of
- * them here.
- */
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
ex->ee_block = newext->ee_block;
ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
- if (uninitialized)
- ext4_ext_mark_uninitialized(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 137193ff389b..0ee59a6644e2 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -432,7 +432,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
- get_random_bytes(&grp, sizeof(grp));
+ grp = prandom_u32();
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d9ecbf1113a7..bae987549dc3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -994,11 +994,9 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
struct inode *dir = dentry->d_parent->d_inode;
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
- unsigned short reclen;
int err;
struct ext4_dir_entry_2 *de;
- reclen = EXT4_DIR_REC_LEN(namelen);
err = ext4_find_dest_de(dir, inode, iloc->bh,
inline_start, inline_size,
name, namelen, &de);
@@ -1442,6 +1440,7 @@ int ext4_read_inline_dir(struct file *file,
if (ret < 0)
goto out;
+ ret = 0;
sb = inode->i_sb;
parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
offset = ctx->pos;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e274e9c1171f..075763474118 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2178,6 +2178,9 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
*
* @handle - handle for journal operations
* @mpd - extent to map
+ * @give_up_on_write - we set this to true iff there is a fatal error and there
+ * is no hope of writing the data. The caller should discard
+ * dirty pages to avoid infinite loops.
*
* The function maps extent starting at mpd->lblk of length mpd->len. If it is
* delayed, blocks are allocated, if it is unwritten, we may need to convert
@@ -2295,6 +2298,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct address_space *mapping = mpd->inode->i_mapping;
struct pagevec pvec;
unsigned int nr_pages;
+ long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
int tag;
@@ -2330,6 +2334,17 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
if (page->index > end)
goto out;
+ /*
+ * Accumulated enough dirty pages? This doesn't apply
+ * to WB_SYNC_ALL mode. For integrity sync we have to
+ * keep going because someone may be concurrently
+ * dirtying pages, and we might have synced a lot of
+ * newly appeared dirty pages, but have not synced all
+ * of the old dirty pages.
+ */
+ if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
+ goto out;
+
/* If we can't merge this page, we are done. */
if (mpd->map.m_len > 0 && mpd->next_page != page->index)
goto out;
@@ -2364,19 +2379,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
if (err <= 0)
goto out;
err = 0;
-
- /*
- * Accumulated enough dirty pages? This doesn't apply
- * to WB_SYNC_ALL mode. For integrity sync we have to
- * keep going because someone may be concurrently
- * dirtying pages, and we might have synced a lot of
- * newly appeared dirty pages, but have not synced all
- * of the old dirty pages.
- */
- if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
- mpd->next_page - mpd->first_page >=
- mpd->wbc->nr_to_write)
- goto out;
+ left--;
}
pagevec_release(&pvec);
cond_resched();
@@ -2420,16 +2423,15 @@ static int ext4_writepages(struct address_space *mapping,
* because that could violate lock ordering on umount
*/
if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- return 0;
+ goto out_writepages;
if (ext4_should_journal_data(inode)) {
struct blk_plug plug;
- int ret;
blk_start_plug(&plug);
ret = write_cache_pages(mapping, wbc, __writepage, mapping);
blk_finish_plug(&plug);
- return ret;
+ goto out_writepages;
}
/*
@@ -2442,8 +2444,10 @@ static int ext4_writepages(struct address_space *mapping,
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
- if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
- return -EROFS;
+ if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ ret = -EROFS;
+ goto out_writepages;
+ }
if (ext4_should_dioread_nolock(inode)) {
/*
@@ -4690,6 +4694,15 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
generic_fillattr(inode, stat);
/*
+ * If there is inline data in the inode, the inode will normally not
+ * have data blocks allocated (it may have an external xattr block).
+ * Report at least one sector for such files, so tools like tar, rsync,
+ * others doen't incorrectly think the file is completely sparse.
+ */
+ if (unlikely(ext4_has_inline_data(inode)))
+ stat->blocks += (stat->size + 511) >> 9;
+
+ /*
* We can't update i_blocks if the block allocation is delayed
* otherwise in the case of system crash before the real block
* allocation is done, we will have i_blocks inconsistent with
@@ -4700,9 +4713,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
* blocks for this file.
*/
delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
- EXT4_I(inode)->i_reserved_data_blocks);
-
- stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9);
+ EXT4_I(inode)->i_reserved_data_blocks);
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
return 0;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a569d335f804..60589b60e9b0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
/* Protect orig inodes against a truncate and make sure,
* that only 1 swap_inode_boot_loader is running. */
- ext4_inode_double_lock(inode, inode_bl);
+ lock_two_nondirectories(inode, inode_bl);
truncate_inode_pages(&inode->i_data, 0);
truncate_inode_pages(&inode_bl->i_data, 0);
@@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_inode_resume_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode_bl);
- ext4_inode_double_unlock(inode, inode_bl);
+ unlock_two_nondirectories(inode, inode_bl);
iput(inode_bl);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a41e3ba8cfaa..4d113efa024c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4794,8 +4794,8 @@ do_more:
" group:%d block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
- }
-
+ } else
+ EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 214461e42a05..04434ad3e8e0 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -259,7 +259,7 @@ static unsigned int mmp_new_seq(void)
u32 new_seq;
do {
- get_random_bytes(&new_seq, sizeof(u32));
+ new_seq = prandom_u32();
} while (new_seq > EXT4_MMP_SEQ_MAX);
return new_seq;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7fa4d855dbd5..773b503bd18c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1203,42 +1203,6 @@ mext_check_arguments(struct inode *orig_inode,
}
/**
- * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
- *
- * @inode1: the inode structure
- * @inode2: the inode structure
- *
- * Lock two inodes' i_mutex
- */
-void
-ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
- BUG_ON(inode1 == inode2);
- if (inode1 < inode2) {
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
- }
-}
-
-/**
- * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
- *
- * @inode1: the inode that is released first
- * @inode2: the inode that is released second
- *
- */
-
-void
-ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
- mutex_unlock(&inode1->i_mutex);
- mutex_unlock(&inode2->i_mutex);
-}
-
-/**
* ext4_move_extents - Exchange the specified range of a file
*
* @o_filp: file structure of the original file
@@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
return -EINVAL;
}
/* Protect orig and donor inodes against a truncate */
- ext4_inode_double_lock(orig_inode, donor_inode);
+ lock_two_nondirectories(orig_inode, donor_inode);
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(orig_inode);
@@ -1535,7 +1499,7 @@ out:
ext4_double_up_write_data_sem(orig_inode, donor_inode);
ext4_inode_resume_unlocked_dio(orig_inode);
ext4_inode_resume_unlocked_dio(donor_inode);
- ext4_inode_double_unlock(orig_inode, donor_inode);
+ unlock_two_nondirectories(orig_inode, donor_inode);
return ret;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index d7d0c7b46ed4..d488f80ee32d 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -197,14 +197,15 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
static void ext4_add_complete_io(ext4_io_end_t *io_end)
{
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
+ struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
struct workqueue_struct *wq;
unsigned long flags;
/* Only reserved conversions from writeback should enter here */
WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
- WARN_ON(!io_end->handle);
+ WARN_ON(!io_end->handle && sbi->s_journal);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
+ wq = sbi->rsv_conversion_wq;
if (list_empty(&ei->i_rsv_conversion_list))
queue_work(wq, &ei->i_rsv_conversion_work);
list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c2e6cbc6bed..c977f4e4e63b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb)
sb->s_id);
}
+#define ext4_error_ratelimit(sb) \
+ ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
+ "EXT4-fs error")
+
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
- sb->s_id, function, line, current->comm, &vaf);
- va_end(args);
+ if (ext4_error_ratelimit(sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
+ va_end(args);
+ }
save_error_info(sb, function, line);
-
ext4_handle_error(sb);
}
@@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function,
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
+ if (ext4_error_ratelimit(inode->i_sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: block %llu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, &vaf);
+ else
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, &vaf);
+ va_end(args);
+ }
save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: block %llu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, &vaf);
- else
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, &vaf);
- va_end(args);
-
ext4_handle_error(inode->i_sb);
}
@@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function,
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ if (ext4_error_ratelimit(inode->i_sb)) {
+ path = d_path(&(file->f_path), pathname, sizeof(pathname));
+ if (IS_ERR(path))
+ path = "(unknown)";
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "block %llu: comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, path, &vaf);
+ else
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path, &vaf);
+ va_end(args);
+ }
save_error_info(inode->i_sb, function, line);
- path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (IS_ERR(path))
- path = "(unknown)";
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "block %llu: comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, path, &vaf);
- else
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path, &vaf);
- va_end(args);
-
ext4_handle_error(inode->i_sb);
}
@@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function,
(sb->s_flags & MS_RDONLY))
return;
- errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
- save_error_info(sb, function, line);
+ if (ext4_error_ratelimit(sb)) {
+ errstr = ext4_decode_error(sb, errno, nbuf);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ }
+ save_error_info(sb, function, line);
ext4_handle_error(sb);
}
@@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb,
struct va_format vaf;
va_list args;
+ if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
+ return;
+
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function,
struct va_format vaf;
va_list args;
+ if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
+ "EXT4-fs warning"))
+ return;
+
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -633,18 +650,20 @@ __acquires(bitlock)
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
- sb->s_id, function, line, grp);
- if (ino)
- printk(KERN_CONT "inode %lu: ", ino);
- if (block)
- printk(KERN_CONT "block %llu:", (unsigned long long) block);
- printk(KERN_CONT "%pV\n", &vaf);
- va_end(args);
+ if (ext4_error_ratelimit(sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
+ sb->s_id, function, line, grp);
+ if (ino)
+ printk(KERN_CONT "inode %lu: ", ino);
+ if (block)
+ printk(KERN_CONT "block %llu:",
+ (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
+ va_end(args);
+ }
if (test_opt(sb, ERRORS_CONT)) {
ext4_commit_super(sb, 0);
@@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
+EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
+EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
+EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
@@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
+ ATTR_LIST(err_ratelimit_interval_ms),
+ ATTR_LIST(err_ratelimit_burst),
+ ATTR_LIST(warning_ratelimit_interval_ms),
+ ATTR_LIST(warning_ratelimit_burst),
+ ATTR_LIST(msg_ratelimit_interval_ms),
+ ATTR_LIST(msg_ratelimit_burst),
NULL,
};
@@ -3037,7 +3068,6 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
- unsigned long rnd;
elr = kzalloc(sizeof(*elr), GFP_KERNEL);
if (!elr)
@@ -3052,10 +3082,8 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
* spread the inode table initialization requests
* better.
*/
- get_random_bytes(&rnd, sizeof(rnd));
- elr->lr_next_sched = jiffies + (unsigned long)rnd %
- (EXT4_DEF_LI_MAX_START_DELAY * HZ);
-
+ elr->lr_next_sched = jiffies + (prandom_u32() %
+ (EXT4_DEF_LI_MAX_START_DELAY * HZ));
return elr;
}
@@ -4118,6 +4146,11 @@ no_journal:
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
+ /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
+ ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
+ ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
+ ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
+
kfree(orig_data);
return 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 03e9bebba198..1423c4816a47 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1352,6 +1352,7 @@ retry:
new_extra_isize = s_min_extra_isize;
kfree(is); is = NULL;
kfree(bs); bs = NULL;
+ brelse(bh);
goto retry;
}
error = -1;
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index e06e0995e00f..214fe1054fce 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -63,3 +63,11 @@ config F2FS_FS_SECURITY
the extended attribute support in advance.
If you are not using a security module, say N.
+
+config F2FS_CHECK_FS
+ bool "F2FS consistency checking feature"
+ depends on F2FS_FS
+ help
+ Enables BUG_ONs which check the file system consistency in runtime.
+
+ If you want to improve the performance, say N.
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index b7826ec1b470..d0fc287efeff 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -205,7 +205,8 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
return acl;
}
-static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int f2fs_set_acl(struct inode *inode, int type,
+ struct posix_acl *acl, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -250,7 +251,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
}
- error = f2fs_setxattr(inode, name_index, "", value, size, NULL);
+ error = f2fs_setxattr(inode, name_index, "", value, size, ipage);
kfree(value);
if (!error)
@@ -260,10 +261,10 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
}
-int f2fs_init_acl(struct inode *inode, struct inode *dir)
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
{
- struct posix_acl *acl = NULL;
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct posix_acl *acl = NULL;
int error = 0;
if (!S_ISLNK(inode->i_mode)) {
@@ -276,19 +277,19 @@ int f2fs_init_acl(struct inode *inode, struct ino